@jsonstudio/llms 0.6.147 → 0.6.187
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/codecs/gemini-openai-codec.js +15 -1
- package/dist/conversion/compat/actions/auto-thinking.d.ts +6 -0
- package/dist/conversion/compat/actions/auto-thinking.js +25 -0
- package/dist/conversion/compat/actions/field-mapping.d.ts +14 -0
- package/dist/conversion/compat/actions/field-mapping.js +155 -0
- package/dist/conversion/compat/actions/qwen-transform.d.ts +3 -0
- package/dist/conversion/compat/actions/qwen-transform.js +209 -0
- package/dist/conversion/compat/actions/request-rules.d.ts +24 -0
- package/dist/conversion/compat/actions/request-rules.js +63 -0
- package/dist/conversion/compat/actions/response-blacklist.d.ts +14 -0
- package/dist/conversion/compat/actions/response-blacklist.js +85 -0
- package/dist/conversion/compat/actions/response-normalize.d.ts +5 -0
- package/dist/conversion/compat/actions/response-normalize.js +121 -0
- package/dist/conversion/compat/actions/response-validate.d.ts +5 -0
- package/dist/conversion/compat/actions/response-validate.js +76 -0
- package/dist/conversion/compat/actions/snapshot.d.ts +8 -0
- package/dist/conversion/compat/actions/snapshot.js +21 -0
- package/dist/conversion/compat/actions/tool-schema.d.ts +6 -0
- package/dist/conversion/compat/actions/tool-schema.js +91 -0
- package/dist/conversion/compat/actions/universal-shape-filter.d.ts +74 -0
- package/dist/conversion/compat/actions/universal-shape-filter.js +382 -0
- package/dist/conversion/compat/profiles/chat-glm.json +187 -13
- package/dist/conversion/compat/profiles/chat-iflow.json +177 -9
- package/dist/conversion/compat/profiles/chat-lmstudio.json +10 -2
- package/dist/conversion/compat/profiles/chat-qwen.json +14 -10
- package/dist/conversion/hub/pipeline/compat/compat-engine.d.ts +7 -2
- package/dist/conversion/hub/pipeline/compat/compat-engine.js +409 -5
- package/dist/conversion/hub/pipeline/compat/compat-types.d.ts +47 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +2 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.js +35 -1
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage3_compat/index.js +2 -2
- package/dist/conversion/hub/pipeline/target-utils.js +3 -0
- package/dist/conversion/hub/response/response-runtime.js +23 -15
- package/dist/conversion/responses/responses-host-policy.d.ts +6 -0
- package/dist/conversion/responses/responses-host-policy.js +14 -0
- package/dist/conversion/responses/responses-openai-bridge.js +51 -2
- package/dist/conversion/shared/anthropic-message-utils.js +6 -0
- package/dist/conversion/shared/bridge-actions.js +1 -1
- package/dist/conversion/shared/bridge-policies.js +0 -1
- package/dist/conversion/shared/responses-conversation-store.js +3 -26
- package/dist/conversion/shared/responses-reasoning-registry.d.ts +4 -0
- package/dist/conversion/shared/responses-reasoning-registry.js +62 -1
- package/dist/conversion/shared/responses-response-utils.js +23 -1
- package/dist/conversion/shared/tool-canonicalizer.d.ts +2 -0
- package/dist/conversion/shared/tool-filter-pipeline.js +11 -0
- package/dist/router/virtual-router/bootstrap.js +218 -39
- package/dist/router/virtual-router/classifier.js +19 -51
- package/dist/router/virtual-router/context-advisor.d.ts +21 -0
- package/dist/router/virtual-router/context-advisor.js +76 -0
- package/dist/router/virtual-router/engine.d.ts +11 -27
- package/dist/router/virtual-router/engine.js +191 -396
- package/dist/router/virtual-router/features.js +24 -607
- package/dist/router/virtual-router/health-manager.js +2 -7
- package/dist/router/virtual-router/message-utils.d.ts +7 -0
- package/dist/router/virtual-router/message-utils.js +66 -0
- package/dist/router/virtual-router/provider-registry.js +6 -2
- package/dist/router/virtual-router/token-estimator.d.ts +2 -0
- package/dist/router/virtual-router/token-estimator.js +16 -0
- package/dist/router/virtual-router/tool-signals.d.ts +13 -0
- package/dist/router/virtual-router/tool-signals.js +403 -0
- package/dist/router/virtual-router/types.d.ts +21 -7
- package/dist/router/virtual-router/types.js +1 -0
- package/package.json +2 -2
|
@@ -3,24 +3,19 @@ import { ProviderRegistry } from './provider-registry.js';
|
|
|
3
3
|
import { RouteLoadBalancer } from './load-balancer.js';
|
|
4
4
|
import { RoutingClassifier } from './classifier.js';
|
|
5
5
|
import { buildRoutingFeatures } from './features.js';
|
|
6
|
-
import {
|
|
7
|
-
|
|
8
|
-
const ANSI_RESET = '\x1b[0m';
|
|
9
|
-
const STICKY_PLAN_TTL_MS = 30 * 60 * 1000;
|
|
10
|
-
const ERROR_STREAK_TTL_MS = 10 * 60 * 1000;
|
|
11
|
-
const ERROR_STREAK_THRESHOLD = 4;
|
|
6
|
+
import { ContextAdvisor } from './context-advisor.js';
|
|
7
|
+
import { DEFAULT_MODEL_CONTEXT_TOKENS, DEFAULT_ROUTE, ROUTE_PRIORITY, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
|
|
12
8
|
export class VirtualRouterEngine {
|
|
13
9
|
routing = {};
|
|
14
10
|
providerRegistry = new ProviderRegistry();
|
|
15
11
|
healthManager = new ProviderHealthManager();
|
|
16
12
|
loadBalancer = new RouteLoadBalancer();
|
|
17
13
|
classifier = new RoutingClassifier({});
|
|
14
|
+
contextAdvisor = new ContextAdvisor();
|
|
15
|
+
contextRouting;
|
|
18
16
|
routeStats = new Map();
|
|
19
17
|
debug = console; // thin hook; host may monkey-patch for colored logging
|
|
20
18
|
healthConfig = null;
|
|
21
|
-
stickyPlans = new Map();
|
|
22
|
-
selectionHistory = new Map();
|
|
23
|
-
providerErrorStreaks = new Map();
|
|
24
19
|
initialize(config) {
|
|
25
20
|
this.validateConfig(config);
|
|
26
21
|
this.routing = config.routing;
|
|
@@ -30,6 +25,8 @@ export class VirtualRouterEngine {
|
|
|
30
25
|
this.healthManager.registerProviders(Object.keys(config.providers));
|
|
31
26
|
this.loadBalancer = new RouteLoadBalancer(config.loadBalancing);
|
|
32
27
|
this.classifier = new RoutingClassifier(config.classifier);
|
|
28
|
+
this.contextRouting = config.contextRouting ?? { warnRatio: 0.9, hardLimit: false };
|
|
29
|
+
this.contextAdvisor.configure(this.contextRouting);
|
|
33
30
|
this.routeStats = new Map();
|
|
34
31
|
for (const routeName of Object.keys(this.routing)) {
|
|
35
32
|
this.routeStats.set(routeName, { hits: 0 });
|
|
@@ -37,42 +34,21 @@ export class VirtualRouterEngine {
|
|
|
37
34
|
}
|
|
38
35
|
route(request, metadata) {
|
|
39
36
|
const features = buildRoutingFeatures(request, metadata);
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
if (stickyActivation) {
|
|
44
|
-
selection = this.selectStickyTarget(stickyActivation, metadata);
|
|
45
|
-
if (selection) {
|
|
46
|
-
classification = this.buildStickyClassification(stickyActivation);
|
|
47
|
-
}
|
|
48
|
-
else {
|
|
49
|
-
stickyActivation = null;
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
if (!selection || !classification) {
|
|
53
|
-
classification = this.classifier.classify(features);
|
|
54
|
-
classification = this.ensureConfiguredClassification(classification);
|
|
55
|
-
const routeName = classification.routeName || DEFAULT_ROUTE;
|
|
56
|
-
selection = this.selectProvider(routeName, metadata, classification);
|
|
57
|
-
}
|
|
58
|
-
if (!selection || !classification) {
|
|
59
|
-
throw new VirtualRouterError('Virtual router failed to select provider', VirtualRouterErrorCode.ROUTE_NOT_FOUND);
|
|
60
|
-
}
|
|
37
|
+
const classification = this.classifier.classify(features);
|
|
38
|
+
const routeName = classification.routeName || DEFAULT_ROUTE;
|
|
39
|
+
const selection = this.selectProvider(routeName, metadata, classification, features);
|
|
61
40
|
const target = this.providerRegistry.buildTarget(selection.providerKey);
|
|
62
41
|
this.healthManager.recordSuccess(selection.providerKey);
|
|
63
|
-
this.resetProviderErrorStreak(selection.providerKey);
|
|
64
42
|
this.incrementRouteStat(selection.routeUsed, selection.providerKey);
|
|
65
|
-
const
|
|
66
|
-
this.
|
|
67
|
-
if (
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
const
|
|
74
|
-
this.debug?.log?.(hitLog);
|
|
75
|
-
const didFallback = selection.routeUsed !== (classification.routeName || DEFAULT_ROUTE) || classification.fallback;
|
|
43
|
+
const hitReason = this.buildHitReason(selection.routeUsed, selection.providerKey, classification, features);
|
|
44
|
+
const formatted = this.formatVirtualRouterHit(selection.routeUsed, selection.providerKey, target.modelId || '', hitReason);
|
|
45
|
+
if (formatted) {
|
|
46
|
+
this.debug?.log?.(formatted);
|
|
47
|
+
}
|
|
48
|
+
else {
|
|
49
|
+
this.debug?.log?.('[virtual-router-hit]', selection.routeUsed, selection.providerKey, target.modelId || '', hitReason ? `reason=${hitReason}` : '');
|
|
50
|
+
}
|
|
51
|
+
const didFallback = selection.routeUsed !== routeName || classification.fallback;
|
|
76
52
|
return {
|
|
77
53
|
target,
|
|
78
54
|
decision: {
|
|
@@ -108,9 +84,6 @@ export class VirtualRouterEngine {
|
|
|
108
84
|
}
|
|
109
85
|
}
|
|
110
86
|
handleProviderError(event) {
|
|
111
|
-
if (event?.runtime?.requestId) {
|
|
112
|
-
this.dropStickyForRequest(event.runtime.requestId);
|
|
113
|
-
}
|
|
114
87
|
const derived = this.mapProviderError(event);
|
|
115
88
|
if (!derived) {
|
|
116
89
|
return;
|
|
@@ -132,206 +105,6 @@ export class VirtualRouterEngine {
|
|
|
132
105
|
health: this.healthManager.getSnapshot()
|
|
133
106
|
};
|
|
134
107
|
}
|
|
135
|
-
consumeSticky(metadata, features) {
|
|
136
|
-
const prevId = this.extractPreviousRequestId(metadata);
|
|
137
|
-
if (!prevId) {
|
|
138
|
-
return null;
|
|
139
|
-
}
|
|
140
|
-
this.pruneStickyPlans();
|
|
141
|
-
const planned = this.stickyPlans.get(prevId);
|
|
142
|
-
if (planned) {
|
|
143
|
-
this.stickyPlans.delete(prevId);
|
|
144
|
-
const activation = {
|
|
145
|
-
...planned,
|
|
146
|
-
sourceRequestId: prevId,
|
|
147
|
-
mode: 'planned'
|
|
148
|
-
};
|
|
149
|
-
if (planned.remainingRounds > 1 && metadata.requestId) {
|
|
150
|
-
this.stickyPlans.set(metadata.requestId, {
|
|
151
|
-
...planned,
|
|
152
|
-
remainingRounds: planned.remainingRounds - 1,
|
|
153
|
-
createdAt: Date.now()
|
|
154
|
-
});
|
|
155
|
-
}
|
|
156
|
-
return activation;
|
|
157
|
-
}
|
|
158
|
-
return this.maybeForceStickyFromHistory(prevId, features);
|
|
159
|
-
}
|
|
160
|
-
selectStickyTarget(sticky, metadata) {
|
|
161
|
-
if (sticky.strategy === 'target' && sticky.providerKey) {
|
|
162
|
-
if (!this.healthManager.isAvailable(sticky.providerKey)) {
|
|
163
|
-
return null;
|
|
164
|
-
}
|
|
165
|
-
const pool = this.routing[sticky.routeName] ?? [];
|
|
166
|
-
return { providerKey: sticky.providerKey, routeUsed: sticky.routeName, pool };
|
|
167
|
-
}
|
|
168
|
-
const pool = this.routing[sticky.routeName];
|
|
169
|
-
if (!Array.isArray(pool) || pool.length === 0) {
|
|
170
|
-
return null;
|
|
171
|
-
}
|
|
172
|
-
const stub = {
|
|
173
|
-
routeName: sticky.routeName,
|
|
174
|
-
confidence: 1,
|
|
175
|
-
reasoning: `sticky:${sticky.reason}`,
|
|
176
|
-
fallback: false,
|
|
177
|
-
candidates: [sticky.routeName]
|
|
178
|
-
};
|
|
179
|
-
return this.selectProvider(sticky.routeName, metadata, stub);
|
|
180
|
-
}
|
|
181
|
-
buildStickyClassification(sticky) {
|
|
182
|
-
return {
|
|
183
|
-
routeName: sticky.routeName,
|
|
184
|
-
confidence: 1,
|
|
185
|
-
reasoning: `sticky:${sticky.reason}`,
|
|
186
|
-
fallback: false,
|
|
187
|
-
candidates: [sticky.routeName]
|
|
188
|
-
};
|
|
189
|
-
}
|
|
190
|
-
recordSelectionSnapshot(requestId, routeName, providerKey, modelId) {
|
|
191
|
-
if (!requestId || !providerKey) {
|
|
192
|
-
return;
|
|
193
|
-
}
|
|
194
|
-
this.selectionHistory.set(requestId, {
|
|
195
|
-
routeName,
|
|
196
|
-
providerKey,
|
|
197
|
-
modelId,
|
|
198
|
-
createdAt: Date.now()
|
|
199
|
-
});
|
|
200
|
-
this.pruneStickyPlans();
|
|
201
|
-
}
|
|
202
|
-
buildStickyPlan(features, selection, target) {
|
|
203
|
-
const descriptor = this.resolveStickyDescriptor(selection.routeUsed, features);
|
|
204
|
-
if (!descriptor || descriptor.rounds <= 0) {
|
|
205
|
-
return null;
|
|
206
|
-
}
|
|
207
|
-
const plan = {
|
|
208
|
-
routeName: selection.routeUsed,
|
|
209
|
-
strategy: descriptor.strategy,
|
|
210
|
-
providerKey: descriptor.strategy === 'target' ? selection.providerKey : undefined,
|
|
211
|
-
modelId: descriptor.strategy === 'target' ? target.modelId : undefined,
|
|
212
|
-
remainingRounds: descriptor.rounds,
|
|
213
|
-
totalRounds: descriptor.rounds,
|
|
214
|
-
reason: descriptor.reason,
|
|
215
|
-
createdAt: Date.now()
|
|
216
|
-
};
|
|
217
|
-
return plan;
|
|
218
|
-
}
|
|
219
|
-
storeStickyPlan(requestId, plan) {
|
|
220
|
-
if (!requestId) {
|
|
221
|
-
return;
|
|
222
|
-
}
|
|
223
|
-
this.pruneStickyPlans();
|
|
224
|
-
if (plan && plan.remainingRounds > 0) {
|
|
225
|
-
this.stickyPlans.set(requestId, plan);
|
|
226
|
-
}
|
|
227
|
-
else {
|
|
228
|
-
this.stickyPlans.delete(requestId);
|
|
229
|
-
}
|
|
230
|
-
}
|
|
231
|
-
dropStickyForRequest(requestId) {
|
|
232
|
-
if (!requestId) {
|
|
233
|
-
return;
|
|
234
|
-
}
|
|
235
|
-
this.stickyPlans.delete(requestId);
|
|
236
|
-
this.selectionHistory.delete(requestId);
|
|
237
|
-
}
|
|
238
|
-
resolveStickyDescriptor(routeName, features) {
|
|
239
|
-
if (this.shouldForceApplyPatchSticky(features)) {
|
|
240
|
-
return { strategy: 'target', rounds: 1, reason: 'apply_patch' };
|
|
241
|
-
}
|
|
242
|
-
if (routeName === 'coding' || routeName === 'thinking') {
|
|
243
|
-
return { strategy: 'pool', rounds: 3, reason: routeName };
|
|
244
|
-
}
|
|
245
|
-
if (routeName === 'tools') {
|
|
246
|
-
return { strategy: 'pool', rounds: 0, reason: routeName };
|
|
247
|
-
}
|
|
248
|
-
if (routeName === DEFAULT_ROUTE || !routeName) {
|
|
249
|
-
return null;
|
|
250
|
-
}
|
|
251
|
-
return { strategy: 'pool', rounds: 1, reason: routeName };
|
|
252
|
-
}
|
|
253
|
-
maybeForceStickyFromHistory(prevId, features) {
|
|
254
|
-
if (!this.shouldForceApplyPatchSticky(features)) {
|
|
255
|
-
return null;
|
|
256
|
-
}
|
|
257
|
-
const snapshot = this.selectionHistory.get(prevId);
|
|
258
|
-
if (!snapshot) {
|
|
259
|
-
return null;
|
|
260
|
-
}
|
|
261
|
-
if (!this.healthManager.isAvailable(snapshot.providerKey)) {
|
|
262
|
-
return null;
|
|
263
|
-
}
|
|
264
|
-
return {
|
|
265
|
-
routeName: snapshot.routeName,
|
|
266
|
-
providerKey: snapshot.providerKey,
|
|
267
|
-
modelId: snapshot.modelId,
|
|
268
|
-
strategy: 'target',
|
|
269
|
-
remainingRounds: 0,
|
|
270
|
-
totalRounds: 1,
|
|
271
|
-
reason: 'apply_patch',
|
|
272
|
-
createdAt: Date.now(),
|
|
273
|
-
sourceRequestId: prevId,
|
|
274
|
-
mode: 'forced'
|
|
275
|
-
};
|
|
276
|
-
}
|
|
277
|
-
shouldForceApplyPatchSticky(features) {
|
|
278
|
-
const name = (features.lastAssistantToolName || '').toLowerCase();
|
|
279
|
-
if (name === 'apply_patch') {
|
|
280
|
-
return true;
|
|
281
|
-
}
|
|
282
|
-
const detail = (features.lastAssistantToolDetail || '').toLowerCase();
|
|
283
|
-
if (detail.includes('apply_patch')) {
|
|
284
|
-
return true;
|
|
285
|
-
}
|
|
286
|
-
return false;
|
|
287
|
-
}
|
|
288
|
-
extractPreviousRequestId(metadata) {
|
|
289
|
-
const resume = metadata.responsesResume;
|
|
290
|
-
if (resume && typeof resume.previousRequestId === 'string' && resume.previousRequestId.trim()) {
|
|
291
|
-
return resume.previousRequestId.trim();
|
|
292
|
-
}
|
|
293
|
-
return undefined;
|
|
294
|
-
}
|
|
295
|
-
pruneStickyPlans() {
|
|
296
|
-
const cutoff = Date.now() - STICKY_PLAN_TTL_MS;
|
|
297
|
-
for (const [key, plan] of this.stickyPlans.entries()) {
|
|
298
|
-
if (!plan || plan.createdAt < cutoff) {
|
|
299
|
-
this.stickyPlans.delete(key);
|
|
300
|
-
}
|
|
301
|
-
}
|
|
302
|
-
for (const [key, snapshot] of this.selectionHistory.entries()) {
|
|
303
|
-
if (!snapshot || snapshot.createdAt < cutoff) {
|
|
304
|
-
this.selectionHistory.delete(key);
|
|
305
|
-
}
|
|
306
|
-
}
|
|
307
|
-
}
|
|
308
|
-
buildErrorSignature(code, statusCode, message) {
|
|
309
|
-
const normalizedMessage = typeof message === 'string'
|
|
310
|
-
? message.trim().toLowerCase().replace(/\s+/g, ' ').slice(0, 120)
|
|
311
|
-
: '';
|
|
312
|
-
const codeToken = code?.toUpperCase() || 'ERR_UNKNOWN';
|
|
313
|
-
const statusToken = typeof statusCode === 'number' ? String(statusCode) : 'NA';
|
|
314
|
-
return `${statusToken}|${codeToken}|${normalizedMessage}`;
|
|
315
|
-
}
|
|
316
|
-
bumpProviderErrorStreak(providerKey, signature) {
|
|
317
|
-
if (!providerKey || !signature) {
|
|
318
|
-
return 0;
|
|
319
|
-
}
|
|
320
|
-
const now = Date.now();
|
|
321
|
-
const entry = this.providerErrorStreaks.get(providerKey);
|
|
322
|
-
if (!entry || entry.signature !== signature || now - entry.lastAt > ERROR_STREAK_TTL_MS) {
|
|
323
|
-
this.providerErrorStreaks.set(providerKey, { signature, count: 1, lastAt: now });
|
|
324
|
-
return 1;
|
|
325
|
-
}
|
|
326
|
-
const next = { signature, count: entry.count + 1, lastAt: now };
|
|
327
|
-
this.providerErrorStreaks.set(providerKey, next);
|
|
328
|
-
return next.count;
|
|
329
|
-
}
|
|
330
|
-
resetProviderErrorStreak(providerKey) {
|
|
331
|
-
if (providerKey) {
|
|
332
|
-
this.providerErrorStreaks.delete(providerKey);
|
|
333
|
-
}
|
|
334
|
-
}
|
|
335
108
|
validateConfig(config) {
|
|
336
109
|
if (!config.routing || typeof config.routing !== 'object') {
|
|
337
110
|
throw new VirtualRouterError('routing configuration is required', VirtualRouterErrorCode.CONFIG_ERROR);
|
|
@@ -358,30 +131,47 @@ export class VirtualRouterEngine {
|
|
|
358
131
|
}
|
|
359
132
|
}
|
|
360
133
|
}
|
|
361
|
-
selectProvider(requestedRoute, metadata, classification) {
|
|
362
|
-
const
|
|
363
|
-
const candidates = this.buildRouteCandidates(normalizedRoute, classification.candidates);
|
|
134
|
+
selectProvider(requestedRoute, metadata, classification, features) {
|
|
135
|
+
const candidates = this.buildRouteCandidates(requestedRoute, classification.candidates);
|
|
364
136
|
const stickyKey = this.resolveStickyKey(metadata);
|
|
365
137
|
const attempted = [];
|
|
366
|
-
|
|
138
|
+
const visitedRoutes = new Set();
|
|
139
|
+
const fallbackRoute = this.resolveFallbackRoute();
|
|
140
|
+
const routeQueue = this.initializeRouteQueue(candidates, fallbackRoute);
|
|
141
|
+
const estimatedTokens = typeof features.estimatedTokens === 'number' && Number.isFinite(features.estimatedTokens)
|
|
142
|
+
? Math.max(0, features.estimatedTokens)
|
|
143
|
+
: 0;
|
|
144
|
+
while (routeQueue.length) {
|
|
145
|
+
const routeName = routeQueue.shift();
|
|
146
|
+
if (visitedRoutes.has(routeName)) {
|
|
147
|
+
continue;
|
|
148
|
+
}
|
|
367
149
|
const pool = this.routing[routeName];
|
|
368
150
|
if (!Array.isArray(pool) || pool.length === 0) {
|
|
151
|
+
visitedRoutes.add(routeName);
|
|
369
152
|
attempted.push(routeName);
|
|
370
153
|
continue;
|
|
371
154
|
}
|
|
372
|
-
const
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
155
|
+
const contextResult = this.contextAdvisor.classify(pool, estimatedTokens, (key) => this.providerRegistry.get(key));
|
|
156
|
+
if (this.maybeDeferToFallback(routeName, contextResult, routeQueue, visitedRoutes, fallbackRoute)) {
|
|
157
|
+
continue;
|
|
158
|
+
}
|
|
159
|
+
visitedRoutes.add(routeName);
|
|
160
|
+
const prioritizedPools = this.buildContextCandidatePools(contextResult);
|
|
161
|
+
for (const candidatePool of prioritizedPools) {
|
|
162
|
+
const providerKey = this.loadBalancer.select({
|
|
163
|
+
routeName,
|
|
164
|
+
candidates: candidatePool,
|
|
165
|
+
stickyKey,
|
|
166
|
+
availabilityCheck: (key) => this.healthManager.isAvailable(key)
|
|
167
|
+
});
|
|
168
|
+
if (providerKey) {
|
|
169
|
+
return { providerKey, routeUsed: routeName, pool };
|
|
170
|
+
}
|
|
380
171
|
}
|
|
381
|
-
attempted.push(routeName);
|
|
172
|
+
attempted.push(this.describeAttempt(routeName, contextResult));
|
|
382
173
|
}
|
|
383
|
-
|
|
384
|
-
throw new VirtualRouterError(`All providers unavailable for route ${failureRoute}`, VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, { routeName: failureRoute, attempted });
|
|
174
|
+
throw new VirtualRouterError(`All providers unavailable for route ${requestedRoute}`, VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, { routeName: requestedRoute, attempted });
|
|
385
175
|
}
|
|
386
176
|
incrementRouteStat(routeName, providerKey) {
|
|
387
177
|
if (!this.routeStats.has(routeName)) {
|
|
@@ -395,6 +185,64 @@ export class VirtualRouterEngine {
|
|
|
395
185
|
providerHealthConfig() {
|
|
396
186
|
return this.healthManager.getConfig();
|
|
397
187
|
}
|
|
188
|
+
initializeRouteQueue(candidates, fallbackRoute) {
|
|
189
|
+
const queue = Array.from(new Set(candidates));
|
|
190
|
+
if (fallbackRoute && !queue.includes(fallbackRoute)) {
|
|
191
|
+
queue.push(fallbackRoute);
|
|
192
|
+
}
|
|
193
|
+
return queue;
|
|
194
|
+
}
|
|
195
|
+
resolveFallbackRoute() {
|
|
196
|
+
const candidate = this.contextRouting?.fallbackRoute;
|
|
197
|
+
if (!candidate) {
|
|
198
|
+
return undefined;
|
|
199
|
+
}
|
|
200
|
+
const pool = this.routing[candidate];
|
|
201
|
+
if (!Array.isArray(pool) || pool.length === 0) {
|
|
202
|
+
return undefined;
|
|
203
|
+
}
|
|
204
|
+
return candidate;
|
|
205
|
+
}
|
|
206
|
+
maybeDeferToFallback(routeName, contextResult, queue, visited, fallbackRoute) {
|
|
207
|
+
if (!fallbackRoute || fallbackRoute === routeName || visited.has(fallbackRoute)) {
|
|
208
|
+
return false;
|
|
209
|
+
}
|
|
210
|
+
if (!this.contextAdvisor.prefersFallback(contextResult)) {
|
|
211
|
+
return false;
|
|
212
|
+
}
|
|
213
|
+
const fallbackPool = this.routing[fallbackRoute];
|
|
214
|
+
if (!Array.isArray(fallbackPool) || fallbackPool.length === 0) {
|
|
215
|
+
return false;
|
|
216
|
+
}
|
|
217
|
+
queue.unshift(routeName);
|
|
218
|
+
queue.unshift(fallbackRoute);
|
|
219
|
+
return true;
|
|
220
|
+
}
|
|
221
|
+
buildContextCandidatePools(result) {
|
|
222
|
+
const ordered = [];
|
|
223
|
+
if (result.safe.length) {
|
|
224
|
+
ordered.push(result.safe);
|
|
225
|
+
}
|
|
226
|
+
if (result.risky.length) {
|
|
227
|
+
ordered.push(result.risky);
|
|
228
|
+
}
|
|
229
|
+
if (result.overflow.length && this.contextAdvisor.allowsOverflow()) {
|
|
230
|
+
ordered.push(result.overflow);
|
|
231
|
+
}
|
|
232
|
+
return ordered;
|
|
233
|
+
}
|
|
234
|
+
describeAttempt(routeName, result) {
|
|
235
|
+
if (result.safe.length > 0) {
|
|
236
|
+
return `${routeName}:health`;
|
|
237
|
+
}
|
|
238
|
+
if (result.risky.length > 0) {
|
|
239
|
+
return `${routeName}:context_risky`;
|
|
240
|
+
}
|
|
241
|
+
if (result.overflow.length > 0) {
|
|
242
|
+
return `${routeName}:context_overflow`;
|
|
243
|
+
}
|
|
244
|
+
return routeName;
|
|
245
|
+
}
|
|
398
246
|
resolveStickyKey(metadata) {
|
|
399
247
|
const resume = metadata.responsesResume;
|
|
400
248
|
if (resume && typeof resume.previousRequestId === 'string' && resume.previousRequestId.trim()) {
|
|
@@ -447,13 +295,6 @@ export class VirtualRouterEngine {
|
|
|
447
295
|
cooldownOverrideMs = Math.max(10 * 60_000, this.providerHealthConfig().fatalCooldownMs ?? 10 * 60_000);
|
|
448
296
|
reason = 'compatibility';
|
|
449
297
|
}
|
|
450
|
-
const signature = this.buildErrorSignature(code, statusCode, event.message);
|
|
451
|
-
const streak = this.bumpProviderErrorStreak(providerKey, signature);
|
|
452
|
-
if (streak >= ERROR_STREAK_THRESHOLD) {
|
|
453
|
-
fatal = true;
|
|
454
|
-
reason = reason === 'unknown' ? 'repeated_error' : `${reason}|repeated`;
|
|
455
|
-
cooldownOverrideMs = Math.max(this.providerHealthConfig().fatalCooldownMs ?? 5 * 60_000, 5 * 60_000);
|
|
456
|
-
}
|
|
457
298
|
return {
|
|
458
299
|
providerKey,
|
|
459
300
|
routeName,
|
|
@@ -517,59 +358,6 @@ export class VirtualRouterEngine {
|
|
|
517
358
|
}
|
|
518
359
|
return filtered.length ? filtered : [DEFAULT_ROUTE];
|
|
519
360
|
}
|
|
520
|
-
ensureConfiguredClassification(classification) {
|
|
521
|
-
const normalizedRoute = this.normalizeRouteName(classification.routeName);
|
|
522
|
-
const normalizedCandidates = this.normalizeCandidateList(normalizedRoute, classification.candidates);
|
|
523
|
-
const fallback = normalizedRoute === DEFAULT_ROUTE ? true : classification.fallback;
|
|
524
|
-
return {
|
|
525
|
-
...classification,
|
|
526
|
-
routeName: normalizedRoute,
|
|
527
|
-
fallback,
|
|
528
|
-
candidates: normalizedCandidates
|
|
529
|
-
};
|
|
530
|
-
}
|
|
531
|
-
normalizeCandidateList(primaryRoute, rawCandidates) {
|
|
532
|
-
const base = rawCandidates && rawCandidates.length ? rawCandidates : [primaryRoute];
|
|
533
|
-
const deduped = [];
|
|
534
|
-
for (const routeName of base) {
|
|
535
|
-
if (!routeName) {
|
|
536
|
-
continue;
|
|
537
|
-
}
|
|
538
|
-
if (!this.isRouteConfigured(routeName)) {
|
|
539
|
-
continue;
|
|
540
|
-
}
|
|
541
|
-
if (!deduped.includes(routeName)) {
|
|
542
|
-
deduped.push(routeName);
|
|
543
|
-
}
|
|
544
|
-
}
|
|
545
|
-
if (!deduped.includes(primaryRoute) && this.isRouteConfigured(primaryRoute)) {
|
|
546
|
-
deduped.push(primaryRoute);
|
|
547
|
-
}
|
|
548
|
-
if (!deduped.includes(DEFAULT_ROUTE) && this.isRouteConfigured(DEFAULT_ROUTE)) {
|
|
549
|
-
deduped.push(DEFAULT_ROUTE);
|
|
550
|
-
}
|
|
551
|
-
if (!deduped.length && this.isRouteConfigured(DEFAULT_ROUTE)) {
|
|
552
|
-
deduped.push(DEFAULT_ROUTE);
|
|
553
|
-
}
|
|
554
|
-
return this.sortByPriority(deduped);
|
|
555
|
-
}
|
|
556
|
-
normalizeRouteName(routeName) {
|
|
557
|
-
if (routeName && this.isRouteConfigured(routeName)) {
|
|
558
|
-
return routeName;
|
|
559
|
-
}
|
|
560
|
-
if (this.isRouteConfigured(DEFAULT_ROUTE)) {
|
|
561
|
-
return DEFAULT_ROUTE;
|
|
562
|
-
}
|
|
563
|
-
const firstConfigured = Object.keys(this.routing).find((key) => this.isRouteConfigured(key));
|
|
564
|
-
return firstConfigured || DEFAULT_ROUTE;
|
|
565
|
-
}
|
|
566
|
-
isRouteConfigured(routeName) {
|
|
567
|
-
if (!routeName) {
|
|
568
|
-
return false;
|
|
569
|
-
}
|
|
570
|
-
const pool = this.routing[routeName];
|
|
571
|
-
return Array.isArray(pool) && pool.length > 0;
|
|
572
|
-
}
|
|
573
361
|
sortByPriority(routeNames) {
|
|
574
362
|
return [...routeNames].sort((a, b) => this.routeWeight(a) - this.routeWeight(b));
|
|
575
363
|
}
|
|
@@ -577,89 +365,96 @@ export class VirtualRouterEngine {
|
|
|
577
365
|
const idx = ROUTE_PRIORITY.indexOf(routeName);
|
|
578
366
|
return idx >= 0 ? idx : ROUTE_PRIORITY.length;
|
|
579
367
|
}
|
|
580
|
-
buildHitReason(routeUsed, classification, features
|
|
368
|
+
buildHitReason(routeUsed, providerKey, classification, features) {
|
|
581
369
|
const reasoning = classification.reasoning || '';
|
|
582
370
|
const primary = reasoning.split('|')[0] || '';
|
|
583
|
-
const
|
|
584
|
-
const
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
return
|
|
604
|
-
}
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
}
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
371
|
+
const commandDetail = features.lastAssistantToolLabel;
|
|
372
|
+
const base = (() => {
|
|
373
|
+
if (routeUsed === 'tools') {
|
|
374
|
+
return this.decorateWithDetail(primary || 'tools', primary, commandDetail);
|
|
375
|
+
}
|
|
376
|
+
if (routeUsed === 'thinking') {
|
|
377
|
+
return this.decorateWithDetail(primary || 'thinking', primary, commandDetail);
|
|
378
|
+
}
|
|
379
|
+
if (routeUsed === 'coding') {
|
|
380
|
+
return this.decorateWithDetail(primary || 'coding', primary, commandDetail);
|
|
381
|
+
}
|
|
382
|
+
if (routeUsed === 'websearch') {
|
|
383
|
+
return this.decorateWithDetail(primary || 'websearch', primary, commandDetail);
|
|
384
|
+
}
|
|
385
|
+
if (routeUsed === DEFAULT_ROUTE && classification.fallback) {
|
|
386
|
+
return primary || 'fallback:default';
|
|
387
|
+
}
|
|
388
|
+
if (primary) {
|
|
389
|
+
return primary;
|
|
390
|
+
}
|
|
391
|
+
return routeUsed ? `route:${routeUsed}` : 'route:unknown';
|
|
392
|
+
})();
|
|
393
|
+
const contextDetail = this.describeContextUsage(providerKey, features.estimatedTokens);
|
|
394
|
+
if (contextDetail) {
|
|
395
|
+
return `${base}|context:${contextDetail}`;
|
|
396
|
+
}
|
|
397
|
+
return base;
|
|
398
|
+
}
|
|
399
|
+
decorateWithDetail(baseLabel, primaryReason, detail) {
|
|
400
|
+
const normalizedDetail = detail && detail.trim();
|
|
401
|
+
if (!normalizedDetail) {
|
|
402
|
+
return primaryReason || baseLabel;
|
|
403
|
+
}
|
|
404
|
+
if (primaryReason) {
|
|
405
|
+
return `${primaryReason}(${normalizedDetail})`;
|
|
406
|
+
}
|
|
407
|
+
return `${baseLabel}(${normalizedDetail})`;
|
|
408
|
+
}
|
|
409
|
+
formatVirtualRouterHit(routeName, providerKey, modelId, hitReason) {
|
|
410
|
+
try {
|
|
411
|
+
const prefixColor = '\x1b[38;5;208m';
|
|
412
|
+
const reset = '\x1b[0m';
|
|
413
|
+
const routeColor = this.resolveRouteColor(routeName);
|
|
414
|
+
const prefix = `${prefixColor}[virtual-router-hit]${reset}`;
|
|
415
|
+
const targetLabel = `${routeName} -> ${providerKey}${modelId ? '.' + modelId : ''}`;
|
|
416
|
+
const reasonLabel = hitReason ? ` reason=${hitReason}` : '';
|
|
417
|
+
return `${prefix} ${routeColor}${targetLabel}${reasonLabel}${reset}`;
|
|
418
|
+
}
|
|
419
|
+
catch {
|
|
420
|
+
return `[virtual-router-hit] ${routeName} -> ${providerKey}${modelId ? '.' + modelId : ''}${hitReason ? ` reason=${hitReason}` : ''}`;
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
resolveRouteColor(routeName) {
|
|
424
|
+
const reset = '\x1b[0m';
|
|
425
|
+
const map = {
|
|
426
|
+
tools: '\x1b[38;5;214m',
|
|
427
|
+
thinking: '\x1b[34m',
|
|
428
|
+
coding: '\x1b[35m',
|
|
429
|
+
longcontext: '\x1b[38;5;141m',
|
|
430
|
+
websearch: '\x1b[32m',
|
|
431
|
+
vision: '\x1b[38;5;207m',
|
|
432
|
+
background: '\x1b[90m'
|
|
433
|
+
};
|
|
434
|
+
return map[routeName] ?? '\x1b[36m';
|
|
640
435
|
}
|
|
641
|
-
|
|
642
|
-
if (!
|
|
643
|
-
return
|
|
436
|
+
describeContextUsage(providerKey, estimatedTokens) {
|
|
437
|
+
if (typeof estimatedTokens !== 'number' || !Number.isFinite(estimatedTokens) || estimatedTokens <= 0) {
|
|
438
|
+
return undefined;
|
|
644
439
|
}
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
440
|
+
let limit = DEFAULT_MODEL_CONTEXT_TOKENS;
|
|
441
|
+
try {
|
|
442
|
+
const profile = this.providerRegistry.get(providerKey);
|
|
443
|
+
if (profile?.maxContextTokens && Number.isFinite(profile.maxContextTokens)) {
|
|
444
|
+
limit = profile.maxContextTokens;
|
|
445
|
+
}
|
|
650
446
|
}
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
return false;
|
|
447
|
+
catch {
|
|
448
|
+
limit = DEFAULT_MODEL_CONTEXT_TOKENS;
|
|
654
449
|
}
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
return false;
|
|
450
|
+
if (!limit || limit <= 0) {
|
|
451
|
+
return undefined;
|
|
658
452
|
}
|
|
659
|
-
const
|
|
660
|
-
|
|
661
|
-
|
|
453
|
+
const ratio = estimatedTokens / limit;
|
|
454
|
+
const threshold = this.contextRouting?.warnRatio ?? 0.9;
|
|
455
|
+
if (ratio < threshold) {
|
|
456
|
+
return undefined;
|
|
662
457
|
}
|
|
663
|
-
return
|
|
458
|
+
return `${ratio.toFixed(2)}/${Math.round(limit)}`;
|
|
664
459
|
}
|
|
665
460
|
}
|