@jsonstudio/llms 0.6.567 → 0.6.568
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/codecs/gemini-openai-codec.js +33 -4
- package/dist/conversion/codecs/openai-openai-codec.js +2 -1
- package/dist/conversion/codecs/responses-openai-codec.js +3 -2
- package/dist/conversion/compat/actions/glm-history-image-trim.d.ts +2 -0
- package/dist/conversion/compat/actions/glm-history-image-trim.js +88 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +6 -1
- package/dist/conversion/hub/pipeline/hub-pipeline.js +25 -13
- package/dist/conversion/hub/process/chat-process.js +65 -11
- package/dist/conversion/hub/semantic-mappers/gemini-mapper.js +16 -3
- package/dist/conversion/hub/semantic-mappers/responses-mapper.js +51 -2
- package/dist/conversion/hub/types/chat-envelope.d.ts +1 -0
- package/dist/conversion/shared/anthropic-message-utils.js +54 -0
- package/dist/conversion/shared/args-mapping.js +11 -3
- package/dist/conversion/shared/responses-output-builder.js +42 -21
- package/dist/conversion/shared/streaming-text-extractor.d.ts +25 -0
- package/dist/conversion/shared/streaming-text-extractor.js +31 -38
- package/dist/conversion/shared/text-markup-normalizer.js +42 -27
- package/dist/conversion/shared/tool-filter-pipeline.js +2 -1
- package/dist/conversion/shared/tool-harvester.js +43 -12
- package/dist/conversion/shared/tool-mapping.d.ts +1 -0
- package/dist/conversion/shared/tool-mapping.js +33 -19
- package/dist/filters/index.d.ts +1 -0
- package/dist/filters/index.js +1 -0
- package/dist/filters/special/request-tools-normalize.js +14 -4
- package/dist/filters/special/response-apply-patch-toon-decode.d.ts +23 -0
- package/dist/filters/special/response-apply-patch-toon-decode.js +109 -0
- package/dist/filters/special/response-tool-arguments-toon-decode.d.ts +10 -0
- package/dist/filters/special/response-tool-arguments-toon-decode.js +55 -13
- package/dist/guidance/index.js +69 -42
- package/dist/router/virtual-router/bootstrap.js +10 -5
- package/dist/router/virtual-router/classifier.js +9 -4
- package/dist/router/virtual-router/engine-health.d.ts +11 -0
- package/dist/router/virtual-router/engine-health.js +217 -4
- package/dist/router/virtual-router/engine-logging.d.ts +2 -1
- package/dist/router/virtual-router/engine-logging.js +35 -3
- package/dist/router/virtual-router/engine.d.ts +17 -1
- package/dist/router/virtual-router/engine.js +154 -6
- package/dist/router/virtual-router/routing-instructions.d.ts +2 -0
- package/dist/router/virtual-router/routing-instructions.js +19 -1
- package/dist/router/virtual-router/tool-signals.js +57 -11
- package/dist/router/virtual-router/types.d.ts +30 -0
- package/dist/router/virtual-router/types.js +1 -1
- package/dist/servertool/engine.js +3 -0
- package/dist/servertool/handlers/iflow-model-error-retry.d.ts +1 -0
- package/dist/servertool/handlers/iflow-model-error-retry.js +93 -0
- package/dist/servertool/handlers/stop-message-auto.js +61 -4
- package/dist/servertool/server-side-tools.d.ts +1 -0
- package/dist/servertool/server-side-tools.js +27 -0
- package/dist/sse/sse-to-json/builders/anthropic-response-builder.js +16 -0
- package/dist/tools/apply-patch-structured.d.ts +20 -0
- package/dist/tools/apply-patch-structured.js +239 -0
- package/dist/tools/tool-description-utils.d.ts +5 -0
- package/dist/tools/tool-description-utils.js +50 -0
- package/dist/tools/tool-registry.js +11 -193
- package/package.json +2 -2
|
@@ -1,4 +1,105 @@
|
|
|
1
1
|
// Detail key for series-cooldown payloads; its consumer is not visible in this excerpt
// (presumably applySeriesCooldownImpl — TODO confirm).
const SERIES_COOLDOWN_DETAIL_KEY = 'virtualRouterSeriesCooldown';
|
|
2
|
+
// Property of `event.details` that extractQuotaRecoveryDetail reads.
const QUOTA_RECOVERY_DETAIL_KEY = 'virtualRouterQuotaRecovery';
|
|
3
|
+
// Property of `event.details` that extractQuotaDepletedDetail reads.
const QUOTA_DEPLETED_DETAIL_KEY = 'virtualRouterQuotaDepleted';
|
|
4
|
+
/**
 * Converts a duration string into whole milliseconds.
 *
 * Accepted forms:
 *  - one or more `<number><unit>` tokens, unit being `ms`, `s`, `m` or `h`
 *    (case-insensitive); token values are summed, e.g. "90s", "1h30m", "1.5h";
 *  - a bare decimal number, interpreted as seconds, e.g. "30" -> 30000.
 *
 * @param {unknown} value - Raw duration text.
 * @returns {number|null} Rounded milliseconds, or `null` when the input is not
 *   a non-empty string, holds no usable duration, or totals zero or less.
 */
function parseDurationToMs(value) {
    if (!value || typeof value !== 'string') {
        return null;
    }
    const unitToMs = { ms: 1, s: 1_000, m: 60_000, h: 3_600_000 };
    let totalMs = 0;
    let matched = false;
    for (const [, amountText, unitText] of value.matchAll(/(\d+(?:\.\d+)?)(ms|s|m|h)/gi)) {
        matched = true;
        const amount = Number.parseFloat(amountText);
        if (Number.isFinite(amount)) {
            totalMs += amount * unitToMs[unitText.toLowerCase()];
        }
    }
    if (!matched) {
        // Bare numbers mean seconds. The whole string has to be numeric:
        // a lenient parseFloat would silently read "1d" as one second.
        const trimmed = value.trim();
        if (/^\+?(?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?$/i.test(trimmed)) {
            const seconds = Number.parseFloat(trimmed);
            if (Number.isFinite(seconds)) {
                totalMs = seconds * 1_000;
                matched = true;
            }
        }
    }
    if (!matched || totalMs <= 0) {
        return null;
    }
    return Math.round(totalMs);
}
|
|
44
|
+
/**
 * Reads a comma-separated list of durations from an environment variable.
 *
 * Each non-empty token is parsed with parseDurationToMs; tokens that do not
 * yield a positive value are dropped.
 *
 * @param {string} name - Environment variable name.
 * @param {number[]} fallback - Returned as-is when the variable is unset,
 *   blank, or contains no valid token.
 * @returns {number[]} Parsed durations in milliseconds, or `fallback`.
 */
function readEnvSchedule(name, fallback) {
    const text = (process.env[name] || '').trim();
    if (text.length === 0) {
        return fallback;
    }
    const steps = [];
    text.split(',').forEach((piece) => {
        const token = piece.trim();
        if (!token) {
            return;
        }
        const stepMs = parseDurationToMs(token);
        if (stepMs && stepMs > 0) {
            steps.push(stepMs);
        }
    });
    return steps.length > 0 ? steps : fallback;
}
|
|
59
|
+
/**
 * Reads a single duration from an environment variable.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallbackMs - Value returned when the variable is unset,
 *   blank, or does not parse to a positive duration.
 * @returns {number} Duration in milliseconds.
 */
function readEnvDuration(name, fallbackMs) {
    const text = (process.env[name] || '').trim();
    const parsedMs = text ? parseDurationToMs(text) : null;
    if (parsedMs && parsedMs > 0) {
        return parsedMs;
    }
    return fallbackMs;
}
|
|
67
|
+
/**
 * Stepped backoff, kept inside the VirtualRouter, for 429 errors that arrive
 * without a quotaResetDelay:
 * - default: 1st hit 5 minutes, 2nd 1 hour, 3rd 6 hours, 4th and later capped at 24 hours;
 * - overridable through ROUTECODEX_RL_SCHEDULE, or through RCC_RL_SCHEDULE when the
 *   former is unset or holds no valid entry (e.g. "5m,1h,6h,24h").
 *
 * Hits are counted per providerKey inside a time window: once more than the reset
 * window (24 hours unless ROUTECODEX_RL_RESET_WINDOW says otherwise) has passed since
 * the previous 429, the count starts over.
 * This state only feeds routing decisions and is not reflected in healthConfig, which
 * keeps the 429 responsibilities of Host and VirtualRouter cleanly layered.
 */
const NO_QUOTA_RATE_LIMIT_SCHEDULE_MS = readEnvSchedule('ROUTECODEX_RL_SCHEDULE',
// The RCC_ alias is consulted only as the fallback of the primary variable.
readEnvSchedule('RCC_RL_SCHEDULE', [
    5 * 60_000,
    60 * 60_000,
    6 * 60 * 60_000,
    24 * 60 * 60_000
]));
// providerKey -> { count, lastAt } bookkeeping for the stepped backoff.
const rateLimitBackoffByProvider = new Map();
const RATE_LIMIT_RESET_WINDOW_MS = readEnvDuration('ROUTECODEX_RL_RESET_WINDOW', 24 * 60 * 60_000);
|
|
83
|
+
/**
 * Registers one more rate-limit hit for a provider and returns the cooldown to apply.
 *
 * The hit counter continues from the previous value only when the previous hit
 * lies within RATE_LIMIT_RESET_WINDOW_MS (and not in the future); otherwise it
 * restarts at 1. The counter selects a step of NO_QUOTA_RATE_LIMIT_SCHEDULE_MS,
 * clamped to the last step.
 *
 * @param {string} providerKey - Provider whose counter is updated.
 * @param {number} now - Current timestamp in milliseconds.
 * @returns {number} Cooldown in milliseconds.
 */
function computeRateLimitCooldownMsForProvider(providerKey, now) {
    const previous = rateLimitBackoffByProvider.get(providerKey);
    const sincePrevious = previous ? now - previous.lastAt : Number.NaN;
    const withinWindow = Number.isFinite(sincePrevious) &&
        sincePrevious >= 0 &&
        sincePrevious < RATE_LIMIT_RESET_WINDOW_MS;
    const hits = withinWindow ? previous.count + 1 : 1;
    const lastStep = NO_QUOTA_RATE_LIMIT_SCHEDULE_MS.length - 1;
    const cooldownMs = NO_QUOTA_RATE_LIMIT_SCHEDULE_MS[Math.min(hits - 1, lastStep)];
    rateLimitBackoffByProvider.set(providerKey, { count: hits, lastAt: now });
    return cooldownMs;
}
|
|
97
|
+
/**
 * Forgets the stepped-backoff counter of a provider.
 *
 * @param {string} providerKey - Provider to reset; falsy values are ignored.
 * @returns {void}
 */
export function resetRateLimitBackoffForProvider(providerKey) {
    if (providerKey) {
        rateLimitBackoffByProvider.delete(providerKey);
    }
}
|
|
2
103
|
export function handleProviderFailureImpl(event, healthManager, healthConfig, markProviderCooldown) {
|
|
3
104
|
if (!event || !event.providerKey) {
|
|
4
105
|
return;
|
|
@@ -10,9 +111,17 @@ export function handleProviderFailureImpl(event, healthManager, healthConfig, ma
|
|
|
10
111
|
healthManager.tripProvider(event.providerKey, event.reason, event.cooldownOverrideMs);
|
|
11
112
|
}
|
|
12
113
|
else if (event.reason === 'rate_limit' && event.statusCode === 429) {
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
114
|
+
// 对非致命的 429 错误:
|
|
115
|
+
// - 若 ProviderErrorEvent 已携带显式 cooldownOverrideMs(例如来自 quotaResetDelay),则直接使用;
|
|
116
|
+
// - 否则针对该 providerKey 启用阶梯退避策略(5min → 1h → 6h → 24h),
|
|
117
|
+
// 在冷却期内从路由池中移除该 alias,避免持续命中上游。
|
|
118
|
+
const providerKey = event.providerKey;
|
|
119
|
+
let ttl = event.cooldownOverrideMs;
|
|
120
|
+
if (!ttl || !Number.isFinite(ttl) || ttl <= 0) {
|
|
121
|
+
ttl = computeRateLimitCooldownMsForProvider(providerKey, Date.now());
|
|
122
|
+
}
|
|
123
|
+
healthManager.cooldownProvider(providerKey, event.reason, ttl);
|
|
124
|
+
markProviderCooldown(providerKey, ttl);
|
|
16
125
|
}
|
|
17
126
|
else {
|
|
18
127
|
healthManager.recordFailure(event.providerKey, event.reason);
|
|
@@ -35,10 +144,20 @@ export function mapProviderErrorImpl(event, healthConfig) {
|
|
|
35
144
|
const code = event.code?.toUpperCase() ?? 'ERR_UNKNOWN';
|
|
36
145
|
const stage = event.stage?.toLowerCase() ?? 'unknown';
|
|
37
146
|
const recoverable = event.recoverable === true;
|
|
147
|
+
const providerFamily = runtime.providerFamily &&
|
|
148
|
+
typeof runtime.providerFamily === 'string'
|
|
149
|
+
? runtime.providerFamily
|
|
150
|
+
: undefined;
|
|
151
|
+
const providerId = runtime.providerId &&
|
|
152
|
+
typeof runtime.providerId === 'string'
|
|
153
|
+
? runtime.providerId
|
|
154
|
+
: undefined;
|
|
155
|
+
const providerTag = (providerFamily || providerId || '').toLowerCase();
|
|
156
|
+
const isOAuthAuth406 = statusCode === 406 && (providerTag === 'iflow' || providerTag === 'qwen');
|
|
38
157
|
let fatal = !recoverable;
|
|
39
158
|
let reason = deriveReason(code, stage, statusCode);
|
|
40
159
|
let cooldownOverrideMs;
|
|
41
|
-
if (statusCode === 401 || statusCode === 402 || statusCode === 403 || code.includes('AUTH')) {
|
|
160
|
+
if (statusCode === 401 || statusCode === 402 || statusCode === 403 || code.includes('AUTH') || isOAuthAuth406) {
|
|
42
161
|
fatal = true;
|
|
43
162
|
cooldownOverrideMs = Math.max(10 * 60_000, healthConfig.fatalCooldownMs ?? 10 * 60_000);
|
|
44
163
|
reason = 'auth';
|
|
@@ -117,6 +236,100 @@ export function applySeriesCooldownImpl(event, providerRegistry, healthManager,
|
|
|
117
236
|
});
|
|
118
237
|
}
|
|
119
238
|
}
|
|
239
|
+
/**
 * Pulls the quota-recovery payload out of an event.
 *
 * @param {object} event - Event whose `details[QUOTA_RECOVERY_DETAIL_KEY]` is inspected.
 * @returns {{providerKey: string, reason: (string|undefined)}|null} Trimmed
 *   provider key and optional trimmed reason, or `null` when the payload is
 *   absent, not an object, or lacks a non-blank string providerKey.
 */
function extractQuotaRecoveryDetail(event) {
    const details = event?.details;
    if (!details || typeof details !== 'object') {
        return null;
    }
    const payload = details[QUOTA_RECOVERY_DETAIL_KEY];
    if (!payload || typeof payload !== 'object') {
        return null;
    }
    const { providerKey: rawKey, reason: rawReason } = payload;
    const providerKey = typeof rawKey === 'string' ? rawKey.trim() : '';
    if (!providerKey) {
        return null;
    }
    const trimmedReason = typeof rawReason === 'string' ? rawReason.trim() : '';
    return {
        providerKey,
        reason: trimmedReason || undefined
    };
}
|
|
260
|
+
/**
 * Handles a quota-recovery event coming from the Host side:
 * - records a success for the given providerKey on the health manager;
 * - drops the matching rate-limit backoff counter;
 * - invokes the caller-supplied clearProviderCooldown callback to remove the explicit cooldown TTL.
 *
 * @param {object} event - Event that may carry a quota-recovery payload.
 * @param {object} healthManager - Must expose `recordSuccess(providerKey)`.
 * @param {(providerKey: string) => void} clearProviderCooldown - Cooldown removal callback.
 * @param {object} [debug] - Optional logger; `debug.log` is called when present.
 * @returns {boolean} `true` when the event carried a recovery payload (callers
 *   should then skip the regular error-mapping logic), `false` otherwise.
 */
export function applyQuotaRecoveryImpl(event, healthManager, clearProviderCooldown, debug) {
    const recovery = extractQuotaRecoveryDetail(event);
    if (!recovery) {
        return false;
    }
    const { providerKey, reason } = recovery;
    try {
        healthManager.recordSuccess(providerKey);
        resetRateLimitBackoffForProvider(providerKey);
        clearProviderCooldown(providerKey);
        debug?.log?.('[virtual-router] quota recovery', { providerKey, reason });
    }
    catch {
        // A failed recovery must never disturb the main routing flow.
    }
    return true;
}
|
|
288
|
+
/**
 * Pulls the quota-depleted payload out of an event.
 *
 * @param {object} event - Event whose `details[QUOTA_DEPLETED_DETAIL_KEY]` is inspected.
 * @returns {{providerKey: string, cooldownMs: (number|undefined), reason: (string|undefined)}|null}
 *   Trimmed provider key, the cooldown when it is a positive finite number,
 *   and the optional trimmed reason; `null` when the payload is absent, not an
 *   object, or lacks a non-blank string providerKey.
 */
function extractQuotaDepletedDetail(event) {
    const details = event?.details;
    if (!details || typeof details !== 'object') {
        return null;
    }
    const payload = details[QUOTA_DEPLETED_DETAIL_KEY];
    if (!payload || typeof payload !== 'object') {
        return null;
    }
    const { providerKey: rawKey, cooldownMs: rawCooldown, reason: rawReason } = payload;
    const providerKey = typeof rawKey === 'string' ? rawKey.trim() : '';
    if (!providerKey) {
        return null;
    }
    const hasCooldown = typeof rawCooldown === 'number' && Number.isFinite(rawCooldown) && rawCooldown > 0;
    const trimmedReason = typeof rawReason === 'string' ? rawReason.trim() : '';
    return {
        providerKey,
        cooldownMs: hasCooldown ? rawCooldown : undefined,
        reason: trimmedReason || undefined
    };
}
|
|
313
|
+
/**
 * Handles a quota-depleted event by putting the provider into cooldown.
 *
 * @param {object} event - Event that may carry a quota-depleted payload.
 * @param {object} healthManager - Must expose `cooldownProvider(key, reason, ttl)`.
 * @param {(providerKey: string, ttl: (number|undefined)) => void} markProviderCooldown -
 *   Callback recording the explicit cooldown.
 * @param {object} [debug] - Optional logger; `debug.log` is called when present.
 * @returns {boolean} `true` when the event carried a depleted payload, `false` otherwise.
 */
export function applyQuotaDepletedImpl(event, healthManager, markProviderCooldown, debug) {
    const depleted = extractQuotaDepletedDetail(event);
    if (!depleted) {
        return false;
    }
    // NOTE(review): cooldownMs is undefined when the payload carried no valid
    // value; both calls below then receive undefined — confirm they default sensibly.
    const { providerKey, cooldownMs, reason } = depleted;
    try {
        healthManager.cooldownProvider(providerKey, 'rate_limit', cooldownMs);
        markProviderCooldown(providerKey, cooldownMs);
        debug?.log?.('[virtual-router] quota depleted', { providerKey, cooldownMs, reason });
    }
    catch {
        // Best effort: failures here are deliberately ignored.
    }
    return true;
}
|
|
120
333
|
function resolveSeriesCooldownTargets(detail, event, providerRegistry) {
|
|
121
334
|
const candidates = new Set();
|
|
122
335
|
const push = (key) => {
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { type ClassificationResult, type RoutingFeatures, type RoutingInstructionMode, type VirtualRouterContextRoutingConfig } from './types.js';
|
|
2
2
|
import { ProviderRegistry } from './provider-registry.js';
|
|
3
|
+
import type { RoutingInstructionState } from './routing-instructions.js';
|
|
3
4
|
type LoggingDeps = {
|
|
4
5
|
providerRegistry: ProviderRegistry;
|
|
5
6
|
contextRouting: VirtualRouterContextRoutingConfig | undefined;
|
|
@@ -15,5 +16,5 @@ export declare function describeTargetProvider(providerKey: string, fallbackMode
|
|
|
15
16
|
resolvedModel?: string;
|
|
16
17
|
};
|
|
17
18
|
export declare function buildHitReason(routeUsed: string, providerKey: string, classification: ClassificationResult, features: RoutingFeatures, mode: RoutingInstructionMode | undefined, deps: LoggingDeps): string;
|
|
18
|
-
export declare function formatVirtualRouterHit(routeName: string, poolId: string | undefined, providerKey: string, modelId?: string, hitReason?: string, stickyScope?: string): string;
|
|
19
|
+
export declare function formatVirtualRouterHit(routeName: string, poolId: string | undefined, providerKey: string, modelId?: string, hitReason?: string, stickyScope?: string, routingState?: RoutingInstructionState): string;
|
|
19
20
|
export {};
|
|
@@ -132,7 +132,7 @@ export function buildHitReason(routeUsed, providerKey, classification, features,
|
|
|
132
132
|
}
|
|
133
133
|
return base;
|
|
134
134
|
}
|
|
135
|
-
export function formatVirtualRouterHit(routeName, poolId, providerKey, modelId, hitReason, stickyScope) {
|
|
135
|
+
export function formatVirtualRouterHit(routeName, poolId, providerKey, modelId, hitReason, stickyScope, routingState) {
|
|
136
136
|
try {
|
|
137
137
|
const now = new Date();
|
|
138
138
|
const hours = String(now.getHours()).padStart(2, '0');
|
|
@@ -144,6 +144,7 @@ export function formatVirtualRouterHit(routeName, poolId, providerKey, modelId,
|
|
|
144
144
|
const timeColor = '\x1b[90m';
|
|
145
145
|
const stickyColor = '\x1b[33m';
|
|
146
146
|
const routeColor = resolveRouteColor(routeName);
|
|
147
|
+
const stopColor = '\x1b[38;5;214m';
|
|
147
148
|
const prefix = `${prefixColor}[virtual-router-hit]${reset}`;
|
|
148
149
|
const timeLabel = `${timeColor}${timestamp}${reset}`;
|
|
149
150
|
const { providerLabel, resolvedModel } = describeTargetProvider(providerKey, modelId);
|
|
@@ -152,7 +153,29 @@ export function formatVirtualRouterHit(routeName, poolId, providerKey, modelId,
|
|
|
152
153
|
const stickyText = formatStickyScope(stickyScope);
|
|
153
154
|
const stickyLabel = stickyText ? ` ${stickyColor}[sticky:${stickyText}]${reset}` : '';
|
|
154
155
|
const reasonLabel = hitReason ? ` reason=${hitReason}` : '';
|
|
155
|
-
|
|
156
|
+
let stopLabel = '';
|
|
157
|
+
if (routingState?.stopMessageText && typeof routingState.stopMessageMaxRepeats === 'number') {
|
|
158
|
+
const text = routingState.stopMessageText;
|
|
159
|
+
const safeText = text.length > 24 ? `${text.slice(0, 21)}…` : text;
|
|
160
|
+
const used = typeof routingState.stopMessageUsed === 'number' && Number.isFinite(routingState.stopMessageUsed)
|
|
161
|
+
? routingState.stopMessageUsed
|
|
162
|
+
: 0;
|
|
163
|
+
const updatedAt = typeof routingState.stopMessageUpdatedAt === 'number' && Number.isFinite(routingState.stopMessageUpdatedAt)
|
|
164
|
+
? routingState.stopMessageUpdatedAt
|
|
165
|
+
: undefined;
|
|
166
|
+
const lastUsedAt = typeof routingState.stopMessageLastUsedAt === 'number' && Number.isFinite(routingState.stopMessageLastUsedAt)
|
|
167
|
+
? routingState.stopMessageLastUsedAt
|
|
168
|
+
: undefined;
|
|
169
|
+
const parts = [`"${safeText}"`, `${used}/${Math.floor(routingState.stopMessageMaxRepeats)}`];
|
|
170
|
+
if (updatedAt) {
|
|
171
|
+
parts.push(`set=${new Date(updatedAt).toLocaleString(undefined, { hour12: false })}`);
|
|
172
|
+
}
|
|
173
|
+
if (lastUsedAt) {
|
|
174
|
+
parts.push(`last=${new Date(lastUsedAt).toLocaleString(undefined, { hour12: false })}`);
|
|
175
|
+
}
|
|
176
|
+
stopLabel = ` ${stopColor}[stopMessage:${parts.join(' ')}]${reset}`;
|
|
177
|
+
}
|
|
178
|
+
return `${prefix} ${timeLabel} ${routeColor}${targetLabel}${stickyLabel}${reasonLabel}${stopLabel}${reset}`;
|
|
156
179
|
}
|
|
157
180
|
catch {
|
|
158
181
|
const now = new Date();
|
|
@@ -160,6 +183,15 @@ export function formatVirtualRouterHit(routeName, poolId, providerKey, modelId,
|
|
|
160
183
|
const routeLabel = poolId ? `${routeName}/${poolId}` : routeName;
|
|
161
184
|
const stickyText = formatStickyScope(stickyScope);
|
|
162
185
|
const stickyLabel = stickyText ? ` [sticky:${stickyText}]` : '';
|
|
163
|
-
|
|
186
|
+
let stopLabel = '';
|
|
187
|
+
if (routingState?.stopMessageText && typeof routingState.stopMessageMaxRepeats === 'number') {
|
|
188
|
+
const text = routingState.stopMessageText;
|
|
189
|
+
const safeText = text.length > 24 ? `${text.slice(0, 21)}…` : text;
|
|
190
|
+
const used = typeof routingState.stopMessageUsed === 'number' && Number.isFinite(routingState.stopMessageUsed)
|
|
191
|
+
? routingState.stopMessageUsed
|
|
192
|
+
: 0;
|
|
193
|
+
stopLabel = ` [stopMessage:"${safeText}" ${used}/${Math.floor(routingState.stopMessageMaxRepeats)}]`;
|
|
194
|
+
}
|
|
195
|
+
return `[virtual-router-hit] ${timestamp} ${routeLabel} -> ${providerKey}${modelId ? '.' + modelId : ''}${stickyLabel}${hitReason ? ` reason=${hitReason}` : ''}${stopLabel}`;
|
|
164
196
|
}
|
|
165
197
|
}
|
|
@@ -1,5 +1,10 @@
|
|
|
1
|
-
import { type RoutingDecision, type RoutingDiagnostics, type RouterMetadataInput, type VirtualRouterConfig, type TargetMetadata, type ProviderFailureEvent, type ProviderErrorEvent } from './types.js';
|
|
1
|
+
import { type RoutingDecision, type RoutingDiagnostics, type RouterMetadataInput, type VirtualRouterConfig, type TargetMetadata, type ProviderFailureEvent, type ProviderErrorEvent, type VirtualRouterHealthStore } from './types.js';
|
|
2
2
|
import type { ProcessedRequest, StandardizedRequest } from '../../conversion/hub/types/standardized.js';
|
|
3
|
+
import { type RoutingInstructionState } from './routing-instructions.js';
|
|
4
|
+
/** Storage backend for per-key routing instruction state, injectable through the engine constructor. */
interface RoutingInstructionStateStore {
|
|
5
|
+
/** Synchronously loads the state stored under `key`; `null` is a valid result. */
loadSync(key: string): RoutingInstructionState | null;
|
|
6
|
+
/** Saves `state` under `key`; the engine passes `null` when the state is empty. */
saveAsync(key: string, state: RoutingInstructionState | null): void;
|
|
7
|
+
}
|
|
3
8
|
export declare class VirtualRouterEngine {
|
|
4
9
|
private routing;
|
|
5
10
|
private readonly providerRegistry;
|
|
@@ -14,7 +19,13 @@ export declare class VirtualRouterEngine {
|
|
|
14
19
|
private healthConfig;
|
|
15
20
|
private readonly statsCenter;
|
|
16
21
|
private webSearchForce;
|
|
22
|
+
private healthStore?;
|
|
23
|
+
private routingStateStore;
|
|
17
24
|
private routingInstructionState;
|
|
25
|
+
constructor(deps?: {
|
|
26
|
+
healthStore?: VirtualRouterHealthStore;
|
|
27
|
+
routingStateStore?: RoutingInstructionStateStore;
|
|
28
|
+
});
|
|
18
29
|
initialize(config: VirtualRouterConfig): void;
|
|
19
30
|
route(request: StandardizedRequest | ProcessedRequest, metadata: RouterMetadataInput): {
|
|
20
31
|
target: TargetMetadata;
|
|
@@ -82,5 +93,10 @@ export declare class VirtualRouterEngine {
|
|
|
82
93
|
private isRoutingStateEmpty;
|
|
83
94
|
private persistRoutingInstructionState;
|
|
84
95
|
private markProviderCooldown;
|
|
96
|
+
private clearProviderCooldown;
|
|
85
97
|
private isProviderCoolingDown;
|
|
98
|
+
private restoreHealthFromStore;
|
|
99
|
+
private buildHealthSnapshot;
|
|
100
|
+
private persistHealthSnapshot;
|
|
86
101
|
}
|
|
102
|
+
export {};
|
|
@@ -10,7 +10,7 @@ import { parseRoutingInstructions, applyRoutingInstructions, cleanMessagesFromRo
|
|
|
10
10
|
import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync } from './sticky-session-store.js';
|
|
11
11
|
import { buildHitReason, formatVirtualRouterHit } from './engine-logging.js';
|
|
12
12
|
import { selectProviderImpl } from './engine-selection.js';
|
|
13
|
-
import { applySeriesCooldownImpl, handleProviderFailureImpl, mapProviderErrorImpl } from './engine-health.js';
|
|
13
|
+
import { applyQuotaDepletedImpl, applyQuotaRecoveryImpl, applySeriesCooldownImpl, handleProviderFailureImpl, mapProviderErrorImpl } from './engine-health.js';
|
|
14
14
|
export class VirtualRouterEngine {
|
|
15
15
|
routing = {};
|
|
16
16
|
providerRegistry = new ProviderRegistry();
|
|
@@ -26,7 +26,20 @@ export class VirtualRouterEngine {
|
|
|
26
26
|
statsCenter = getStatsCenter();
|
|
27
27
|
// Derived flags from VirtualRouterConfig/routing used by process / response layers.
|
|
28
28
|
webSearchForce = false;
|
|
29
|
+
healthStore;
|
|
30
|
+
routingStateStore = {
|
|
31
|
+
loadSync: loadRoutingInstructionStateSync,
|
|
32
|
+
saveAsync: saveRoutingInstructionStateAsync
|
|
33
|
+
};
|
|
29
34
|
routingInstructionState = new Map();
|
|
35
|
+
constructor(deps) {
|
|
36
|
+
if (deps?.healthStore) {
|
|
37
|
+
this.healthStore = deps.healthStore;
|
|
38
|
+
}
|
|
39
|
+
if (deps?.routingStateStore) {
|
|
40
|
+
this.routingStateStore = deps.routingStateStore;
|
|
41
|
+
}
|
|
42
|
+
}
|
|
30
43
|
initialize(config) {
|
|
31
44
|
this.validateConfig(config);
|
|
32
45
|
this.routing = config.routing;
|
|
@@ -34,6 +47,8 @@ export class VirtualRouterEngine {
|
|
|
34
47
|
this.healthManager.configure(config.health);
|
|
35
48
|
this.healthConfig = config.health ?? null;
|
|
36
49
|
this.healthManager.registerProviders(Object.keys(config.providers));
|
|
50
|
+
this.providerCooldowns.clear();
|
|
51
|
+
this.restoreHealthFromStore();
|
|
37
52
|
this.loadBalancer = new RouteLoadBalancer(config.loadBalancing);
|
|
38
53
|
this.classifier = new RoutingClassifier(config.classifier);
|
|
39
54
|
this.contextRouting = config.contextRouting ?? { warnRatio: 0.9, hardLimit: false };
|
|
@@ -52,6 +67,15 @@ export class VirtualRouterEngine {
|
|
|
52
67
|
if (metadataInstructions.length > 0) {
|
|
53
68
|
routingState = applyRoutingInstructions(metadataInstructions, routingState);
|
|
54
69
|
}
|
|
70
|
+
const disableStickyRoutes = metadata &&
|
|
71
|
+
typeof metadata === 'object' &&
|
|
72
|
+
metadata.disableStickyRoutes === true;
|
|
73
|
+
if (disableStickyRoutes && routingState.stickyTarget) {
|
|
74
|
+
routingState = {
|
|
75
|
+
...routingState,
|
|
76
|
+
stickyTarget: undefined
|
|
77
|
+
};
|
|
78
|
+
}
|
|
55
79
|
const instructions = parseRoutingInstructions(request.messages);
|
|
56
80
|
if (instructions.length > 0) {
|
|
57
81
|
routingState = applyRoutingInstructions(instructions, routingState);
|
|
@@ -98,7 +122,7 @@ export class VirtualRouterEngine {
|
|
|
98
122
|
const hitReason = buildHitReason(selection.routeUsed, selection.providerKey, classification, features, routingMode, { providerRegistry: this.providerRegistry, contextRouting: this.contextRouting });
|
|
99
123
|
const stickyScope = routingMode !== 'none' ? this.resolveSessionScope(metadata) : undefined;
|
|
100
124
|
const routeForLog = routingMode === 'sticky' ? 'sticky' : selection.routeUsed;
|
|
101
|
-
const formatted = formatVirtualRouterHit(routeForLog, selection.poolId, selection.providerKey, target.modelId || '', hitReason, stickyScope);
|
|
125
|
+
const formatted = formatVirtualRouterHit(routeForLog, selection.poolId, selection.providerKey, target.modelId || '', hitReason, stickyScope, routingState);
|
|
102
126
|
if (formatted) {
|
|
103
127
|
this.debug?.log?.(formatted);
|
|
104
128
|
}
|
|
@@ -132,6 +156,24 @@ export class VirtualRouterEngine {
|
|
|
132
156
|
handleProviderFailureImpl(event, this.healthManager, this.providerHealthConfig(), (key, ttl) => this.markProviderCooldown(key, ttl));
|
|
133
157
|
}
|
|
134
158
|
handleProviderError(event) {
|
|
159
|
+
if (this.healthStore && typeof this.healthStore.recordProviderError === 'function') {
|
|
160
|
+
try {
|
|
161
|
+
this.healthStore.recordProviderError(event);
|
|
162
|
+
}
|
|
163
|
+
catch {
|
|
164
|
+
// ignore persistence errors
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
// 配额恢复事件优先处理:一旦识别到 virtualRouterQuotaRecovery,
|
|
168
|
+
// 直接清理健康状态/冷却 TTL,避免继续走常规错误映射逻辑。
|
|
169
|
+
const handledByQuota = applyQuotaRecoveryImpl(event, this.healthManager, (key) => this.clearProviderCooldown(key), this.debug);
|
|
170
|
+
if (handledByQuota) {
|
|
171
|
+
return;
|
|
172
|
+
}
|
|
173
|
+
const handledByQuotaDepleted = applyQuotaDepletedImpl(event, this.healthManager, (key, ttl) => this.markProviderCooldown(key, ttl), this.debug);
|
|
174
|
+
if (handledByQuotaDepleted) {
|
|
175
|
+
return;
|
|
176
|
+
}
|
|
135
177
|
applySeriesCooldownImpl(event, this.providerRegistry, this.healthManager, (key, ttl) => this.markProviderCooldown(key, ttl), this.debug);
|
|
136
178
|
const derived = mapProviderErrorImpl(event, this.providerHealthConfig());
|
|
137
179
|
if (!derived) {
|
|
@@ -262,7 +304,9 @@ export class VirtualRouterEngine {
|
|
|
262
304
|
disabledModels: new Map(),
|
|
263
305
|
stopMessageText: undefined,
|
|
264
306
|
stopMessageMaxRepeats: undefined,
|
|
265
|
-
stopMessageUsed: undefined
|
|
307
|
+
stopMessageUsed: undefined,
|
|
308
|
+
stopMessageUpdatedAt: undefined,
|
|
309
|
+
stopMessageLastUsedAt: undefined
|
|
266
310
|
};
|
|
267
311
|
}
|
|
268
312
|
this.routingInstructionState.set(key, initial);
|
|
@@ -763,17 +807,28 @@ export class VirtualRouterEngine {
|
|
|
763
807
|
const noDisabledProviders = state.disabledProviders.size === 0;
|
|
764
808
|
const noDisabledKeys = state.disabledKeys.size === 0;
|
|
765
809
|
const noDisabledModels = state.disabledModels.size === 0;
|
|
766
|
-
|
|
810
|
+
const noStopMessage = (!state.stopMessageText || !state.stopMessageText.trim()) &&
|
|
811
|
+
(typeof state.stopMessageMaxRepeats !== 'number' || !Number.isFinite(state.stopMessageMaxRepeats)) &&
|
|
812
|
+
(typeof state.stopMessageUsed !== 'number' || !Number.isFinite(state.stopMessageUsed)) &&
|
|
813
|
+
(typeof state.stopMessageUpdatedAt !== 'number' || !Number.isFinite(state.stopMessageUpdatedAt)) &&
|
|
814
|
+
(typeof state.stopMessageLastUsedAt !== 'number' || !Number.isFinite(state.stopMessageLastUsedAt));
|
|
815
|
+
return (noForced &&
|
|
816
|
+
noSticky &&
|
|
817
|
+
noAllowed &&
|
|
818
|
+
noDisabledProviders &&
|
|
819
|
+
noDisabledKeys &&
|
|
820
|
+
noDisabledModels &&
|
|
821
|
+
noStopMessage);
|
|
767
822
|
}
|
|
768
823
|
persistRoutingInstructionState(key, state) {
|
|
769
824
|
if (!key || (!key.startsWith('session:') && !key.startsWith('conversation:'))) {
|
|
770
825
|
return;
|
|
771
826
|
}
|
|
772
827
|
if (this.isRoutingStateEmpty(state)) {
|
|
773
|
-
|
|
828
|
+
this.routingStateStore.saveAsync(key, null);
|
|
774
829
|
return;
|
|
775
830
|
}
|
|
776
|
-
|
|
831
|
+
this.routingStateStore.saveAsync(key, state);
|
|
777
832
|
}
|
|
778
833
|
markProviderCooldown(providerKey, cooldownMs) {
|
|
779
834
|
if (!providerKey) {
|
|
@@ -784,6 +839,15 @@ export class VirtualRouterEngine {
|
|
|
784
839
|
return;
|
|
785
840
|
}
|
|
786
841
|
this.providerCooldowns.set(providerKey, Date.now() + ttl);
|
|
842
|
+
this.persistHealthSnapshot();
|
|
843
|
+
}
|
|
844
|
+
clearProviderCooldown(providerKey) {
|
|
845
|
+
if (!providerKey) {
|
|
846
|
+
return;
|
|
847
|
+
}
|
|
848
|
+
if (this.providerCooldowns.delete(providerKey)) {
|
|
849
|
+
this.persistHealthSnapshot();
|
|
850
|
+
}
|
|
787
851
|
}
|
|
788
852
|
isProviderCoolingDown(providerKey) {
|
|
789
853
|
if (!providerKey) {
|
|
@@ -799,4 +863,88 @@ export class VirtualRouterEngine {
|
|
|
799
863
|
}
|
|
800
864
|
return true;
|
|
801
865
|
}
|
|
866
|
+
restoreHealthFromStore() {
|
|
867
|
+
if (!this.healthStore || typeof this.healthStore.loadInitialSnapshot !== 'function') {
|
|
868
|
+
return;
|
|
869
|
+
}
|
|
870
|
+
let snapshot = null;
|
|
871
|
+
try {
|
|
872
|
+
snapshot = this.healthStore.loadInitialSnapshot();
|
|
873
|
+
}
|
|
874
|
+
catch {
|
|
875
|
+
snapshot = null;
|
|
876
|
+
}
|
|
877
|
+
if (!snapshot) {
|
|
878
|
+
return;
|
|
879
|
+
}
|
|
880
|
+
const now = Date.now();
|
|
881
|
+
const providerKeys = new Set();
|
|
882
|
+
for (const pools of Object.values(this.routing)) {
|
|
883
|
+
for (const pool of pools) {
|
|
884
|
+
for (const key of pool.targets) {
|
|
885
|
+
if (typeof key === 'string' && key) {
|
|
886
|
+
providerKeys.add(key);
|
|
887
|
+
}
|
|
888
|
+
}
|
|
889
|
+
}
|
|
890
|
+
}
|
|
891
|
+
const byKey = new Map();
|
|
892
|
+
for (const entry of snapshot.cooldowns || []) {
|
|
893
|
+
if (!entry || !entry.providerKey) {
|
|
894
|
+
continue;
|
|
895
|
+
}
|
|
896
|
+
if (!providerKeys.has(entry.providerKey)) {
|
|
897
|
+
continue;
|
|
898
|
+
}
|
|
899
|
+
if (!Number.isFinite(entry.cooldownExpiresAt) || entry.cooldownExpiresAt <= now) {
|
|
900
|
+
continue;
|
|
901
|
+
}
|
|
902
|
+
byKey.set(entry.providerKey, entry);
|
|
903
|
+
this.providerCooldowns.set(entry.providerKey, entry.cooldownExpiresAt);
|
|
904
|
+
}
|
|
905
|
+
for (const state of snapshot.providers || []) {
|
|
906
|
+
if (!state || !state.providerKey) {
|
|
907
|
+
continue;
|
|
908
|
+
}
|
|
909
|
+
if (!providerKeys.has(state.providerKey)) {
|
|
910
|
+
continue;
|
|
911
|
+
}
|
|
912
|
+
if (state.cooldownExpiresAt && state.cooldownExpiresAt > now) {
|
|
913
|
+
const ttl = state.cooldownExpiresAt - now;
|
|
914
|
+
if (ttl > 0) {
|
|
915
|
+
this.healthManager.tripProvider(state.providerKey, state.reason, ttl);
|
|
916
|
+
if (!byKey.has(state.providerKey)) {
|
|
917
|
+
this.providerCooldowns.set(state.providerKey, state.cooldownExpiresAt);
|
|
918
|
+
}
|
|
919
|
+
}
|
|
920
|
+
}
|
|
921
|
+
}
|
|
922
|
+
}
|
|
923
|
+
buildHealthSnapshot() {
|
|
924
|
+
const providers = this.healthManager.getSnapshot();
|
|
925
|
+
const cooldowns = [];
|
|
926
|
+
const now = Date.now();
|
|
927
|
+
for (const [providerKey, expiry] of this.providerCooldowns.entries()) {
|
|
928
|
+
if (!expiry || expiry <= now) {
|
|
929
|
+
continue;
|
|
930
|
+
}
|
|
931
|
+
cooldowns.push({
|
|
932
|
+
providerKey,
|
|
933
|
+
cooldownExpiresAt: expiry
|
|
934
|
+
});
|
|
935
|
+
}
|
|
936
|
+
return { providers, cooldowns };
|
|
937
|
+
}
|
|
938
|
+
persistHealthSnapshot() {
|
|
939
|
+
if (!this.healthStore || typeof this.healthStore.persistSnapshot !== 'function') {
|
|
940
|
+
return;
|
|
941
|
+
}
|
|
942
|
+
try {
|
|
943
|
+
const snapshot = this.buildHealthSnapshot();
|
|
944
|
+
this.healthStore.persistSnapshot(snapshot);
|
|
945
|
+
}
|
|
946
|
+
catch {
|
|
947
|
+
// 持久化失败不影响路由主流程
|
|
948
|
+
}
|
|
949
|
+
}
|
|
802
950
|
}
|
|
@@ -31,6 +31,8 @@ export interface RoutingInstructionState {
|
|
|
31
31
|
stopMessageText?: string;
|
|
32
32
|
stopMessageMaxRepeats?: number;
|
|
33
33
|
stopMessageUsed?: number;
|
|
34
|
+
stopMessageUpdatedAt?: number;
|
|
35
|
+
stopMessageLastUsedAt?: number;
|
|
34
36
|
}
|
|
35
37
|
export declare function parseRoutingInstructions(messages: StandardizedMessage[]): RoutingInstruction[];
|
|
36
38
|
export declare function applyRoutingInstructions(instructions: RoutingInstruction[], currentState: RoutingInstructionState): RoutingInstructionState;
|
|
@@ -275,7 +275,9 @@ export function applyRoutingInstructions(instructions, currentState) {
|
|
|
275
275
|
disabledModels: new Map(Array.from(currentState.disabledModels.entries()).map(([k, v]) => [k, new Set(v)])),
|
|
276
276
|
stopMessageText: currentState.stopMessageText,
|
|
277
277
|
stopMessageMaxRepeats: currentState.stopMessageMaxRepeats,
|
|
278
|
-
stopMessageUsed: currentState.stopMessageUsed
|
|
278
|
+
stopMessageUsed: currentState.stopMessageUsed,
|
|
279
|
+
stopMessageUpdatedAt: currentState.stopMessageUpdatedAt,
|
|
280
|
+
stopMessageLastUsedAt: currentState.stopMessageLastUsedAt
|
|
279
281
|
};
|
|
280
282
|
let allowReset = false;
|
|
281
283
|
let disableReset = false;
|
|
@@ -399,6 +401,8 @@ export function applyRoutingInstructions(instructions, currentState) {
|
|
|
399
401
|
newState.stopMessageText = text;
|
|
400
402
|
newState.stopMessageMaxRepeats = maxRepeats;
|
|
401
403
|
newState.stopMessageUsed = 0;
|
|
404
|
+
newState.stopMessageUpdatedAt = Date.now();
|
|
405
|
+
newState.stopMessageLastUsedAt = undefined;
|
|
402
406
|
}
|
|
403
407
|
break;
|
|
404
408
|
}
|
|
@@ -406,6 +410,8 @@ export function applyRoutingInstructions(instructions, currentState) {
|
|
|
406
410
|
newState.stopMessageText = undefined;
|
|
407
411
|
newState.stopMessageMaxRepeats = undefined;
|
|
408
412
|
newState.stopMessageUsed = undefined;
|
|
413
|
+
newState.stopMessageUpdatedAt = undefined;
|
|
414
|
+
newState.stopMessageLastUsedAt = undefined;
|
|
409
415
|
break;
|
|
410
416
|
}
|
|
411
417
|
}
|
|
@@ -455,6 +461,12 @@ export function serializeRoutingInstructionState(state) {
|
|
|
455
461
|
: {}),
|
|
456
462
|
...(typeof state.stopMessageUsed === 'number' && Number.isFinite(state.stopMessageUsed)
|
|
457
463
|
? { stopMessageUsed: state.stopMessageUsed }
|
|
464
|
+
: {}),
|
|
465
|
+
...(typeof state.stopMessageUpdatedAt === 'number' && Number.isFinite(state.stopMessageUpdatedAt)
|
|
466
|
+
? { stopMessageUpdatedAt: state.stopMessageUpdatedAt }
|
|
467
|
+
: {}),
|
|
468
|
+
...(typeof state.stopMessageLastUsedAt === 'number' && Number.isFinite(state.stopMessageLastUsedAt)
|
|
469
|
+
? { stopMessageLastUsedAt: state.stopMessageLastUsedAt }
|
|
458
470
|
: {})
|
|
459
471
|
};
|
|
460
472
|
}
|
|
@@ -505,5 +517,11 @@ export function deserializeRoutingInstructionState(data) {
|
|
|
505
517
|
if (typeof data.stopMessageUsed === 'number' && Number.isFinite(data.stopMessageUsed)) {
|
|
506
518
|
state.stopMessageUsed = Math.max(0, Math.floor(data.stopMessageUsed));
|
|
507
519
|
}
|
|
520
|
+
if (typeof data.stopMessageUpdatedAt === 'number' && Number.isFinite(data.stopMessageUpdatedAt)) {
|
|
521
|
+
state.stopMessageUpdatedAt = data.stopMessageUpdatedAt;
|
|
522
|
+
}
|
|
523
|
+
if (typeof data.stopMessageLastUsedAt === 'number' && Number.isFinite(data.stopMessageLastUsedAt)) {
|
|
524
|
+
state.stopMessageLastUsedAt = data.stopMessageLastUsedAt;
|
|
525
|
+
}
|
|
508
526
|
return state;
|
|
509
527
|
}
|