@jsonstudio/llms 0.6.567 → 0.6.586
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/codecs/gemini-openai-codec.js +33 -4
- package/dist/conversion/codecs/openai-openai-codec.js +2 -1
- package/dist/conversion/codecs/responses-openai-codec.js +3 -2
- package/dist/conversion/compat/actions/glm-history-image-trim.d.ts +2 -0
- package/dist/conversion/compat/actions/glm-history-image-trim.js +88 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +6 -2
- package/dist/conversion/hub/pipeline/hub-pipeline.js +72 -81
- package/dist/conversion/hub/pipeline/stages/resp_outbound/resp_outbound_stage1_client_remap/index.js +0 -34
- package/dist/conversion/hub/process/chat-process.js +68 -24
- package/dist/conversion/hub/response/provider-response.js +0 -8
- package/dist/conversion/hub/semantic-mappers/gemini-mapper.js +22 -3
- package/dist/conversion/hub/semantic-mappers/responses-mapper.js +267 -14
- package/dist/conversion/hub/types/chat-envelope.d.ts +1 -0
- package/dist/conversion/responses/responses-openai-bridge.d.ts +3 -2
- package/dist/conversion/responses/responses-openai-bridge.js +1 -13
- package/dist/conversion/shared/anthropic-message-utils.js +54 -0
- package/dist/conversion/shared/args-mapping.js +11 -3
- package/dist/conversion/shared/responses-output-builder.js +42 -21
- package/dist/conversion/shared/streaming-text-extractor.d.ts +25 -0
- package/dist/conversion/shared/streaming-text-extractor.js +31 -38
- package/dist/conversion/shared/text-markup-normalizer.d.ts +20 -0
- package/dist/conversion/shared/text-markup-normalizer.js +118 -31
- package/dist/conversion/shared/tool-filter-pipeline.js +56 -30
- package/dist/conversion/shared/tool-harvester.js +43 -12
- package/dist/conversion/shared/tool-mapping.d.ts +1 -0
- package/dist/conversion/shared/tool-mapping.js +33 -19
- package/dist/filters/index.d.ts +1 -0
- package/dist/filters/index.js +1 -0
- package/dist/filters/special/request-tools-normalize.js +14 -4
- package/dist/filters/special/response-apply-patch-toon-decode.d.ts +23 -0
- package/dist/filters/special/response-apply-patch-toon-decode.js +117 -0
- package/dist/filters/special/response-tool-arguments-toon-decode.d.ts +10 -0
- package/dist/filters/special/response-tool-arguments-toon-decode.js +154 -26
- package/dist/guidance/index.js +71 -42
- package/dist/router/virtual-router/bootstrap.js +10 -5
- package/dist/router/virtual-router/classifier.js +16 -7
- package/dist/router/virtual-router/engine-health.d.ts +11 -0
- package/dist/router/virtual-router/engine-health.js +217 -4
- package/dist/router/virtual-router/engine-logging.d.ts +2 -1
- package/dist/router/virtual-router/engine-logging.js +35 -3
- package/dist/router/virtual-router/engine.d.ts +17 -1
- package/dist/router/virtual-router/engine.js +184 -6
- package/dist/router/virtual-router/routing-instructions.d.ts +2 -0
- package/dist/router/virtual-router/routing-instructions.js +19 -1
- package/dist/router/virtual-router/tool-signals.d.ts +2 -1
- package/dist/router/virtual-router/tool-signals.js +324 -119
- package/dist/router/virtual-router/types.d.ts +31 -1
- package/dist/router/virtual-router/types.js +2 -2
- package/dist/servertool/engine.js +3 -0
- package/dist/servertool/handlers/iflow-model-error-retry.d.ts +1 -0
- package/dist/servertool/handlers/iflow-model-error-retry.js +93 -0
- package/dist/servertool/handlers/stop-message-auto.js +61 -4
- package/dist/servertool/server-side-tools.d.ts +1 -0
- package/dist/servertool/server-side-tools.js +27 -0
- package/dist/sse/json-to-sse/event-generators/responses.js +9 -2
- package/dist/sse/sse-to-json/builders/anthropic-response-builder.js +23 -3
- package/dist/tools/apply-patch-structured.d.ts +20 -0
- package/dist/tools/apply-patch-structured.js +240 -0
- package/dist/tools/tool-description-utils.d.ts +5 -0
- package/dist/tools/tool-description-utils.js +50 -0
- package/dist/tools/tool-registry.js +11 -193
- package/package.json +1 -1
|
@@ -13,18 +13,21 @@ export class RoutingClassifier {
|
|
|
13
13
|
const lastToolCategory = features.lastAssistantToolCategory;
|
|
14
14
|
const reachedLongContext = features.estimatedTokens >= (this.config.longContextThresholdTokens ?? DEFAULT_LONG_CONTEXT_THRESHOLD);
|
|
15
15
|
const latestMessageFromUser = features.latestMessageFromUser === true;
|
|
16
|
-
const codingContinuation = lastToolCategory === 'write';
|
|
17
16
|
const thinkingContinuation = lastToolCategory === 'read';
|
|
17
|
+
const thinkingFromUser = latestMessageFromUser;
|
|
18
|
+
const thinkingFromRead = !thinkingFromUser && thinkingContinuation;
|
|
19
|
+
const codingContinuation = lastToolCategory === 'write';
|
|
18
20
|
const searchContinuation = lastToolCategory === 'search';
|
|
19
21
|
const toolsContinuation = lastToolCategory === 'other';
|
|
22
|
+
const hasToolActivity = features.hasTools || features.hasToolCallResponses;
|
|
20
23
|
const evaluationMap = {
|
|
21
24
|
vision: {
|
|
22
25
|
triggered: features.hasImageAttachment,
|
|
23
26
|
reason: 'vision:image-detected'
|
|
24
27
|
},
|
|
25
28
|
thinking: {
|
|
26
|
-
triggered:
|
|
27
|
-
reason: 'thinking:user-input'
|
|
29
|
+
triggered: thinkingFromUser || thinkingFromRead,
|
|
30
|
+
reason: thinkingFromUser ? 'thinking:user-input' : 'thinking:last-tool-read'
|
|
28
31
|
},
|
|
29
32
|
longcontext: {
|
|
30
33
|
triggered: reachedLongContext,
|
|
@@ -34,16 +37,22 @@ export class RoutingClassifier {
|
|
|
34
37
|
triggered: codingContinuation,
|
|
35
38
|
reason: 'coding:last-tool-write'
|
|
36
39
|
},
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
+
web_search: {
|
|
41
|
+
// The web_search route is no longer triggered automatically by the previous turn's tool category or by whether this turn declares a web_search tool;
|
|
42
|
+
// it is kept only as a placeholder for explicit routing instructions / future extensions and never matches by default.
|
|
43
|
+
triggered: false,
|
|
44
|
+
reason: 'web_search:disabled'
|
|
40
45
|
},
|
|
41
46
|
search: {
|
|
47
|
+
// search route: keeps matching only when the previous assistant turn used a search-category tool;
|
|
48
|
+
// whether this turn declares a web_search tool does not change the routing.
|
|
42
49
|
triggered: searchContinuation,
|
|
43
50
|
reason: 'search:last-tool-search'
|
|
44
51
|
},
|
|
45
52
|
tools: {
|
|
46
|
-
|
|
53
|
+
// tools route: the generic tool branch, including web/search tools that are declared for the first time.
|
|
54
|
+
// If the previous turn was already classified as search, the search route takes priority and tools only acts as the fallback.
|
|
55
|
+
triggered: toolsContinuation || (!searchContinuation && hasToolActivity),
|
|
47
56
|
reason: toolsContinuation ? 'tools:last-tool-other' : 'tools:tool-request-detected'
|
|
48
57
|
},
|
|
49
58
|
background: {
|
|
@@ -4,8 +4,19 @@ import type { ProviderErrorEvent, ProviderFailureEvent, ProviderHealthConfig } f
|
|
|
4
4
|
type DebugLike = {
|
|
5
5
|
log?: (...args: unknown[]) => void;
|
|
6
6
|
} | Console | undefined;
|
|
7
|
+
export declare function resetRateLimitBackoffForProvider(providerKey: string): void;
|
|
7
8
|
export declare function handleProviderFailureImpl(event: ProviderFailureEvent, healthManager: ProviderHealthManager, healthConfig: Required<ProviderHealthConfig>, markProviderCooldown: (providerKey: string, cooldownMs: number | undefined) => void): void;
|
|
8
9
|
export declare function mapProviderErrorImpl(event: ProviderErrorEvent, healthConfig: Required<ProviderHealthConfig>): ProviderFailureEvent | null;
|
|
9
10
|
export declare function applySeriesCooldownImpl(event: ProviderErrorEvent, providerRegistry: ProviderRegistry, healthManager: ProviderHealthManager, markProviderCooldown: (providerKey: string, cooldownMs: number | undefined) => void, debug?: DebugLike): void;
|
|
11
|
+
/**
|
|
12
|
+
* Handles a quota-recovery event coming from the Host side:
|
|
13
|
+
* - clears the tripped/cooldown state of the given providerKey in the health manager;
|
|
14
|
+
* - clears the corresponding rate-limit backoff counter;
|
|
15
|
+
* - calls the caller-supplied clearProviderCooldown callback to remove the explicit cooldown TTL.
|
|
16
|
+
*
|
|
17
|
+
* The return value indicates whether the event was handled (true = handled, and the regular error-mapping logic should then be skipped).
|
|
18
|
+
*/
|
|
19
|
+
export declare function applyQuotaRecoveryImpl(event: ProviderErrorEvent, healthManager: ProviderHealthManager, clearProviderCooldown: (providerKey: string) => void, debug?: DebugLike): boolean;
|
|
20
|
+
export declare function applyQuotaDepletedImpl(event: ProviderErrorEvent, healthManager: ProviderHealthManager, markProviderCooldown: (providerKey: string, cooldownMs: number | undefined) => void, debug?: DebugLike): boolean;
|
|
10
21
|
export declare function deriveReason(code: string, stage: string, statusCode?: number): string;
|
|
11
22
|
export {};
|
|
@@ -1,4 +1,105 @@
|
|
|
1
1
|
const SERIES_COOLDOWN_DETAIL_KEY = 'virtualRouterSeriesCooldown';
|
|
2
|
+
const QUOTA_RECOVERY_DETAIL_KEY = 'virtualRouterQuotaRecovery';
|
|
3
|
+
const QUOTA_DEPLETED_DETAIL_KEY = 'virtualRouterQuotaDepleted';
|
|
4
|
+
/**
 * Parses a human-readable duration string into whole milliseconds.
 *
 * Accepted forms:
 * - one or more `<number><unit>` tokens, where unit is `ms`, `s`, `m` or `h`
 *   (case-insensitive); all tokens found are summed, so "1h30m" is 5_400_000;
 * - a bare number, interpreted as seconds ("30" is 30_000).
 *
 * The bare-number form is only accepted when the whole trimmed string is
 * numeric. Previously `parseFloat` was applied to the raw text, which silently
 * turned inputs with an unsupported unit (e.g. "5d", "10 minutes") into a few
 * seconds instead of rejecting them.
 *
 * @param value Raw duration text (anything that is not a non-empty string is rejected).
 * @returns The rounded duration in milliseconds, or `null` when the value is
 *          not a non-empty string, cannot be parsed, or does not add up to a
 *          positive duration.
 */
function parseDurationToMs(value) {
    if (!value || typeof value !== 'string') {
        return null;
    }
    const pattern = /(\d+(?:\.\d+)?)(ms|s|m|h)/gi;
    let totalMs = 0;
    let matched = false;
    let match;
    while ((match = pattern.exec(value)) !== null) {
        matched = true;
        const amount = Number.parseFloat(match[1]);
        if (!Number.isFinite(amount)) {
            continue;
        }
        switch (match[2].toLowerCase()) {
            case 'ms':
                totalMs += amount;
                break;
            case 'h':
                totalMs += amount * 3_600_000;
                break;
            case 'm':
                totalMs += amount * 60_000;
                break;
            case 's':
                totalMs += amount * 1_000;
                break;
            default:
                break;
        }
    }
    if (!matched) {
        const trimmed = value.trim();
        // Only a fully numeric string counts as "bare seconds"; parseFloat on
        // its own would ignore trailing text such as an unsupported unit.
        if (/^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?$/i.test(trimmed)) {
            const seconds = Number.parseFloat(trimmed);
            if (Number.isFinite(seconds)) {
                totalMs = seconds * 1_000;
                matched = true;
            }
        }
    }
    if (!matched || totalMs <= 0) {
        return null;
    }
    return Math.round(totalMs);
}
|
|
44
|
+
/**
 * Reads a comma-separated list of durations from an environment variable.
 *
 * Each non-empty entry is parsed with `parseDurationToMs`; entries that do
 * not yield a positive number of milliseconds are dropped.
 *
 * @param name     Name of the environment variable to read.
 * @param fallback Schedule returned when the variable is unset/blank or when
 *                 no entry could be parsed.
 * @returns The parsed durations in milliseconds, or `fallback`.
 */
function readEnvSchedule(name, fallback) {
    const text = (process.env[name] || '').trim();
    if (text.length === 0) {
        return fallback;
    }
    const durations = text
        .split(',')
        .map((piece) => piece.trim())
        .filter(Boolean)
        .map((piece) => parseDurationToMs(piece))
        .filter((ms) => ms && ms > 0);
    return durations.length > 0 ? durations : fallback;
}
|
|
59
|
+
/**
 * Reads a single duration from an environment variable.
 *
 * @param name       Name of the environment variable to read.
 * @param fallbackMs Value returned when the variable is unset/blank or does
 *                   not parse to a positive duration.
 * @returns The parsed duration in milliseconds, or `fallbackMs`.
 */
function readEnvDuration(name, fallbackMs) {
    const text = (process.env[name] || '').trim();
    if (text) {
        const parsedMs = parseDurationToMs(text);
        if (parsedMs && parsedMs > 0) {
            return parsedMs;
        }
    }
    return fallbackMs;
}
|
|
67
|
+
/**
|
|
68
|
+
* For 429 errors that carry no quotaResetDelay, VirtualRouter keeps a simple stepped backoff policy internally:
|
|
69
|
+
* - default: 5 minutes on the 1st hit, 1 hour on the 2nd, 6 hours on the 3rd, capped at 24 hours from the 4th hit onward;
|
|
70
|
+
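* - adjustable via the ROUTECODEX_RL_SCHEDULE environment variable (e.g. "5m,1h,6h,24h"); note that the RCC_RL_SCHEDULE alias named in the original comment is not read by the code shown here.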
|
|
71
|
+
*
|
|
72
|
+
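* The "count" is tracked per providerKey with a simple time window: if the previous 429 is older than the reset window (24 hours by default, overridable via ROUTECODEX_RL_RESET_WINDOW), the count restarts.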
|
|
73
|
+
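* This state is used for routing decisions only and is not reflected in healthConfig, which keeps the 429-handling responsibilities of Host and VirtualRouter clearly layered.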
|
|
74
|
+
*/
|
|
75
|
+
const NO_QUOTA_RATE_LIMIT_SCHEDULE_MS = readEnvSchedule('ROUTECODEX_RL_SCHEDULE', [
|
|
76
|
+
5 * 60_000,
|
|
77
|
+
60 * 60_000,
|
|
78
|
+
6 * 60 * 60_000,
|
|
79
|
+
24 * 60 * 60_000
|
|
80
|
+
]);
|
|
81
|
+
const rateLimitBackoffByProvider = new Map();
|
|
82
|
+
const RATE_LIMIT_RESET_WINDOW_MS = readEnvDuration('ROUTECODEX_RL_RESET_WINDOW', 24 * 60 * 60_000);
|
|
83
|
+
/**
 * Advances the per-provider 429 backoff counter and returns the cooldown for
 * the new step.
 *
 * The counter continues from the previous value only when the previous hit
 * lies within RATE_LIMIT_RESET_WINDOW_MS before `now`; otherwise it restarts
 * at 1. The cooldown is taken from NO_QUOTA_RATE_LIMIT_SCHEDULE_MS, clamped
 * to its last entry.
 *
 * @param providerKey Key under which the backoff state is stored.
 * @param now         Current timestamp in milliseconds.
 * @returns Cooldown in milliseconds for this occurrence.
 */
function computeRateLimitCooldownMsForProvider(providerKey, now) {
    const previous = rateLimitBackoffByProvider.get(providerKey);
    const sinceLast = previous ? now - previous.lastAt : Number.NaN;
    const withinWindow = Number.isFinite(sinceLast)
        && sinceLast >= 0
        && sinceLast < RATE_LIMIT_RESET_WINDOW_MS;
    const count = withinWindow ? previous.count + 1 : 1;
    rateLimitBackoffByProvider.set(providerKey, { count, lastAt: now });
    const lastStep = NO_QUOTA_RATE_LIMIT_SCHEDULE_MS.length - 1;
    return NO_QUOTA_RATE_LIMIT_SCHEDULE_MS[Math.min(count - 1, lastStep)];
}
|
|
97
|
+
/**
 * Forgets the stored 429 backoff state for a provider.
 *
 * @param providerKey Key whose backoff entry is removed; a falsy key is ignored.
 */
export function resetRateLimitBackoffForProvider(providerKey) {
    if (providerKey) {
        rateLimitBackoffByProvider.delete(providerKey);
    }
}
|
|
2
103
|
export function handleProviderFailureImpl(event, healthManager, healthConfig, markProviderCooldown) {
|
|
3
104
|
if (!event || !event.providerKey) {
|
|
4
105
|
return;
|
|
@@ -10,9 +111,17 @@ export function handleProviderFailureImpl(event, healthManager, healthConfig, ma
|
|
|
10
111
|
healthManager.tripProvider(event.providerKey, event.reason, event.cooldownOverrideMs);
|
|
11
112
|
}
|
|
12
113
|
else if (event.reason === 'rate_limit' && event.statusCode === 429) {
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
114
|
+
// For non-fatal 429 errors:
|
|
115
|
+
// - if the ProviderErrorEvent already carries an explicit cooldownOverrideMs (e.g. derived from quotaResetDelay), use it directly;
|
|
116
|
+
// - otherwise apply the stepped backoff policy for this providerKey (5min → 1h → 6h → 24h),
|
|
117
|
+
// removing this alias from the routing pool during the cooldown so the upstream is not hit repeatedly.
|
|
118
|
+
const providerKey = event.providerKey;
|
|
119
|
+
let ttl = event.cooldownOverrideMs;
|
|
120
|
+
if (!ttl || !Number.isFinite(ttl) || ttl <= 0) {
|
|
121
|
+
ttl = computeRateLimitCooldownMsForProvider(providerKey, Date.now());
|
|
122
|
+
}
|
|
123
|
+
healthManager.cooldownProvider(providerKey, event.reason, ttl);
|
|
124
|
+
markProviderCooldown(providerKey, ttl);
|
|
16
125
|
}
|
|
17
126
|
else {
|
|
18
127
|
healthManager.recordFailure(event.providerKey, event.reason);
|
|
@@ -35,10 +144,20 @@ export function mapProviderErrorImpl(event, healthConfig) {
|
|
|
35
144
|
const code = event.code?.toUpperCase() ?? 'ERR_UNKNOWN';
|
|
36
145
|
const stage = event.stage?.toLowerCase() ?? 'unknown';
|
|
37
146
|
const recoverable = event.recoverable === true;
|
|
147
|
+
const providerFamily = runtime.providerFamily &&
|
|
148
|
+
typeof runtime.providerFamily === 'string'
|
|
149
|
+
? runtime.providerFamily
|
|
150
|
+
: undefined;
|
|
151
|
+
const providerId = runtime.providerId &&
|
|
152
|
+
typeof runtime.providerId === 'string'
|
|
153
|
+
? runtime.providerId
|
|
154
|
+
: undefined;
|
|
155
|
+
const providerTag = (providerFamily || providerId || '').toLowerCase();
|
|
156
|
+
const isOAuthAuth406 = statusCode === 406 && (providerTag === 'iflow' || providerTag === 'qwen');
|
|
38
157
|
let fatal = !recoverable;
|
|
39
158
|
let reason = deriveReason(code, stage, statusCode);
|
|
40
159
|
let cooldownOverrideMs;
|
|
41
|
-
if (statusCode === 401 || statusCode === 402 || statusCode === 403 || code.includes('AUTH')) {
|
|
160
|
+
if (statusCode === 401 || statusCode === 402 || statusCode === 403 || code.includes('AUTH') || isOAuthAuth406) {
|
|
42
161
|
fatal = true;
|
|
43
162
|
cooldownOverrideMs = Math.max(10 * 60_000, healthConfig.fatalCooldownMs ?? 10 * 60_000);
|
|
44
163
|
reason = 'auth';
|
|
@@ -117,6 +236,100 @@ export function applySeriesCooldownImpl(event, providerRegistry, healthManager,
|
|
|
117
236
|
});
|
|
118
237
|
}
|
|
119
238
|
}
|
|
239
|
+
/**
 * Pulls the quota-recovery payload out of an event's `details` object.
 *
 * The payload is looked up under QUOTA_RECOVERY_DETAIL_KEY and must be an
 * object with a non-blank string `providerKey`.
 *
 * @param event Event whose `details` may carry the payload.
 * @returns `{ providerKey, reason }` with both strings trimmed (`reason` is
 *          `undefined` when absent or blank), or `null` when no usable
 *          payload is present.
 */
function extractQuotaRecoveryDetail(event) {
    const details = event ? event.details : undefined;
    if (!details || typeof details !== 'object') {
        return null;
    }
    const payload = details[QUOTA_RECOVERY_DETAIL_KEY];
    if (!payload || typeof payload !== 'object') {
        return null;
    }
    const rawKey = payload.providerKey;
    const providerKey = typeof rawKey === 'string' ? rawKey.trim() : '';
    if (!providerKey) {
        return null;
    }
    const rawReason = payload.reason;
    const trimmedReason = typeof rawReason === 'string' ? rawReason.trim() : '';
    return {
        providerKey,
        reason: trimmedReason || undefined
    };
}
|
|
260
|
+
/**
 * Handles a quota-recovery event coming from the Host side.
 *
 * When the event carries a quota-recovery payload, three recovery steps are
 * attempted for the named provider:
 * - `healthManager.recordSuccess(providerKey)` (per the original note this
 *   clears the tripped/cooldown state held by the health manager);
 * - `resetRateLimitBackoffForProvider(providerKey)` to drop the 429 backoff counter;
 * - `clearProviderCooldown(providerKey)` to remove the explicit cooldown TTL.
 *
 * Each step runs independently: a step that throws no longer prevents the
 * remaining steps from running, and the names of failed steps are included
 * in the debug log entry instead of being dropped silently. Failures never
 * propagate to the caller, so the main routing flow is unaffected.
 *
 * @param event                 Provider error event that may carry the payload.
 * @param healthManager         Health manager exposing `recordSuccess`.
 * @param clearProviderCooldown Callback that removes the provider's cooldown.
 * @param debug                 Optional logger; `debug.log` is used when present.
 * @returns `true` when the event carried a quota-recovery payload (the caller
 *          should then skip the regular error mapping), otherwise `false`.
 */
export function applyQuotaRecoveryImpl(event, healthManager, clearProviderCooldown, debug) {
    const detail = extractQuotaRecoveryDetail(event);
    if (!detail) {
        return false;
    }
    const providerKey = detail.providerKey;
    const failedSteps = [];
    // Runs one recovery step in isolation so a failure cannot skip the others.
    const attempt = (label, step) => {
        try {
            step();
        }
        catch {
            failedSteps.push(label);
        }
    };
    attempt('recordSuccess', () => healthManager.recordSuccess(providerKey));
    attempt('resetRateLimitBackoff', () => resetRateLimitBackoffForProvider(providerKey));
    attempt('clearProviderCooldown', () => clearProviderCooldown(providerKey));
    try {
        debug?.log?.('[virtual-router] quota recovery', {
            providerKey,
            reason: detail.reason,
            ...(failedSteps.length ? { failedSteps } : {})
        });
    }
    catch {
        // A failing logger must not affect the main routing flow.
    }
    return true;
}
|
|
288
|
+
/**
 * Pulls the quota-depleted payload out of an event's `details` object.
 *
 * The payload is looked up under QUOTA_DEPLETED_DETAIL_KEY and must be an
 * object with a non-blank string `providerKey`.
 *
 * @param event Event whose `details` may carry the payload.
 * @returns `{ providerKey, cooldownMs, reason }`, where `cooldownMs` is kept
 *          only when it is a finite positive number and `reason` only when it
 *          is a non-blank string (both `undefined` otherwise), or `null` when
 *          no usable payload is present.
 */
function extractQuotaDepletedDetail(event) {
    const details = event ? event.details : undefined;
    if (!details || typeof details !== 'object') {
        return null;
    }
    const payload = details[QUOTA_DEPLETED_DETAIL_KEY];
    if (!payload || typeof payload !== 'object') {
        return null;
    }
    const rawKey = payload.providerKey;
    const providerKey = typeof rawKey === 'string' ? rawKey.trim() : '';
    if (!providerKey) {
        return null;
    }
    const rawCooldown = payload.cooldownMs;
    const cooldownIsUsable = typeof rawCooldown === 'number'
        && Number.isFinite(rawCooldown)
        && rawCooldown > 0;
    const rawReason = payload.reason;
    const trimmedReason = typeof rawReason === 'string' ? rawReason.trim() : '';
    return {
        providerKey,
        cooldownMs: cooldownIsUsable ? rawCooldown : undefined,
        reason: trimmedReason || undefined
    };
}
|
|
313
|
+
/**
 * Handles a quota-depleted event by putting the named provider into cooldown.
 *
 * When the event carries a quota-depleted payload, two steps are attempted:
 * - `healthManager.cooldownProvider(providerKey, 'rate_limit', cooldownMs)`;
 * - `markProviderCooldown(providerKey, cooldownMs)`.
 *
 * `cooldownMs` is passed through as `undefined` when the payload did not
 * contain a usable value. Each step runs independently: a step that throws
 * no longer prevents the other one from running, and failed steps are named
 * in the debug log entry instead of being dropped silently. Failures never
 * propagate to the caller.
 *
 * @param event                Provider error event that may carry the payload.
 * @param healthManager        Health manager exposing `cooldownProvider`.
 * @param markProviderCooldown Callback that records the provider's cooldown TTL.
 * @param debug                Optional logger; `debug.log` is used when present.
 * @returns `true` when the event carried a quota-depleted payload, otherwise `false`.
 */
export function applyQuotaDepletedImpl(event, healthManager, markProviderCooldown, debug) {
    const detail = extractQuotaDepletedDetail(event);
    if (!detail) {
        return false;
    }
    const ttl = detail.cooldownMs;
    const failedSteps = [];
    // Runs one step in isolation so a failure cannot skip the other.
    const attempt = (label, step) => {
        try {
            step();
        }
        catch {
            failedSteps.push(label);
        }
    };
    attempt('cooldownProvider', () => healthManager.cooldownProvider(detail.providerKey, 'rate_limit', ttl));
    attempt('markProviderCooldown', () => markProviderCooldown(detail.providerKey, ttl));
    try {
        debug?.log?.('[virtual-router] quota depleted', {
            providerKey: detail.providerKey,
            cooldownMs: ttl,
            reason: detail.reason,
            ...(failedSteps.length ? { failedSteps } : {})
        });
    }
    catch {
        // A failing logger must not affect the main routing flow.
    }
    return true;
}
|
|
120
333
|
function resolveSeriesCooldownTargets(detail, event, providerRegistry) {
|
|
121
334
|
const candidates = new Set();
|
|
122
335
|
const push = (key) => {
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { type ClassificationResult, type RoutingFeatures, type RoutingInstructionMode, type VirtualRouterContextRoutingConfig } from './types.js';
|
|
2
2
|
import { ProviderRegistry } from './provider-registry.js';
|
|
3
|
+
import type { RoutingInstructionState } from './routing-instructions.js';
|
|
3
4
|
type LoggingDeps = {
|
|
4
5
|
providerRegistry: ProviderRegistry;
|
|
5
6
|
contextRouting: VirtualRouterContextRoutingConfig | undefined;
|
|
@@ -15,5 +16,5 @@ export declare function describeTargetProvider(providerKey: string, fallbackMode
|
|
|
15
16
|
resolvedModel?: string;
|
|
16
17
|
};
|
|
17
18
|
export declare function buildHitReason(routeUsed: string, providerKey: string, classification: ClassificationResult, features: RoutingFeatures, mode: RoutingInstructionMode | undefined, deps: LoggingDeps): string;
|
|
18
|
-
export declare function formatVirtualRouterHit(routeName: string, poolId: string | undefined, providerKey: string, modelId?: string, hitReason?: string, stickyScope?: string): string;
|
|
19
|
+
export declare function formatVirtualRouterHit(routeName: string, poolId: string | undefined, providerKey: string, modelId?: string, hitReason?: string, stickyScope?: string, routingState?: RoutingInstructionState): string;
|
|
19
20
|
export {};
|
|
@@ -132,7 +132,7 @@ export function buildHitReason(routeUsed, providerKey, classification, features,
|
|
|
132
132
|
}
|
|
133
133
|
return base;
|
|
134
134
|
}
|
|
135
|
-
export function formatVirtualRouterHit(routeName, poolId, providerKey, modelId, hitReason, stickyScope) {
|
|
135
|
+
export function formatVirtualRouterHit(routeName, poolId, providerKey, modelId, hitReason, stickyScope, routingState) {
|
|
136
136
|
try {
|
|
137
137
|
const now = new Date();
|
|
138
138
|
const hours = String(now.getHours()).padStart(2, '0');
|
|
@@ -144,6 +144,7 @@ export function formatVirtualRouterHit(routeName, poolId, providerKey, modelId,
|
|
|
144
144
|
const timeColor = '\x1b[90m';
|
|
145
145
|
const stickyColor = '\x1b[33m';
|
|
146
146
|
const routeColor = resolveRouteColor(routeName);
|
|
147
|
+
const stopColor = '\x1b[38;5;214m';
|
|
147
148
|
const prefix = `${prefixColor}[virtual-router-hit]${reset}`;
|
|
148
149
|
const timeLabel = `${timeColor}${timestamp}${reset}`;
|
|
149
150
|
const { providerLabel, resolvedModel } = describeTargetProvider(providerKey, modelId);
|
|
@@ -152,7 +153,29 @@ export function formatVirtualRouterHit(routeName, poolId, providerKey, modelId,
|
|
|
152
153
|
const stickyText = formatStickyScope(stickyScope);
|
|
153
154
|
const stickyLabel = stickyText ? ` ${stickyColor}[sticky:${stickyText}]${reset}` : '';
|
|
154
155
|
const reasonLabel = hitReason ? ` reason=${hitReason}` : '';
|
|
155
|
-
|
|
156
|
+
let stopLabel = '';
|
|
157
|
+
if (routingState?.stopMessageText && typeof routingState.stopMessageMaxRepeats === 'number') {
|
|
158
|
+
const text = routingState.stopMessageText;
|
|
159
|
+
const safeText = text.length > 24 ? `${text.slice(0, 21)}…` : text;
|
|
160
|
+
const used = typeof routingState.stopMessageUsed === 'number' && Number.isFinite(routingState.stopMessageUsed)
|
|
161
|
+
? routingState.stopMessageUsed
|
|
162
|
+
: 0;
|
|
163
|
+
const updatedAt = typeof routingState.stopMessageUpdatedAt === 'number' && Number.isFinite(routingState.stopMessageUpdatedAt)
|
|
164
|
+
? routingState.stopMessageUpdatedAt
|
|
165
|
+
: undefined;
|
|
166
|
+
const lastUsedAt = typeof routingState.stopMessageLastUsedAt === 'number' && Number.isFinite(routingState.stopMessageLastUsedAt)
|
|
167
|
+
? routingState.stopMessageLastUsedAt
|
|
168
|
+
: undefined;
|
|
169
|
+
const parts = [`"${safeText}"`, `${used}/${Math.floor(routingState.stopMessageMaxRepeats)}`];
|
|
170
|
+
if (updatedAt) {
|
|
171
|
+
parts.push(`set=${new Date(updatedAt).toLocaleString(undefined, { hour12: false })}`);
|
|
172
|
+
}
|
|
173
|
+
if (lastUsedAt) {
|
|
174
|
+
parts.push(`last=${new Date(lastUsedAt).toLocaleString(undefined, { hour12: false })}`);
|
|
175
|
+
}
|
|
176
|
+
stopLabel = ` ${stopColor}[stopMessage:${parts.join(' ')}]${reset}`;
|
|
177
|
+
}
|
|
178
|
+
return `${prefix} ${timeLabel} ${routeColor}${targetLabel}${stickyLabel}${reasonLabel}${stopLabel}${reset}`;
|
|
156
179
|
}
|
|
157
180
|
catch {
|
|
158
181
|
const now = new Date();
|
|
@@ -160,6 +183,15 @@ export function formatVirtualRouterHit(routeName, poolId, providerKey, modelId,
|
|
|
160
183
|
const routeLabel = poolId ? `${routeName}/${poolId}` : routeName;
|
|
161
184
|
const stickyText = formatStickyScope(stickyScope);
|
|
162
185
|
const stickyLabel = stickyText ? ` [sticky:${stickyText}]` : '';
|
|
163
|
-
|
|
186
|
+
let stopLabel = '';
|
|
187
|
+
if (routingState?.stopMessageText && typeof routingState.stopMessageMaxRepeats === 'number') {
|
|
188
|
+
const text = routingState.stopMessageText;
|
|
189
|
+
const safeText = text.length > 24 ? `${text.slice(0, 21)}…` : text;
|
|
190
|
+
const used = typeof routingState.stopMessageUsed === 'number' && Number.isFinite(routingState.stopMessageUsed)
|
|
191
|
+
? routingState.stopMessageUsed
|
|
192
|
+
: 0;
|
|
193
|
+
stopLabel = ` [stopMessage:"${safeText}" ${used}/${Math.floor(routingState.stopMessageMaxRepeats)}]`;
|
|
194
|
+
}
|
|
195
|
+
return `[virtual-router-hit] ${timestamp} ${routeLabel} -> ${providerKey}${modelId ? '.' + modelId : ''}${stickyLabel}${hitReason ? ` reason=${hitReason}` : ''}${stopLabel}`;
|
|
164
196
|
}
|
|
165
197
|
}
|
|
@@ -1,5 +1,10 @@
|
|
|
1
|
-
import { type RoutingDecision, type RoutingDiagnostics, type RouterMetadataInput, type VirtualRouterConfig, type TargetMetadata, type ProviderFailureEvent, type ProviderErrorEvent } from './types.js';
|
|
1
|
+
import { type RoutingDecision, type RoutingDiagnostics, type RouterMetadataInput, type VirtualRouterConfig, type TargetMetadata, type ProviderFailureEvent, type ProviderErrorEvent, type VirtualRouterHealthStore } from './types.js';
|
|
2
2
|
import type { ProcessedRequest, StandardizedRequest } from '../../conversion/hub/types/standardized.js';
|
|
3
|
+
import { type RoutingInstructionState } from './routing-instructions.js';
|
|
4
|
+
/**
 * Storage contract for routing-instruction state, addressed by a string key.
 * Only the signatures are visible in this declaration file.
 * NOTE(review): the method names suggest a blocking read and a
 * fire-and-forget write — confirm against the implementation.
 */
interface RoutingInstructionStateStore {
|
|
5
|
+
/** Returns the state stored under `key`, or `null` (presumably when nothing is stored — TODO confirm). */
loadSync(key: string): RoutingInstructionState | null;
|
|
6
|
+
/** Accepts a state, or `null`, for `key`; returns nothing. NOTE(review): `null` presumably clears the entry — confirm. */
saveAsync(key: string, state: RoutingInstructionState | null): void;
|
|
7
|
+
}
|
|
3
8
|
export declare class VirtualRouterEngine {
|
|
4
9
|
private routing;
|
|
5
10
|
private readonly providerRegistry;
|
|
@@ -14,7 +19,13 @@ export declare class VirtualRouterEngine {
|
|
|
14
19
|
private healthConfig;
|
|
15
20
|
private readonly statsCenter;
|
|
16
21
|
private webSearchForce;
|
|
22
|
+
private healthStore?;
|
|
23
|
+
private routingStateStore;
|
|
17
24
|
private routingInstructionState;
|
|
25
|
+
constructor(deps?: {
|
|
26
|
+
healthStore?: VirtualRouterHealthStore;
|
|
27
|
+
routingStateStore?: RoutingInstructionStateStore;
|
|
28
|
+
});
|
|
18
29
|
initialize(config: VirtualRouterConfig): void;
|
|
19
30
|
route(request: StandardizedRequest | ProcessedRequest, metadata: RouterMetadataInput): {
|
|
20
31
|
target: TargetMetadata;
|
|
@@ -82,5 +93,10 @@ export declare class VirtualRouterEngine {
|
|
|
82
93
|
private isRoutingStateEmpty;
|
|
83
94
|
private persistRoutingInstructionState;
|
|
84
95
|
private markProviderCooldown;
|
|
96
|
+
private clearProviderCooldown;
|
|
85
97
|
private isProviderCoolingDown;
|
|
98
|
+
private restoreHealthFromStore;
|
|
99
|
+
private buildHealthSnapshot;
|
|
100
|
+
private persistHealthSnapshot;
|
|
86
101
|
}
|
|
102
|
+
export {};
|