@jsonstudio/llms 0.6.633 → 0.6.749
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/codecs/anthropic-openai-codec.js +0 -5
- package/dist/conversion/codecs/openai-openai-codec.js +0 -6
- package/dist/conversion/codecs/responses-openai-codec.js +1 -7
- package/dist/conversion/hub/node-support.js +5 -4
- package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +14 -1
- package/dist/conversion/hub/pipeline/hub-pipeline.js +82 -18
- package/dist/conversion/hub/pipeline/session-identifiers.js +132 -2
- package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage3_context_capture/index.js +23 -19
- package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage1_sse_decode/index.js +47 -0
- package/dist/conversion/hub/pipeline/stages/resp_process/resp_process_stage1_tool_governance/index.js +4 -2
- package/dist/conversion/hub/process/chat-process.js +2 -0
- package/dist/conversion/hub/response/provider-response.js +6 -1
- package/dist/conversion/hub/snapshot-recorder.js +8 -1
- package/dist/conversion/pipeline/codecs/v2/shared/openai-chat-helpers.js +0 -7
- package/dist/conversion/responses/responses-openai-bridge.js +47 -7
- package/dist/conversion/shared/compaction-detect.d.ts +2 -0
- package/dist/conversion/shared/compaction-detect.js +53 -0
- package/dist/conversion/shared/errors.d.ts +1 -1
- package/dist/conversion/shared/reasoning-tool-normalizer.js +7 -0
- package/dist/conversion/shared/snapshot-hooks.d.ts +2 -0
- package/dist/conversion/shared/snapshot-hooks.js +180 -4
- package/dist/conversion/shared/snapshot-utils.d.ts +4 -0
- package/dist/conversion/shared/snapshot-utils.js +4 -0
- package/dist/conversion/shared/tool-filter-pipeline.js +3 -9
- package/dist/conversion/shared/tool-governor.d.ts +2 -0
- package/dist/conversion/shared/tool-governor.js +101 -13
- package/dist/conversion/shared/tool-harvester.js +42 -2
- package/dist/filters/index.d.ts +0 -2
- package/dist/filters/index.js +0 -2
- package/dist/filters/special/request-tools-normalize.d.ts +11 -0
- package/dist/filters/special/request-tools-normalize.js +13 -50
- package/dist/filters/special/response-apply-patch-toon-decode.js +403 -82
- package/dist/filters/special/response-tool-arguments-toon-decode.js +6 -75
- package/dist/filters/utils/snapshot-writer.js +42 -4
- package/dist/guidance/index.js +8 -2
- package/dist/router/virtual-router/bootstrap.js +68 -4
- package/dist/router/virtual-router/engine-health.js +0 -4
- package/dist/router/virtual-router/engine-selection.d.ts +8 -1
- package/dist/router/virtual-router/engine-selection.js +168 -9
- package/dist/router/virtual-router/engine.d.ts +6 -1
- package/dist/router/virtual-router/engine.js +263 -14
- package/dist/router/virtual-router/load-balancer.d.ts +18 -0
- package/dist/router/virtual-router/load-balancer.js +3 -2
- package/dist/router/virtual-router/routing-instructions.d.ts +6 -0
- package/dist/router/virtual-router/routing-instructions.js +18 -3
- package/dist/router/virtual-router/sticky-session-store.d.ts +1 -0
- package/dist/router/virtual-router/sticky-session-store.js +36 -0
- package/dist/router/virtual-router/types.d.ts +29 -0
- package/dist/servertool/engine.js +335 -9
- package/dist/servertool/handlers/compaction-detect.d.ts +1 -0
- package/dist/servertool/handlers/compaction-detect.js +1 -0
- package/dist/servertool/handlers/gemini-empty-reply-continue.js +29 -5
- package/dist/servertool/handlers/iflow-model-error-retry.js +17 -0
- package/dist/servertool/handlers/stop-message-auto.js +199 -19
- package/dist/servertool/server-side-tools.d.ts +0 -1
- package/dist/servertool/server-side-tools.js +0 -1
- package/dist/servertool/types.d.ts +1 -0
- package/dist/tools/apply-patch-structured.js +52 -15
- package/dist/tools/tool-registry.js +537 -15
- package/dist/utils/toon.d.ts +4 -0
- package/dist/utils/toon.js +75 -0
- package/package.json +4 -2
- package/dist/test-output/virtual-router/results.json +0 -1
- package/dist/test-output/virtual-router/summary.json +0 -12
|
@@ -9,7 +9,7 @@ import { getStatsCenter } from '../../telemetry/stats-center.js';
|
|
|
9
9
|
import { parseRoutingInstructions, applyRoutingInstructions, cleanMessagesFromRoutingInstructions } from './routing-instructions.js';
|
|
10
10
|
import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync } from './sticky-session-store.js';
|
|
11
11
|
import { buildHitReason, formatVirtualRouterHit } from './engine-logging.js';
|
|
12
|
-
import { selectProviderImpl } from './engine-selection.js';
|
|
12
|
+
import { selectDirectProviderModel, selectProviderImpl } from './engine-selection.js';
|
|
13
13
|
import { applyQuotaDepletedImpl, applyQuotaRecoveryImpl, applySeriesCooldownImpl, handleProviderFailureImpl, mapProviderErrorImpl } from './engine-health.js';
|
|
14
14
|
export class VirtualRouterEngine {
|
|
15
15
|
routing = {};
|
|
@@ -32,6 +32,7 @@ export class VirtualRouterEngine {
|
|
|
32
32
|
saveAsync: saveRoutingInstructionStateAsync
|
|
33
33
|
};
|
|
34
34
|
routingInstructionState = new Map();
|
|
35
|
+
quotaView;
|
|
35
36
|
constructor(deps) {
|
|
36
37
|
if (deps?.healthStore) {
|
|
37
38
|
this.healthStore = deps.healthStore;
|
|
@@ -39,6 +40,28 @@ export class VirtualRouterEngine {
|
|
|
39
40
|
if (deps?.routingStateStore) {
|
|
40
41
|
this.routingStateStore = deps.routingStateStore;
|
|
41
42
|
}
|
|
43
|
+
if (deps?.quotaView) {
|
|
44
|
+
this.quotaView = deps.quotaView;
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
parseDirectProviderModel(model) {
|
|
48
|
+
const raw = typeof model === 'string' ? model.trim() : '';
|
|
49
|
+
if (!raw) {
|
|
50
|
+
return null;
|
|
51
|
+
}
|
|
52
|
+
const firstDot = raw.indexOf('.');
|
|
53
|
+
if (firstDot <= 0 || firstDot === raw.length - 1) {
|
|
54
|
+
return null;
|
|
55
|
+
}
|
|
56
|
+
const providerId = raw.slice(0, firstDot).trim();
|
|
57
|
+
const modelId = raw.slice(firstDot + 1).trim();
|
|
58
|
+
if (!providerId || !modelId) {
|
|
59
|
+
return null;
|
|
60
|
+
}
|
|
61
|
+
if (this.providerRegistry.listProviderKeys(providerId).length === 0) {
|
|
62
|
+
return null;
|
|
63
|
+
}
|
|
64
|
+
return { providerId, modelId };
|
|
42
65
|
}
|
|
43
66
|
initialize(config) {
|
|
44
67
|
this.validateConfig(config);
|
|
@@ -76,27 +99,182 @@ export class VirtualRouterEngine {
|
|
|
76
99
|
stickyTarget: undefined
|
|
77
100
|
};
|
|
78
101
|
}
|
|
79
|
-
const
|
|
102
|
+
const sessionScope = this.resolveSessionScope(metadata);
|
|
103
|
+
if (sessionScope) {
|
|
104
|
+
const sessionState = this.getRoutingInstructionState(sessionScope);
|
|
105
|
+
if (typeof sessionState.stopMessageText === 'string' ||
|
|
106
|
+
typeof sessionState.stopMessageMaxRepeats === 'number') {
|
|
107
|
+
routingState = {
|
|
108
|
+
...routingState,
|
|
109
|
+
stopMessageText: sessionState.stopMessageText,
|
|
110
|
+
stopMessageMaxRepeats: sessionState.stopMessageMaxRepeats,
|
|
111
|
+
stopMessageUsed: sessionState.stopMessageUsed,
|
|
112
|
+
stopMessageUpdatedAt: sessionState.stopMessageUpdatedAt,
|
|
113
|
+
stopMessageLastUsedAt: sessionState.stopMessageLastUsedAt
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
const parsedInstructions = parseRoutingInstructions(request.messages);
|
|
118
|
+
let instructions = parsedInstructions;
|
|
119
|
+
if (sessionScope && parsedInstructions.length > 0) {
|
|
120
|
+
const sessionState = this.getRoutingInstructionState(sessionScope);
|
|
121
|
+
const hasStopMessageClear = parsedInstructions.some((entry) => entry.type === 'stopMessageClear');
|
|
122
|
+
const stopMessageSets = parsedInstructions.filter((entry) => entry.type === 'stopMessageSet');
|
|
123
|
+
if (!hasStopMessageClear && stopMessageSets.length > 0) {
|
|
124
|
+
const sessionText = typeof sessionState.stopMessageText === 'string' ? sessionState.stopMessageText.trim() : '';
|
|
125
|
+
const sessionMax = typeof sessionState.stopMessageMaxRepeats === 'number' && Number.isFinite(sessionState.stopMessageMaxRepeats)
|
|
126
|
+
? Math.floor(sessionState.stopMessageMaxRepeats)
|
|
127
|
+
: undefined;
|
|
128
|
+
const allSame = stopMessageSets.every((entry) => {
|
|
129
|
+
const entryText = typeof entry.stopMessageText === 'string' ? entry.stopMessageText.trim() : '';
|
|
130
|
+
const entryMax = typeof entry.stopMessageMaxRepeats === 'number' && Number.isFinite(entry.stopMessageMaxRepeats)
|
|
131
|
+
? Math.floor(entry.stopMessageMaxRepeats)
|
|
132
|
+
: undefined;
|
|
133
|
+
return Boolean(entryText) && entryText === sessionText && entryMax === sessionMax;
|
|
134
|
+
});
|
|
135
|
+
if (allSame) {
|
|
136
|
+
instructions = parsedInstructions.filter((entry) => entry.type !== 'stopMessageSet');
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
if (parsedInstructions.length > 0) {
|
|
141
|
+
request.messages = cleanMessagesFromRoutingInstructions(request.messages);
|
|
142
|
+
}
|
|
80
143
|
if (instructions.length > 0) {
|
|
81
144
|
routingState = applyRoutingInstructions(instructions, routingState);
|
|
82
145
|
const effectiveKey = stickyKey || 'default';
|
|
83
146
|
this.routingInstructionState.set(effectiveKey, routingState);
|
|
84
|
-
request.messages = cleanMessagesFromRoutingInstructions(request.messages);
|
|
85
147
|
this.persistRoutingInstructionState(effectiveKey, routingState);
|
|
148
|
+
// 对 stopMessage 指令补充一份基于 session/conversation 的持久化状态,
|
|
149
|
+
// 便于 server-side 工具通过 session:*/conversation:* scope 读取到相同配置。
|
|
150
|
+
if (sessionScope) {
|
|
151
|
+
const hasStopMessageSet = instructions.some((entry) => entry.type === 'stopMessageSet');
|
|
152
|
+
const hasStopMessageClear = instructions.some((entry) => entry.type === 'stopMessageClear');
|
|
153
|
+
if (hasStopMessageSet || hasStopMessageClear) {
|
|
154
|
+
const sessionState = this.getRoutingInstructionState(sessionScope);
|
|
155
|
+
let nextSessionState = {
|
|
156
|
+
...sessionState
|
|
157
|
+
};
|
|
158
|
+
let shouldPersistSessionState = false;
|
|
159
|
+
if (hasStopMessageClear) {
|
|
160
|
+
nextSessionState.stopMessageText = undefined;
|
|
161
|
+
nextSessionState.stopMessageMaxRepeats = undefined;
|
|
162
|
+
nextSessionState.stopMessageUsed = undefined;
|
|
163
|
+
nextSessionState.stopMessageUpdatedAt = undefined;
|
|
164
|
+
nextSessionState.stopMessageLastUsedAt = undefined;
|
|
165
|
+
shouldPersistSessionState = true;
|
|
166
|
+
}
|
|
167
|
+
else if (hasStopMessageSet) {
|
|
168
|
+
const text = typeof routingState.stopMessageText === 'string' ? routingState.stopMessageText : '';
|
|
169
|
+
const maxRepeats = routingState.stopMessageMaxRepeats;
|
|
170
|
+
const sameText = typeof sessionState.stopMessageText === 'string' &&
|
|
171
|
+
sessionState.stopMessageText.trim() === text.trim();
|
|
172
|
+
const sameMax = typeof sessionState.stopMessageMaxRepeats === 'number' &&
|
|
173
|
+
typeof maxRepeats === 'number' &&
|
|
174
|
+
Math.floor(sessionState.stopMessageMaxRepeats) === Math.floor(maxRepeats);
|
|
175
|
+
const isSameInstruction = Boolean(text) && sameText && sameMax;
|
|
176
|
+
nextSessionState.stopMessageText = text || undefined;
|
|
177
|
+
nextSessionState.stopMessageMaxRepeats = maxRepeats;
|
|
178
|
+
if (!isSameInstruction) {
|
|
179
|
+
nextSessionState.stopMessageUsed = 0;
|
|
180
|
+
nextSessionState.stopMessageUpdatedAt =
|
|
181
|
+
typeof routingState.stopMessageUpdatedAt === 'number'
|
|
182
|
+
? routingState.stopMessageUpdatedAt
|
|
183
|
+
: Date.now();
|
|
184
|
+
nextSessionState.stopMessageLastUsedAt = undefined;
|
|
185
|
+
shouldPersistSessionState = true;
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
if (shouldPersistSessionState) {
|
|
189
|
+
this.routingInstructionState.set(sessionScope, nextSessionState);
|
|
190
|
+
this.persistRoutingInstructionState(sessionScope, nextSessionState);
|
|
191
|
+
}
|
|
192
|
+
else {
|
|
193
|
+
nextSessionState = sessionState;
|
|
194
|
+
}
|
|
195
|
+
// 日志展示使用 session scope 的 stopMessage 状态,避免每次解析重复刷新时间/次数。
|
|
196
|
+
if (typeof nextSessionState.stopMessageText === 'string' ||
|
|
197
|
+
typeof nextSessionState.stopMessageMaxRepeats === 'number') {
|
|
198
|
+
routingState.stopMessageText = nextSessionState.stopMessageText;
|
|
199
|
+
routingState.stopMessageMaxRepeats = nextSessionState.stopMessageMaxRepeats;
|
|
200
|
+
routingState.stopMessageUsed = nextSessionState.stopMessageUsed;
|
|
201
|
+
routingState.stopMessageUpdatedAt = nextSessionState.stopMessageUpdatedAt;
|
|
202
|
+
routingState.stopMessageLastUsedAt = nextSessionState.stopMessageLastUsedAt;
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
if (instructions.length === 0 && sessionScope) {
|
|
208
|
+
const sessionState = this.getRoutingInstructionState(sessionScope);
|
|
209
|
+
if (typeof sessionState.stopMessageText === 'string' ||
|
|
210
|
+
typeof sessionState.stopMessageMaxRepeats === 'number') {
|
|
211
|
+
routingState.stopMessageText = sessionState.stopMessageText;
|
|
212
|
+
routingState.stopMessageMaxRepeats = sessionState.stopMessageMaxRepeats;
|
|
213
|
+
routingState.stopMessageUsed = sessionState.stopMessageUsed;
|
|
214
|
+
routingState.stopMessageUpdatedAt = sessionState.stopMessageUpdatedAt;
|
|
215
|
+
routingState.stopMessageLastUsedAt = sessionState.stopMessageLastUsedAt;
|
|
216
|
+
}
|
|
86
217
|
}
|
|
87
218
|
const routingMode = this.resolveRoutingMode([...metadataInstructions, ...instructions], routingState);
|
|
88
219
|
const features = buildRoutingFeatures(request, metadata);
|
|
89
|
-
const
|
|
90
|
-
|
|
91
|
-
|
|
220
|
+
const directProviderModel = this.parseDirectProviderModel(request?.model);
|
|
221
|
+
let classification;
|
|
222
|
+
let requestedRoute;
|
|
223
|
+
let selection;
|
|
224
|
+
if (directProviderModel) {
|
|
225
|
+
const providerKeys = this.providerRegistry.listProviderKeys(directProviderModel.providerId);
|
|
226
|
+
let hasModel = false;
|
|
227
|
+
for (const key of providerKeys) {
|
|
228
|
+
try {
|
|
229
|
+
const profile = this.providerRegistry.get(key);
|
|
230
|
+
if (profile?.modelId === directProviderModel.modelId) {
|
|
231
|
+
hasModel = true;
|
|
232
|
+
break;
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
catch {
|
|
236
|
+
continue;
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
if (!hasModel) {
|
|
240
|
+
throw new VirtualRouterError(`Unknown model ${directProviderModel.modelId} for provider ${directProviderModel.providerId}`, VirtualRouterErrorCode.CONFIG_ERROR, { providerId: directProviderModel.providerId, modelId: directProviderModel.modelId });
|
|
241
|
+
}
|
|
242
|
+
classification = {
|
|
243
|
+
routeName: 'direct',
|
|
92
244
|
confidence: 1,
|
|
93
|
-
reasoning: `
|
|
245
|
+
reasoning: `direct_model:${directProviderModel.providerId}.${directProviderModel.modelId}`,
|
|
94
246
|
fallback: false,
|
|
95
|
-
candidates: [
|
|
247
|
+
candidates: ['direct']
|
|
248
|
+
};
|
|
249
|
+
requestedRoute = 'direct';
|
|
250
|
+
const directSelection = selectDirectProviderModel(directProviderModel.providerId, directProviderModel.modelId, metadata, features, routingState, {
|
|
251
|
+
routing: this.routing,
|
|
252
|
+
providerRegistry: this.providerRegistry,
|
|
253
|
+
healthManager: this.healthManager,
|
|
254
|
+
contextAdvisor: this.contextAdvisor,
|
|
255
|
+
loadBalancer: this.loadBalancer,
|
|
256
|
+
isProviderCoolingDown: (key) => this.isProviderCoolingDown(key),
|
|
257
|
+
resolveStickyKey: (m) => this.resolveStickyKey(m),
|
|
258
|
+
quotaView: this.quotaView
|
|
259
|
+
});
|
|
260
|
+
if (!directSelection) {
|
|
261
|
+
throw new VirtualRouterError(`All providers unavailable for model ${directProviderModel.providerId}.${directProviderModel.modelId}`, VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, { providerId: directProviderModel.providerId, modelId: directProviderModel.modelId });
|
|
96
262
|
}
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
263
|
+
selection = directSelection;
|
|
264
|
+
}
|
|
265
|
+
else {
|
|
266
|
+
classification = metadata.routeHint && metadata.routeHint.trim()
|
|
267
|
+
? {
|
|
268
|
+
routeName: metadata.routeHint.trim(),
|
|
269
|
+
confidence: 1,
|
|
270
|
+
reasoning: `route_hint:${metadata.routeHint.trim()}`,
|
|
271
|
+
fallback: false,
|
|
272
|
+
candidates: [metadata.routeHint.trim()]
|
|
273
|
+
}
|
|
274
|
+
: this.classifier.classify(features);
|
|
275
|
+
requestedRoute = this.normalizeRouteAlias(classification.routeName || DEFAULT_ROUTE);
|
|
276
|
+
selection = this.selectProvider(requestedRoute, metadata, classification, features, routingState);
|
|
277
|
+
}
|
|
100
278
|
const baseTarget = this.providerRegistry.buildTarget(selection.providerKey);
|
|
101
279
|
const forceVision = this.routeHasForceFlag('vision');
|
|
102
280
|
const target = {
|
|
@@ -183,6 +361,44 @@ export class VirtualRouterEngine {
|
|
|
183
361
|
}
|
|
184
362
|
};
|
|
185
363
|
}
|
|
364
|
+
getStopMessageState(metadata) {
|
|
365
|
+
const sessionScope = this.resolveSessionScope(metadata);
|
|
366
|
+
const sessionState = sessionScope ? this.getRoutingInstructionState(sessionScope) : null;
|
|
367
|
+
const stickyKey = this.resolveStickyKey(metadata);
|
|
368
|
+
const stickyState = stickyKey ? this.getRoutingInstructionState(stickyKey) : null;
|
|
369
|
+
const effectiveState = sessionState && typeof sessionState.stopMessageText === 'string' && sessionState.stopMessageText.trim()
|
|
370
|
+
? sessionState
|
|
371
|
+
: stickyState;
|
|
372
|
+
if (!effectiveState) {
|
|
373
|
+
return null;
|
|
374
|
+
}
|
|
375
|
+
const text = typeof effectiveState.stopMessageText === 'string' ? effectiveState.stopMessageText.trim() : '';
|
|
376
|
+
const maxRepeats = typeof effectiveState.stopMessageMaxRepeats === 'number' &&
|
|
377
|
+
Number.isFinite(effectiveState.stopMessageMaxRepeats)
|
|
378
|
+
? Math.max(1, Math.floor(effectiveState.stopMessageMaxRepeats))
|
|
379
|
+
: 0;
|
|
380
|
+
if (!text || maxRepeats <= 0) {
|
|
381
|
+
return null;
|
|
382
|
+
}
|
|
383
|
+
return {
|
|
384
|
+
stopMessageText: text,
|
|
385
|
+
stopMessageMaxRepeats: maxRepeats,
|
|
386
|
+
...(typeof effectiveState.stopMessageSource === 'string' && effectiveState.stopMessageSource.trim()
|
|
387
|
+
? { stopMessageSource: effectiveState.stopMessageSource.trim() }
|
|
388
|
+
: {}),
|
|
389
|
+
...(typeof effectiveState.stopMessageUsed === 'number' && Number.isFinite(effectiveState.stopMessageUsed)
|
|
390
|
+
? { stopMessageUsed: Math.max(0, Math.floor(effectiveState.stopMessageUsed)) }
|
|
391
|
+
: {}),
|
|
392
|
+
...(typeof effectiveState.stopMessageUpdatedAt === 'number' &&
|
|
393
|
+
Number.isFinite(effectiveState.stopMessageUpdatedAt)
|
|
394
|
+
? { stopMessageUpdatedAt: effectiveState.stopMessageUpdatedAt }
|
|
395
|
+
: {}),
|
|
396
|
+
...(typeof effectiveState.stopMessageLastUsedAt === 'number' &&
|
|
397
|
+
Number.isFinite(effectiveState.stopMessageLastUsedAt)
|
|
398
|
+
? { stopMessageLastUsedAt: effectiveState.stopMessageLastUsedAt }
|
|
399
|
+
: {})
|
|
400
|
+
};
|
|
401
|
+
}
|
|
186
402
|
handleProviderFailure(event) {
|
|
187
403
|
handleProviderFailureImpl(event, this.healthManager, this.providerHealthConfig(), (key, ttl) => this.markProviderCooldown(key, ttl));
|
|
188
404
|
}
|
|
@@ -195,6 +411,12 @@ export class VirtualRouterEngine {
|
|
|
195
411
|
// ignore persistence errors
|
|
196
412
|
}
|
|
197
413
|
}
|
|
414
|
+
// 当 Host 注入 quotaView 时,VirtualRouter 的入池/优先级决策应以 quota 为准;
|
|
415
|
+
// 此时不再在 engine-health 内部进行 429/backoff/series cooldown 等健康决策,
|
|
416
|
+
// 以避免与 daemon/quota-center 的长期熔断策略重复维护并导致日志噪声。
|
|
417
|
+
if (this.quotaView) {
|
|
418
|
+
return;
|
|
419
|
+
}
|
|
198
420
|
// 配额恢复事件优先处理:一旦识别到 virtualRouterQuotaRecovery,
|
|
199
421
|
// 直接清理健康状态/冷却 TTL,避免继续走常规错误映射逻辑。
|
|
200
422
|
const handledByQuota = applyQuotaRecoveryImpl(event, this.healthManager, (key) => this.clearProviderCooldown(key), this.debug);
|
|
@@ -278,7 +500,8 @@ export class VirtualRouterEngine {
|
|
|
278
500
|
contextAdvisor: this.contextAdvisor,
|
|
279
501
|
loadBalancer: this.loadBalancer,
|
|
280
502
|
isProviderCoolingDown: (key) => this.isProviderCoolingDown(key),
|
|
281
|
-
resolveStickyKey: (m) => this.resolveStickyKey(m)
|
|
503
|
+
resolveStickyKey: (m) => this.resolveStickyKey(m),
|
|
504
|
+
quotaView: this.quotaView
|
|
282
505
|
}, { routingState });
|
|
283
506
|
}
|
|
284
507
|
incrementRouteStat(routeName, providerKey) {
|
|
@@ -327,8 +550,34 @@ export class VirtualRouterEngine {
|
|
|
327
550
|
}
|
|
328
551
|
getRoutingInstructionState(stickyKey) {
|
|
329
552
|
const key = stickyKey || 'default';
|
|
330
|
-
|
|
331
|
-
|
|
553
|
+
const existing = this.routingInstructionState.get(key);
|
|
554
|
+
// 对 session:/conversation: 作用域,在每次读取时尝试从磁盘刷新 stopMessage 相关字段,
|
|
555
|
+
// 确保 servertool(如 stop_message_auto)通过 sticky-session-store 更新的使用次数
|
|
556
|
+
// 能在 VirtualRouter 日志中实时反映出来。
|
|
557
|
+
if (existing && (key.startsWith('session:') || key.startsWith('conversation:'))) {
|
|
558
|
+
try {
|
|
559
|
+
const persisted = loadRoutingInstructionStateSync(key);
|
|
560
|
+
if (persisted) {
|
|
561
|
+
// 以持久化状态为准(包括清空后的 undefined),避免 stopMessage 状态“卡死”在内存中。
|
|
562
|
+
existing.stopMessageText = persisted.stopMessageText;
|
|
563
|
+
existing.stopMessageMaxRepeats = persisted.stopMessageMaxRepeats;
|
|
564
|
+
existing.stopMessageUsed = persisted.stopMessageUsed;
|
|
565
|
+
existing.stopMessageUpdatedAt = persisted.stopMessageUpdatedAt;
|
|
566
|
+
existing.stopMessageLastUsedAt = persisted.stopMessageLastUsedAt;
|
|
567
|
+
}
|
|
568
|
+
else {
|
|
569
|
+
// 文件被删除或无法解析时,将内存中的 stopMessage 状态一并清空。
|
|
570
|
+
existing.stopMessageText = undefined;
|
|
571
|
+
existing.stopMessageMaxRepeats = undefined;
|
|
572
|
+
existing.stopMessageUsed = undefined;
|
|
573
|
+
existing.stopMessageUpdatedAt = undefined;
|
|
574
|
+
existing.stopMessageLastUsedAt = undefined;
|
|
575
|
+
}
|
|
576
|
+
}
|
|
577
|
+
catch {
|
|
578
|
+
// 刷新失败不影响原有内存状态
|
|
579
|
+
}
|
|
580
|
+
return existing;
|
|
332
581
|
}
|
|
333
582
|
let initial = null;
|
|
334
583
|
// 仅对 session:/conversation: 作用域的 key 尝试从磁盘恢复持久化状态
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { LoadBalancingPolicy } from './types.js';
|
|
2
|
+
export interface LoadBalancingOptions {
|
|
3
|
+
routeName: string;
|
|
4
|
+
candidates: string[];
|
|
5
|
+
stickyKey?: string;
|
|
6
|
+
availabilityCheck: (providerKey: string) => boolean;
|
|
7
|
+
}
|
|
8
|
+
export declare class RouteLoadBalancer {
|
|
9
|
+
private policy;
|
|
10
|
+
private readonly states;
|
|
11
|
+
constructor(policy?: LoadBalancingPolicy);
|
|
12
|
+
updatePolicy(policy?: LoadBalancingPolicy): void;
|
|
13
|
+
select(options: LoadBalancingOptions, strategyOverride?: LoadBalancingPolicy['strategy']): string | null;
|
|
14
|
+
private selectRoundRobin;
|
|
15
|
+
private selectWeighted;
|
|
16
|
+
private selectSticky;
|
|
17
|
+
private getState;
|
|
18
|
+
}
|
|
@@ -9,12 +9,13 @@ export class RouteLoadBalancer {
|
|
|
9
9
|
this.policy = policy;
|
|
10
10
|
}
|
|
11
11
|
}
|
|
12
|
-
select(options) {
|
|
12
|
+
select(options, strategyOverride) {
|
|
13
13
|
const available = options.candidates.filter((candidate) => options.availabilityCheck(candidate));
|
|
14
14
|
if (available.length === 0) {
|
|
15
15
|
return null;
|
|
16
16
|
}
|
|
17
|
-
|
|
17
|
+
const strategy = strategyOverride ?? this.policy.strategy;
|
|
18
|
+
switch (strategy) {
|
|
18
19
|
case 'sticky':
|
|
19
20
|
return this.selectSticky(options.routeName, available, options.stickyKey);
|
|
20
21
|
case 'weighted':
|
|
@@ -28,6 +28,12 @@ export interface RoutingInstructionState {
|
|
|
28
28
|
disabledProviders: Set<string>;
|
|
29
29
|
disabledKeys: Map<string, Set<string | number>>;
|
|
30
30
|
disabledModels: Map<string, Set<string>>;
|
|
31
|
+
/**
|
|
32
|
+
* Source of the current stopMessage configuration.
|
|
33
|
+
* - 'explicit':由用户通过 <** stopMessage:"..." **> 指令显式设置
|
|
34
|
+
* - 'auto':由系统基于空响应/错误自动推导(例如 Gemini 空回复)
|
|
35
|
+
*/
|
|
36
|
+
stopMessageSource?: string;
|
|
31
37
|
stopMessageText?: string;
|
|
32
38
|
stopMessageMaxRepeats?: number;
|
|
33
39
|
stopMessageUsed?: number;
|
|
@@ -398,11 +398,19 @@ export function applyRoutingInstructions(instructions, currentState) {
|
|
|
398
398
|
? Math.floor(instruction.stopMessageMaxRepeats)
|
|
399
399
|
: 0;
|
|
400
400
|
if (text && maxRepeats > 0) {
|
|
401
|
+
const sameText = typeof newState.stopMessageText === 'string' &&
|
|
402
|
+
newState.stopMessageText.trim() === text;
|
|
403
|
+
const sameMax = typeof newState.stopMessageMaxRepeats === 'number' &&
|
|
404
|
+
Math.floor(newState.stopMessageMaxRepeats) === maxRepeats;
|
|
405
|
+
const isSameInstruction = sameText && sameMax;
|
|
401
406
|
newState.stopMessageText = text;
|
|
402
407
|
newState.stopMessageMaxRepeats = maxRepeats;
|
|
403
|
-
newState.
|
|
404
|
-
|
|
405
|
-
|
|
408
|
+
newState.stopMessageSource = 'explicit';
|
|
409
|
+
if (!isSameInstruction) {
|
|
410
|
+
newState.stopMessageUsed = 0;
|
|
411
|
+
newState.stopMessageUpdatedAt = Date.now();
|
|
412
|
+
newState.stopMessageLastUsedAt = undefined;
|
|
413
|
+
}
|
|
406
414
|
}
|
|
407
415
|
break;
|
|
408
416
|
}
|
|
@@ -410,6 +418,7 @@ export function applyRoutingInstructions(instructions, currentState) {
|
|
|
410
418
|
newState.stopMessageText = undefined;
|
|
411
419
|
newState.stopMessageMaxRepeats = undefined;
|
|
412
420
|
newState.stopMessageUsed = undefined;
|
|
421
|
+
newState.stopMessageSource = undefined;
|
|
413
422
|
newState.stopMessageUpdatedAt = undefined;
|
|
414
423
|
newState.stopMessageLastUsedAt = undefined;
|
|
415
424
|
break;
|
|
@@ -453,6 +462,9 @@ export function serializeRoutingInstructionState(state) {
|
|
|
453
462
|
provider,
|
|
454
463
|
models: Array.from(models)
|
|
455
464
|
})),
|
|
465
|
+
...(typeof state.stopMessageSource === 'string' && state.stopMessageSource.trim()
|
|
466
|
+
? { stopMessageSource: state.stopMessageSource }
|
|
467
|
+
: {}),
|
|
456
468
|
...(typeof state.stopMessageText === 'string' && state.stopMessageText.trim()
|
|
457
469
|
? { stopMessageText: state.stopMessageText }
|
|
458
470
|
: {}),
|
|
@@ -508,6 +520,9 @@ export function deserializeRoutingInstructionState(data) {
|
|
|
508
520
|
}
|
|
509
521
|
}
|
|
510
522
|
}
|
|
523
|
+
if (typeof data.stopMessageSource === 'string' && data.stopMessageSource.trim()) {
|
|
524
|
+
state.stopMessageSource = data.stopMessageSource.trim();
|
|
525
|
+
}
|
|
511
526
|
if (typeof data.stopMessageText === 'string' && data.stopMessageText.trim()) {
|
|
512
527
|
state.stopMessageText = data.stopMessageText;
|
|
513
528
|
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
1
|
import type { RoutingInstructionState } from './routing-instructions.js';
|
|
2
2
|
export declare function loadRoutingInstructionStateSync(key: string | undefined): RoutingInstructionState | null;
|
|
3
3
|
export declare function saveRoutingInstructionStateAsync(key: string | undefined, state: RoutingInstructionState | null): void;
|
|
4
|
+
export declare function saveRoutingInstructionStateSync(key: string | undefined, state: RoutingInstructionState | null): void;
|
|
@@ -108,3 +108,39 @@ export function saveRoutingInstructionStateAsync(key, state) {
|
|
|
108
108
|
// ignore sync write failures
|
|
109
109
|
}
|
|
110
110
|
}
|
|
111
|
+
export function saveRoutingInstructionStateSync(key, state) {
|
|
112
|
+
if (!isPersistentKey(key)) {
|
|
113
|
+
return;
|
|
114
|
+
}
|
|
115
|
+
const dir = resolveSessionDir();
|
|
116
|
+
const filename = keyToFilename(key);
|
|
117
|
+
if (!dir || !filename) {
|
|
118
|
+
return;
|
|
119
|
+
}
|
|
120
|
+
const filepath = path.join(dir, filename);
|
|
121
|
+
if (!state) {
|
|
122
|
+
try {
|
|
123
|
+
fs.unlinkSync(filepath);
|
|
124
|
+
}
|
|
125
|
+
catch {
|
|
126
|
+
// ignore unlink failures
|
|
127
|
+
}
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
const payload = {
|
|
131
|
+
version: 1,
|
|
132
|
+
state: serializeRoutingInstructionState(state)
|
|
133
|
+
};
|
|
134
|
+
try {
|
|
135
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
136
|
+
}
|
|
137
|
+
catch {
|
|
138
|
+
// ignore mkdir errors
|
|
139
|
+
}
|
|
140
|
+
try {
|
|
141
|
+
fs.writeFileSync(filepath, JSON.stringify(payload), { encoding: 'utf8' });
|
|
142
|
+
}
|
|
143
|
+
catch {
|
|
144
|
+
// ignore sync write failures
|
|
145
|
+
}
|
|
146
|
+
}
|
|
@@ -6,10 +6,17 @@ export declare const DEFAULT_MODEL_CONTEXT_TOKENS = 200000;
|
|
|
6
6
|
export declare const DEFAULT_ROUTE = "default";
|
|
7
7
|
export declare const ROUTE_PRIORITY: string[];
|
|
8
8
|
export type RoutingInstructionMode = 'force' | 'sticky' | 'none';
|
|
9
|
+
export type RoutePoolMode = 'round-robin' | 'priority';
|
|
9
10
|
export interface RoutePoolTier {
|
|
10
11
|
id: string;
|
|
11
12
|
targets: string[];
|
|
12
13
|
priority: number;
|
|
14
|
+
/**
|
|
15
|
+
* Pool-level routing mode:
|
|
16
|
+
* - round-robin: force round-robin selection inside this pool (ignores global loadBalancing strategy)
|
|
17
|
+
* - priority: always pick highest-priority key first, only fallback when unavailable
|
|
18
|
+
*/
|
|
19
|
+
mode?: RoutePoolMode;
|
|
13
20
|
backup?: boolean;
|
|
14
21
|
/**
|
|
15
22
|
* Optional force flag for this route pool.
|
|
@@ -297,6 +304,19 @@ export interface RoutingDiagnostics {
|
|
|
297
304
|
poolId?: string;
|
|
298
305
|
confidence: number;
|
|
299
306
|
}
|
|
307
|
+
export interface StopMessageStateSnapshot {
|
|
308
|
+
stopMessageText: string;
|
|
309
|
+
stopMessageMaxRepeats: number;
|
|
310
|
+
/**
|
|
311
|
+
* stopMessage 来源:
|
|
312
|
+
* - 'explicit':来自用户显式指令
|
|
313
|
+
* - 'auto':系统基于空响应/错误自动推导
|
|
314
|
+
*/
|
|
315
|
+
stopMessageSource?: string;
|
|
316
|
+
stopMessageUsed?: number;
|
|
317
|
+
stopMessageUpdatedAt?: number;
|
|
318
|
+
stopMessageLastUsedAt?: number;
|
|
319
|
+
}
|
|
300
320
|
export interface RoutingStatusSnapshot {
|
|
301
321
|
routes: Record<string, {
|
|
302
322
|
providers: string[];
|
|
@@ -373,3 +393,12 @@ export interface VirtualRouterHealthStore {
|
|
|
373
393
|
*/
|
|
374
394
|
recordProviderError?(event: ProviderErrorEvent): void;
|
|
375
395
|
}
|
|
396
|
+
export interface ProviderQuotaViewEntry {
|
|
397
|
+
providerKey: string;
|
|
398
|
+
inPool: boolean;
|
|
399
|
+
reason?: string;
|
|
400
|
+
priorityTier?: number;
|
|
401
|
+
cooldownUntil?: number | null;
|
|
402
|
+
blacklistUntil?: number | null;
|
|
403
|
+
}
|
|
404
|
+
export type ProviderQuotaView = (providerKey: string) => ProviderQuotaViewEntry | null;
|