@jsonstudio/llms 0.6.633 → 0.6.749

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/dist/conversion/codecs/anthropic-openai-codec.js +0 -5
  2. package/dist/conversion/codecs/openai-openai-codec.js +0 -6
  3. package/dist/conversion/codecs/responses-openai-codec.js +1 -7
  4. package/dist/conversion/hub/node-support.js +5 -4
  5. package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +14 -1
  6. package/dist/conversion/hub/pipeline/hub-pipeline.js +82 -18
  7. package/dist/conversion/hub/pipeline/session-identifiers.js +132 -2
  8. package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage3_context_capture/index.js +23 -19
  9. package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage1_sse_decode/index.js +47 -0
  10. package/dist/conversion/hub/pipeline/stages/resp_process/resp_process_stage1_tool_governance/index.js +4 -2
  11. package/dist/conversion/hub/process/chat-process.js +2 -0
  12. package/dist/conversion/hub/response/provider-response.js +6 -1
  13. package/dist/conversion/hub/snapshot-recorder.js +8 -1
  14. package/dist/conversion/pipeline/codecs/v2/shared/openai-chat-helpers.js +0 -7
  15. package/dist/conversion/responses/responses-openai-bridge.js +47 -7
  16. package/dist/conversion/shared/compaction-detect.d.ts +2 -0
  17. package/dist/conversion/shared/compaction-detect.js +53 -0
  18. package/dist/conversion/shared/errors.d.ts +1 -1
  19. package/dist/conversion/shared/reasoning-tool-normalizer.js +7 -0
  20. package/dist/conversion/shared/snapshot-hooks.d.ts +2 -0
  21. package/dist/conversion/shared/snapshot-hooks.js +180 -4
  22. package/dist/conversion/shared/snapshot-utils.d.ts +4 -0
  23. package/dist/conversion/shared/snapshot-utils.js +4 -0
  24. package/dist/conversion/shared/tool-filter-pipeline.js +3 -9
  25. package/dist/conversion/shared/tool-governor.d.ts +2 -0
  26. package/dist/conversion/shared/tool-governor.js +101 -13
  27. package/dist/conversion/shared/tool-harvester.js +42 -2
  28. package/dist/filters/index.d.ts +0 -2
  29. package/dist/filters/index.js +0 -2
  30. package/dist/filters/special/request-tools-normalize.d.ts +11 -0
  31. package/dist/filters/special/request-tools-normalize.js +13 -50
  32. package/dist/filters/special/response-apply-patch-toon-decode.js +403 -82
  33. package/dist/filters/special/response-tool-arguments-toon-decode.js +6 -75
  34. package/dist/filters/utils/snapshot-writer.js +42 -4
  35. package/dist/guidance/index.js +8 -2
  36. package/dist/router/virtual-router/bootstrap.js +68 -4
  37. package/dist/router/virtual-router/engine-health.js +0 -4
  38. package/dist/router/virtual-router/engine-selection.d.ts +8 -1
  39. package/dist/router/virtual-router/engine-selection.js +168 -9
  40. package/dist/router/virtual-router/engine.d.ts +6 -1
  41. package/dist/router/virtual-router/engine.js +263 -14
  42. package/dist/router/virtual-router/load-balancer.d.ts +18 -0
  43. package/dist/router/virtual-router/load-balancer.js +3 -2
  44. package/dist/router/virtual-router/routing-instructions.d.ts +6 -0
  45. package/dist/router/virtual-router/routing-instructions.js +18 -3
  46. package/dist/router/virtual-router/sticky-session-store.d.ts +1 -0
  47. package/dist/router/virtual-router/sticky-session-store.js +36 -0
  48. package/dist/router/virtual-router/types.d.ts +29 -0
  49. package/dist/servertool/engine.js +335 -9
  50. package/dist/servertool/handlers/compaction-detect.d.ts +1 -0
  51. package/dist/servertool/handlers/compaction-detect.js +1 -0
  52. package/dist/servertool/handlers/gemini-empty-reply-continue.js +29 -5
  53. package/dist/servertool/handlers/iflow-model-error-retry.js +17 -0
  54. package/dist/servertool/handlers/stop-message-auto.js +199 -19
  55. package/dist/servertool/server-side-tools.d.ts +0 -1
  56. package/dist/servertool/server-side-tools.js +0 -1
  57. package/dist/servertool/types.d.ts +1 -0
  58. package/dist/tools/apply-patch-structured.js +52 -15
  59. package/dist/tools/tool-registry.js +537 -15
  60. package/dist/utils/toon.d.ts +4 -0
  61. package/dist/utils/toon.js +75 -0
  62. package/package.json +4 -2
  63. package/dist/test-output/virtual-router/results.json +0 -1
  64. package/dist/test-output/virtual-router/summary.json +0 -12
@@ -9,7 +9,7 @@ import { getStatsCenter } from '../../telemetry/stats-center.js';
9
9
  import { parseRoutingInstructions, applyRoutingInstructions, cleanMessagesFromRoutingInstructions } from './routing-instructions.js';
10
10
  import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync } from './sticky-session-store.js';
11
11
  import { buildHitReason, formatVirtualRouterHit } from './engine-logging.js';
12
- import { selectProviderImpl } from './engine-selection.js';
12
+ import { selectDirectProviderModel, selectProviderImpl } from './engine-selection.js';
13
13
  import { applyQuotaDepletedImpl, applyQuotaRecoveryImpl, applySeriesCooldownImpl, handleProviderFailureImpl, mapProviderErrorImpl } from './engine-health.js';
14
14
  export class VirtualRouterEngine {
15
15
  routing = {};
@@ -32,6 +32,7 @@ export class VirtualRouterEngine {
32
32
  saveAsync: saveRoutingInstructionStateAsync
33
33
  };
34
34
  routingInstructionState = new Map();
35
+ quotaView;
35
36
  constructor(deps) {
36
37
  if (deps?.healthStore) {
37
38
  this.healthStore = deps.healthStore;
@@ -39,6 +40,28 @@ export class VirtualRouterEngine {
39
40
  if (deps?.routingStateStore) {
40
41
  this.routingStateStore = deps.routingStateStore;
41
42
  }
43
+ if (deps?.quotaView) {
44
+ this.quotaView = deps.quotaView;
45
+ }
46
+ }
47
+ parseDirectProviderModel(model) {
48
+ const raw = typeof model === 'string' ? model.trim() : '';
49
+ if (!raw) {
50
+ return null;
51
+ }
52
+ const firstDot = raw.indexOf('.');
53
+ if (firstDot <= 0 || firstDot === raw.length - 1) {
54
+ return null;
55
+ }
56
+ const providerId = raw.slice(0, firstDot).trim();
57
+ const modelId = raw.slice(firstDot + 1).trim();
58
+ if (!providerId || !modelId) {
59
+ return null;
60
+ }
61
+ if (this.providerRegistry.listProviderKeys(providerId).length === 0) {
62
+ return null;
63
+ }
64
+ return { providerId, modelId };
42
65
  }
43
66
  initialize(config) {
44
67
  this.validateConfig(config);
@@ -76,27 +99,182 @@ export class VirtualRouterEngine {
76
99
  stickyTarget: undefined
77
100
  };
78
101
  }
79
- const instructions = parseRoutingInstructions(request.messages);
102
+ const sessionScope = this.resolveSessionScope(metadata);
103
+ if (sessionScope) {
104
+ const sessionState = this.getRoutingInstructionState(sessionScope);
105
+ if (typeof sessionState.stopMessageText === 'string' ||
106
+ typeof sessionState.stopMessageMaxRepeats === 'number') {
107
+ routingState = {
108
+ ...routingState,
109
+ stopMessageText: sessionState.stopMessageText,
110
+ stopMessageMaxRepeats: sessionState.stopMessageMaxRepeats,
111
+ stopMessageUsed: sessionState.stopMessageUsed,
112
+ stopMessageUpdatedAt: sessionState.stopMessageUpdatedAt,
113
+ stopMessageLastUsedAt: sessionState.stopMessageLastUsedAt
114
+ };
115
+ }
116
+ }
117
+ const parsedInstructions = parseRoutingInstructions(request.messages);
118
+ let instructions = parsedInstructions;
119
+ if (sessionScope && parsedInstructions.length > 0) {
120
+ const sessionState = this.getRoutingInstructionState(sessionScope);
121
+ const hasStopMessageClear = parsedInstructions.some((entry) => entry.type === 'stopMessageClear');
122
+ const stopMessageSets = parsedInstructions.filter((entry) => entry.type === 'stopMessageSet');
123
+ if (!hasStopMessageClear && stopMessageSets.length > 0) {
124
+ const sessionText = typeof sessionState.stopMessageText === 'string' ? sessionState.stopMessageText.trim() : '';
125
+ const sessionMax = typeof sessionState.stopMessageMaxRepeats === 'number' && Number.isFinite(sessionState.stopMessageMaxRepeats)
126
+ ? Math.floor(sessionState.stopMessageMaxRepeats)
127
+ : undefined;
128
+ const allSame = stopMessageSets.every((entry) => {
129
+ const entryText = typeof entry.stopMessageText === 'string' ? entry.stopMessageText.trim() : '';
130
+ const entryMax = typeof entry.stopMessageMaxRepeats === 'number' && Number.isFinite(entry.stopMessageMaxRepeats)
131
+ ? Math.floor(entry.stopMessageMaxRepeats)
132
+ : undefined;
133
+ return Boolean(entryText) && entryText === sessionText && entryMax === sessionMax;
134
+ });
135
+ if (allSame) {
136
+ instructions = parsedInstructions.filter((entry) => entry.type !== 'stopMessageSet');
137
+ }
138
+ }
139
+ }
140
+ if (parsedInstructions.length > 0) {
141
+ request.messages = cleanMessagesFromRoutingInstructions(request.messages);
142
+ }
80
143
  if (instructions.length > 0) {
81
144
  routingState = applyRoutingInstructions(instructions, routingState);
82
145
  const effectiveKey = stickyKey || 'default';
83
146
  this.routingInstructionState.set(effectiveKey, routingState);
84
- request.messages = cleanMessagesFromRoutingInstructions(request.messages);
85
147
  this.persistRoutingInstructionState(effectiveKey, routingState);
148
+ // 对 stopMessage 指令补充一份基于 session/conversation 的持久化状态,
149
+ // 便于 server-side 工具通过 session:*/conversation:* scope 读取到相同配置。
150
+ if (sessionScope) {
151
+ const hasStopMessageSet = instructions.some((entry) => entry.type === 'stopMessageSet');
152
+ const hasStopMessageClear = instructions.some((entry) => entry.type === 'stopMessageClear');
153
+ if (hasStopMessageSet || hasStopMessageClear) {
154
+ const sessionState = this.getRoutingInstructionState(sessionScope);
155
+ let nextSessionState = {
156
+ ...sessionState
157
+ };
158
+ let shouldPersistSessionState = false;
159
+ if (hasStopMessageClear) {
160
+ nextSessionState.stopMessageText = undefined;
161
+ nextSessionState.stopMessageMaxRepeats = undefined;
162
+ nextSessionState.stopMessageUsed = undefined;
163
+ nextSessionState.stopMessageUpdatedAt = undefined;
164
+ nextSessionState.stopMessageLastUsedAt = undefined;
165
+ shouldPersistSessionState = true;
166
+ }
167
+ else if (hasStopMessageSet) {
168
+ const text = typeof routingState.stopMessageText === 'string' ? routingState.stopMessageText : '';
169
+ const maxRepeats = routingState.stopMessageMaxRepeats;
170
+ const sameText = typeof sessionState.stopMessageText === 'string' &&
171
+ sessionState.stopMessageText.trim() === text.trim();
172
+ const sameMax = typeof sessionState.stopMessageMaxRepeats === 'number' &&
173
+ typeof maxRepeats === 'number' &&
174
+ Math.floor(sessionState.stopMessageMaxRepeats) === Math.floor(maxRepeats);
175
+ const isSameInstruction = Boolean(text) && sameText && sameMax;
176
+ nextSessionState.stopMessageText = text || undefined;
177
+ nextSessionState.stopMessageMaxRepeats = maxRepeats;
178
+ if (!isSameInstruction) {
179
+ nextSessionState.stopMessageUsed = 0;
180
+ nextSessionState.stopMessageUpdatedAt =
181
+ typeof routingState.stopMessageUpdatedAt === 'number'
182
+ ? routingState.stopMessageUpdatedAt
183
+ : Date.now();
184
+ nextSessionState.stopMessageLastUsedAt = undefined;
185
+ shouldPersistSessionState = true;
186
+ }
187
+ }
188
+ if (shouldPersistSessionState) {
189
+ this.routingInstructionState.set(sessionScope, nextSessionState);
190
+ this.persistRoutingInstructionState(sessionScope, nextSessionState);
191
+ }
192
+ else {
193
+ nextSessionState = sessionState;
194
+ }
195
+ // 日志展示使用 session scope 的 stopMessage 状态,避免每次解析重复刷新时间/次数。
196
+ if (typeof nextSessionState.stopMessageText === 'string' ||
197
+ typeof nextSessionState.stopMessageMaxRepeats === 'number') {
198
+ routingState.stopMessageText = nextSessionState.stopMessageText;
199
+ routingState.stopMessageMaxRepeats = nextSessionState.stopMessageMaxRepeats;
200
+ routingState.stopMessageUsed = nextSessionState.stopMessageUsed;
201
+ routingState.stopMessageUpdatedAt = nextSessionState.stopMessageUpdatedAt;
202
+ routingState.stopMessageLastUsedAt = nextSessionState.stopMessageLastUsedAt;
203
+ }
204
+ }
205
+ }
206
+ }
207
+ if (instructions.length === 0 && sessionScope) {
208
+ const sessionState = this.getRoutingInstructionState(sessionScope);
209
+ if (typeof sessionState.stopMessageText === 'string' ||
210
+ typeof sessionState.stopMessageMaxRepeats === 'number') {
211
+ routingState.stopMessageText = sessionState.stopMessageText;
212
+ routingState.stopMessageMaxRepeats = sessionState.stopMessageMaxRepeats;
213
+ routingState.stopMessageUsed = sessionState.stopMessageUsed;
214
+ routingState.stopMessageUpdatedAt = sessionState.stopMessageUpdatedAt;
215
+ routingState.stopMessageLastUsedAt = sessionState.stopMessageLastUsedAt;
216
+ }
86
217
  }
87
218
  const routingMode = this.resolveRoutingMode([...metadataInstructions, ...instructions], routingState);
88
219
  const features = buildRoutingFeatures(request, metadata);
89
- const classification = metadata.routeHint && metadata.routeHint.trim()
90
- ? {
91
- routeName: metadata.routeHint.trim(),
220
+ const directProviderModel = this.parseDirectProviderModel(request?.model);
221
+ let classification;
222
+ let requestedRoute;
223
+ let selection;
224
+ if (directProviderModel) {
225
+ const providerKeys = this.providerRegistry.listProviderKeys(directProviderModel.providerId);
226
+ let hasModel = false;
227
+ for (const key of providerKeys) {
228
+ try {
229
+ const profile = this.providerRegistry.get(key);
230
+ if (profile?.modelId === directProviderModel.modelId) {
231
+ hasModel = true;
232
+ break;
233
+ }
234
+ }
235
+ catch {
236
+ continue;
237
+ }
238
+ }
239
+ if (!hasModel) {
240
+ throw new VirtualRouterError(`Unknown model ${directProviderModel.modelId} for provider ${directProviderModel.providerId}`, VirtualRouterErrorCode.CONFIG_ERROR, { providerId: directProviderModel.providerId, modelId: directProviderModel.modelId });
241
+ }
242
+ classification = {
243
+ routeName: 'direct',
92
244
  confidence: 1,
93
- reasoning: `route_hint:${metadata.routeHint.trim()}`,
245
+ reasoning: `direct_model:${directProviderModel.providerId}.${directProviderModel.modelId}`,
94
246
  fallback: false,
95
- candidates: [metadata.routeHint.trim()]
247
+ candidates: ['direct']
248
+ };
249
+ requestedRoute = 'direct';
250
+ const directSelection = selectDirectProviderModel(directProviderModel.providerId, directProviderModel.modelId, metadata, features, routingState, {
251
+ routing: this.routing,
252
+ providerRegistry: this.providerRegistry,
253
+ healthManager: this.healthManager,
254
+ contextAdvisor: this.contextAdvisor,
255
+ loadBalancer: this.loadBalancer,
256
+ isProviderCoolingDown: (key) => this.isProviderCoolingDown(key),
257
+ resolveStickyKey: (m) => this.resolveStickyKey(m),
258
+ quotaView: this.quotaView
259
+ });
260
+ if (!directSelection) {
261
+ throw new VirtualRouterError(`All providers unavailable for model ${directProviderModel.providerId}.${directProviderModel.modelId}`, VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, { providerId: directProviderModel.providerId, modelId: directProviderModel.modelId });
96
262
  }
97
- : this.classifier.classify(features);
98
- const requestedRoute = this.normalizeRouteAlias(classification.routeName || DEFAULT_ROUTE);
99
- const selection = this.selectProvider(requestedRoute, metadata, classification, features, routingState);
263
+ selection = directSelection;
264
+ }
265
+ else {
266
+ classification = metadata.routeHint && metadata.routeHint.trim()
267
+ ? {
268
+ routeName: metadata.routeHint.trim(),
269
+ confidence: 1,
270
+ reasoning: `route_hint:${metadata.routeHint.trim()}`,
271
+ fallback: false,
272
+ candidates: [metadata.routeHint.trim()]
273
+ }
274
+ : this.classifier.classify(features);
275
+ requestedRoute = this.normalizeRouteAlias(classification.routeName || DEFAULT_ROUTE);
276
+ selection = this.selectProvider(requestedRoute, metadata, classification, features, routingState);
277
+ }
100
278
  const baseTarget = this.providerRegistry.buildTarget(selection.providerKey);
101
279
  const forceVision = this.routeHasForceFlag('vision');
102
280
  const target = {
@@ -183,6 +361,44 @@ export class VirtualRouterEngine {
183
361
  }
184
362
  };
185
363
  }
364
+ getStopMessageState(metadata) {
365
+ const sessionScope = this.resolveSessionScope(metadata);
366
+ const sessionState = sessionScope ? this.getRoutingInstructionState(sessionScope) : null;
367
+ const stickyKey = this.resolveStickyKey(metadata);
368
+ const stickyState = stickyKey ? this.getRoutingInstructionState(stickyKey) : null;
369
+ const effectiveState = sessionState && typeof sessionState.stopMessageText === 'string' && sessionState.stopMessageText.trim()
370
+ ? sessionState
371
+ : stickyState;
372
+ if (!effectiveState) {
373
+ return null;
374
+ }
375
+ const text = typeof effectiveState.stopMessageText === 'string' ? effectiveState.stopMessageText.trim() : '';
376
+ const maxRepeats = typeof effectiveState.stopMessageMaxRepeats === 'number' &&
377
+ Number.isFinite(effectiveState.stopMessageMaxRepeats)
378
+ ? Math.max(1, Math.floor(effectiveState.stopMessageMaxRepeats))
379
+ : 0;
380
+ if (!text || maxRepeats <= 0) {
381
+ return null;
382
+ }
383
+ return {
384
+ stopMessageText: text,
385
+ stopMessageMaxRepeats: maxRepeats,
386
+ ...(typeof effectiveState.stopMessageSource === 'string' && effectiveState.stopMessageSource.trim()
387
+ ? { stopMessageSource: effectiveState.stopMessageSource.trim() }
388
+ : {}),
389
+ ...(typeof effectiveState.stopMessageUsed === 'number' && Number.isFinite(effectiveState.stopMessageUsed)
390
+ ? { stopMessageUsed: Math.max(0, Math.floor(effectiveState.stopMessageUsed)) }
391
+ : {}),
392
+ ...(typeof effectiveState.stopMessageUpdatedAt === 'number' &&
393
+ Number.isFinite(effectiveState.stopMessageUpdatedAt)
394
+ ? { stopMessageUpdatedAt: effectiveState.stopMessageUpdatedAt }
395
+ : {}),
396
+ ...(typeof effectiveState.stopMessageLastUsedAt === 'number' &&
397
+ Number.isFinite(effectiveState.stopMessageLastUsedAt)
398
+ ? { stopMessageLastUsedAt: effectiveState.stopMessageLastUsedAt }
399
+ : {})
400
+ };
401
+ }
186
402
  handleProviderFailure(event) {
187
403
  handleProviderFailureImpl(event, this.healthManager, this.providerHealthConfig(), (key, ttl) => this.markProviderCooldown(key, ttl));
188
404
  }
@@ -195,6 +411,12 @@ export class VirtualRouterEngine {
195
411
  // ignore persistence errors
196
412
  }
197
413
  }
414
+ // 当 Host 注入 quotaView 时,VirtualRouter 的入池/优先级决策应以 quota 为准;
415
+ // 此时不再在 engine-health 内部进行 429/backoff/series cooldown 等健康决策,
416
+ // 以避免与 daemon/quota-center 的长期熔断策略重复维护并导致日志噪声。
417
+ if (this.quotaView) {
418
+ return;
419
+ }
198
420
  // 配额恢复事件优先处理:一旦识别到 virtualRouterQuotaRecovery,
199
421
  // 直接清理健康状态/冷却 TTL,避免继续走常规错误映射逻辑。
200
422
  const handledByQuota = applyQuotaRecoveryImpl(event, this.healthManager, (key) => this.clearProviderCooldown(key), this.debug);
@@ -278,7 +500,8 @@ export class VirtualRouterEngine {
278
500
  contextAdvisor: this.contextAdvisor,
279
501
  loadBalancer: this.loadBalancer,
280
502
  isProviderCoolingDown: (key) => this.isProviderCoolingDown(key),
281
- resolveStickyKey: (m) => this.resolveStickyKey(m)
503
+ resolveStickyKey: (m) => this.resolveStickyKey(m),
504
+ quotaView: this.quotaView
282
505
  }, { routingState });
283
506
  }
284
507
  incrementRouteStat(routeName, providerKey) {
@@ -327,8 +550,34 @@ export class VirtualRouterEngine {
327
550
  }
328
551
  getRoutingInstructionState(stickyKey) {
329
552
  const key = stickyKey || 'default';
330
- if (this.routingInstructionState.has(key)) {
331
- return this.routingInstructionState.get(key);
553
+ const existing = this.routingInstructionState.get(key);
554
+ // 对 session:/conversation: 作用域,在每次读取时尝试从磁盘刷新 stopMessage 相关字段,
555
+ // 确保 servertool(如 stop_message_auto)通过 sticky-session-store 更新的使用次数
556
+ // 能在 VirtualRouter 日志中实时反映出来。
557
+ if (existing && (key.startsWith('session:') || key.startsWith('conversation:'))) {
558
+ try {
559
+ const persisted = loadRoutingInstructionStateSync(key);
560
+ if (persisted) {
561
+ // 以持久化状态为准(包括清空后的 undefined),避免 stopMessage 状态“卡死”在内存中。
562
+ existing.stopMessageText = persisted.stopMessageText;
563
+ existing.stopMessageMaxRepeats = persisted.stopMessageMaxRepeats;
564
+ existing.stopMessageUsed = persisted.stopMessageUsed;
565
+ existing.stopMessageUpdatedAt = persisted.stopMessageUpdatedAt;
566
+ existing.stopMessageLastUsedAt = persisted.stopMessageLastUsedAt;
567
+ }
568
+ else {
569
+ // 文件被删除或无法解析时,将内存中的 stopMessage 状态一并清空。
570
+ existing.stopMessageText = undefined;
571
+ existing.stopMessageMaxRepeats = undefined;
572
+ existing.stopMessageUsed = undefined;
573
+ existing.stopMessageUpdatedAt = undefined;
574
+ existing.stopMessageLastUsedAt = undefined;
575
+ }
576
+ }
577
+ catch {
578
+ // 刷新失败不影响原有内存状态
579
+ }
580
+ return existing;
332
581
  }
333
582
  let initial = null;
334
583
  // 仅对 session:/conversation: 作用域的 key 尝试从磁盘恢复持久化状态
@@ -0,0 +1,18 @@
1
+ import type { LoadBalancingPolicy } from './types.js';
2
+ export interface LoadBalancingOptions {
3
+ routeName: string;
4
+ candidates: string[];
5
+ stickyKey?: string;
6
+ availabilityCheck: (providerKey: string) => boolean;
7
+ }
8
+ export declare class RouteLoadBalancer {
9
+ private policy;
10
+ private readonly states;
11
+ constructor(policy?: LoadBalancingPolicy);
12
+ updatePolicy(policy?: LoadBalancingPolicy): void;
13
+ select(options: LoadBalancingOptions, strategyOverride?: LoadBalancingPolicy['strategy']): string | null;
14
+ private selectRoundRobin;
15
+ private selectWeighted;
16
+ private selectSticky;
17
+ private getState;
18
+ }
@@ -9,12 +9,13 @@ export class RouteLoadBalancer {
9
9
  this.policy = policy;
10
10
  }
11
11
  }
12
- select(options) {
12
+ select(options, strategyOverride) {
13
13
  const available = options.candidates.filter((candidate) => options.availabilityCheck(candidate));
14
14
  if (available.length === 0) {
15
15
  return null;
16
16
  }
17
- switch (this.policy.strategy) {
17
+ const strategy = strategyOverride ?? this.policy.strategy;
18
+ switch (strategy) {
18
19
  case 'sticky':
19
20
  return this.selectSticky(options.routeName, available, options.stickyKey);
20
21
  case 'weighted':
@@ -28,6 +28,12 @@ export interface RoutingInstructionState {
28
28
  disabledProviders: Set<string>;
29
29
  disabledKeys: Map<string, Set<string | number>>;
30
30
  disabledModels: Map<string, Set<string>>;
31
+ /**
32
+ * Source of the current stopMessage configuration.
33
+ * - 'explicit':由用户通过 <** stopMessage:"..." **> 指令显式设置
34
+ * - 'auto':由系统基于空响应/错误自动推导(例如 Gemini 空回复)
35
+ */
36
+ stopMessageSource?: string;
31
37
  stopMessageText?: string;
32
38
  stopMessageMaxRepeats?: number;
33
39
  stopMessageUsed?: number;
@@ -398,11 +398,19 @@ export function applyRoutingInstructions(instructions, currentState) {
398
398
  ? Math.floor(instruction.stopMessageMaxRepeats)
399
399
  : 0;
400
400
  if (text && maxRepeats > 0) {
401
+ const sameText = typeof newState.stopMessageText === 'string' &&
402
+ newState.stopMessageText.trim() === text;
403
+ const sameMax = typeof newState.stopMessageMaxRepeats === 'number' &&
404
+ Math.floor(newState.stopMessageMaxRepeats) === maxRepeats;
405
+ const isSameInstruction = sameText && sameMax;
401
406
  newState.stopMessageText = text;
402
407
  newState.stopMessageMaxRepeats = maxRepeats;
403
- newState.stopMessageUsed = 0;
404
- newState.stopMessageUpdatedAt = Date.now();
405
- newState.stopMessageLastUsedAt = undefined;
408
+ newState.stopMessageSource = 'explicit';
409
+ if (!isSameInstruction) {
410
+ newState.stopMessageUsed = 0;
411
+ newState.stopMessageUpdatedAt = Date.now();
412
+ newState.stopMessageLastUsedAt = undefined;
413
+ }
406
414
  }
407
415
  break;
408
416
  }
@@ -410,6 +418,7 @@ export function applyRoutingInstructions(instructions, currentState) {
410
418
  newState.stopMessageText = undefined;
411
419
  newState.stopMessageMaxRepeats = undefined;
412
420
  newState.stopMessageUsed = undefined;
421
+ newState.stopMessageSource = undefined;
413
422
  newState.stopMessageUpdatedAt = undefined;
414
423
  newState.stopMessageLastUsedAt = undefined;
415
424
  break;
@@ -453,6 +462,9 @@ export function serializeRoutingInstructionState(state) {
453
462
  provider,
454
463
  models: Array.from(models)
455
464
  })),
465
+ ...(typeof state.stopMessageSource === 'string' && state.stopMessageSource.trim()
466
+ ? { stopMessageSource: state.stopMessageSource }
467
+ : {}),
456
468
  ...(typeof state.stopMessageText === 'string' && state.stopMessageText.trim()
457
469
  ? { stopMessageText: state.stopMessageText }
458
470
  : {}),
@@ -508,6 +520,9 @@ export function deserializeRoutingInstructionState(data) {
508
520
  }
509
521
  }
510
522
  }
523
+ if (typeof data.stopMessageSource === 'string' && data.stopMessageSource.trim()) {
524
+ state.stopMessageSource = data.stopMessageSource.trim();
525
+ }
511
526
  if (typeof data.stopMessageText === 'string' && data.stopMessageText.trim()) {
512
527
  state.stopMessageText = data.stopMessageText;
513
528
  }
@@ -1,3 +1,4 @@
1
1
  import type { RoutingInstructionState } from './routing-instructions.js';
2
2
  export declare function loadRoutingInstructionStateSync(key: string | undefined): RoutingInstructionState | null;
3
3
  export declare function saveRoutingInstructionStateAsync(key: string | undefined, state: RoutingInstructionState | null): void;
4
+ export declare function saveRoutingInstructionStateSync(key: string | undefined, state: RoutingInstructionState | null): void;
@@ -108,3 +108,39 @@ export function saveRoutingInstructionStateAsync(key, state) {
108
108
  // ignore sync write failures
109
109
  }
110
110
  }
111
+ export function saveRoutingInstructionStateSync(key, state) {
112
+ if (!isPersistentKey(key)) {
113
+ return;
114
+ }
115
+ const dir = resolveSessionDir();
116
+ const filename = keyToFilename(key);
117
+ if (!dir || !filename) {
118
+ return;
119
+ }
120
+ const filepath = path.join(dir, filename);
121
+ if (!state) {
122
+ try {
123
+ fs.unlinkSync(filepath);
124
+ }
125
+ catch {
126
+ // ignore unlink failures
127
+ }
128
+ return;
129
+ }
130
+ const payload = {
131
+ version: 1,
132
+ state: serializeRoutingInstructionState(state)
133
+ };
134
+ try {
135
+ fs.mkdirSync(dir, { recursive: true });
136
+ }
137
+ catch {
138
+ // ignore mkdir errors
139
+ }
140
+ try {
141
+ fs.writeFileSync(filepath, JSON.stringify(payload), { encoding: 'utf8' });
142
+ }
143
+ catch {
144
+ // ignore sync write failures
145
+ }
146
+ }
@@ -6,10 +6,17 @@ export declare const DEFAULT_MODEL_CONTEXT_TOKENS = 200000;
6
6
  export declare const DEFAULT_ROUTE = "default";
7
7
  export declare const ROUTE_PRIORITY: string[];
8
8
  export type RoutingInstructionMode = 'force' | 'sticky' | 'none';
9
+ export type RoutePoolMode = 'round-robin' | 'priority';
9
10
  export interface RoutePoolTier {
10
11
  id: string;
11
12
  targets: string[];
12
13
  priority: number;
14
+ /**
15
+ * Pool-level routing mode:
16
+ * - round-robin: force round-robin selection inside this pool (ignores global loadBalancing strategy)
17
+ * - priority: always pick highest-priority key first, only fallback when unavailable
18
+ */
19
+ mode?: RoutePoolMode;
13
20
  backup?: boolean;
14
21
  /**
15
22
  * Optional force flag for this route pool.
@@ -297,6 +304,19 @@ export interface RoutingDiagnostics {
297
304
  poolId?: string;
298
305
  confidence: number;
299
306
  }
307
+ export interface StopMessageStateSnapshot {
308
+ stopMessageText: string;
309
+ stopMessageMaxRepeats: number;
310
+ /**
311
+ * stopMessage 来源:
312
+ * - 'explicit':来自用户显式指令
313
+ * - 'auto':系统基于空响应/错误自动推导
314
+ */
315
+ stopMessageSource?: string;
316
+ stopMessageUsed?: number;
317
+ stopMessageUpdatedAt?: number;
318
+ stopMessageLastUsedAt?: number;
319
+ }
300
320
  export interface RoutingStatusSnapshot {
301
321
  routes: Record<string, {
302
322
  providers: string[];
@@ -373,3 +393,12 @@ export interface VirtualRouterHealthStore {
373
393
  */
374
394
  recordProviderError?(event: ProviderErrorEvent): void;
375
395
  }
396
+ export interface ProviderQuotaViewEntry {
397
+ providerKey: string;
398
+ inPool: boolean;
399
+ reason?: string;
400
+ priorityTier?: number;
401
+ cooldownUntil?: number | null;
402
+ blacklistUntil?: number | null;
403
+ }
404
+ export type ProviderQuotaView = (providerKey: string) => ProviderQuotaViewEntry | null;