@jsonstudio/llms 0.6.954 → 0.6.1172

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. package/dist/conversion/hub/operation-table/operation-table-runner.d.ts +18 -0
  2. package/dist/conversion/hub/operation-table/operation-table-runner.js +158 -0
  3. package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.d.ts +8 -0
  4. package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.js +303 -0
  5. package/dist/conversion/hub/operation-table/semantic-mappers/chat-mapper.d.ts +8 -0
  6. package/dist/conversion/hub/operation-table/semantic-mappers/chat-mapper.js +413 -0
  7. package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.d.ts +7 -0
  8. package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.js +841 -0
  9. package/dist/conversion/hub/operation-table/semantic-mappers/responses-mapper.d.ts +21 -0
  10. package/dist/conversion/hub/operation-table/semantic-mappers/responses-mapper.js +535 -0
  11. package/dist/conversion/hub/ops/operations.d.ts +19 -0
  12. package/dist/conversion/hub/ops/operations.js +126 -0
  13. package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +9 -0
  14. package/dist/conversion/hub/pipeline/hub-pipeline.js +489 -19
  15. package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage2_semantic_map/index.js +6 -0
  16. package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage1_semantic_map/index.js +11 -0
  17. package/dist/conversion/hub/policy/policy-engine.js +41 -9
  18. package/dist/conversion/hub/policy/protocol-spec.d.ts +25 -0
  19. package/dist/conversion/hub/policy/protocol-spec.js +73 -23
  20. package/dist/conversion/hub/process/chat-process.js +252 -41
  21. package/dist/conversion/hub/response/provider-response.js +175 -2
  22. package/dist/conversion/hub/response/response-runtime.js +1 -1
  23. package/dist/conversion/hub/semantic-mappers/anthropic-mapper.d.ts +1 -8
  24. package/dist/conversion/hub/semantic-mappers/anthropic-mapper.js +1 -365
  25. package/dist/conversion/hub/semantic-mappers/chat-mapper.d.ts +1 -8
  26. package/dist/conversion/hub/semantic-mappers/chat-mapper.js +1 -467
  27. package/dist/conversion/hub/semantic-mappers/gemini-mapper.d.ts +1 -7
  28. package/dist/conversion/hub/semantic-mappers/gemini-mapper.js +1 -903
  29. package/dist/conversion/hub/semantic-mappers/responses-mapper.d.ts +1 -21
  30. package/dist/conversion/hub/semantic-mappers/responses-mapper.js +1 -593
  31. package/dist/conversion/hub/tool-surface/tool-surface-engine.d.ts +18 -0
  32. package/dist/conversion/hub/tool-surface/tool-surface-engine.js +571 -0
  33. package/dist/conversion/responses/responses-openai-bridge.js +14 -2
  34. package/dist/conversion/shared/bridge-message-utils.js +2 -8
  35. package/dist/conversion/shared/bridge-policies.js +5 -105
  36. package/dist/conversion/shared/gemini-tool-utils.js +89 -15
  37. package/dist/conversion/shared/protocol-field-allowlists.d.ts +7 -0
  38. package/dist/conversion/shared/protocol-field-allowlists.js +145 -0
  39. package/dist/conversion/shared/reasoning-tool-normalizer.js +4 -2
  40. package/dist/conversion/shared/snapshot-hooks.js +166 -3
  41. package/dist/conversion/shared/text-markup-normalizer.d.ts +2 -0
  42. package/dist/conversion/shared/text-markup-normalizer.js +345 -9
  43. package/dist/conversion/shared/thought-signature-validator.d.ts +52 -0
  44. package/dist/conversion/shared/thought-signature-validator.js +170 -0
  45. package/dist/conversion/shared/tool-argument-repairer.d.ts +39 -0
  46. package/dist/conversion/shared/tool-argument-repairer.js +56 -0
  47. package/dist/conversion/shared/tool-call-id-manager.d.ts +113 -0
  48. package/dist/conversion/shared/tool-call-id-manager.js +231 -0
  49. package/dist/conversion/shared/tool-canonicalizer.js +2 -11
  50. package/dist/router/virtual-router/bootstrap.js +70 -5
  51. package/dist/router/virtual-router/context-advisor.d.ts +4 -0
  52. package/dist/router/virtual-router/context-advisor.js +3 -0
  53. package/dist/router/virtual-router/context-weighted.d.ts +31 -0
  54. package/dist/router/virtual-router/context-weighted.js +54 -0
  55. package/dist/router/virtual-router/engine-selection.js +284 -47
  56. package/dist/router/virtual-router/engine.d.ts +3 -0
  57. package/dist/router/virtual-router/engine.js +142 -33
  58. package/dist/router/virtual-router/health-weighted.d.ts +25 -0
  59. package/dist/router/virtual-router/health-weighted.js +63 -0
  60. package/dist/router/virtual-router/load-balancer.d.ts +2 -0
  61. package/dist/router/virtual-router/load-balancer.js +45 -16
  62. package/dist/router/virtual-router/routing-instructions.js +17 -1
  63. package/dist/router/virtual-router/sticky-session-store.js +136 -24
  64. package/dist/router/virtual-router/stop-message-file-resolver.d.ts +1 -0
  65. package/dist/router/virtual-router/stop-message-file-resolver.js +74 -0
  66. package/dist/router/virtual-router/stop-message-state-sync.d.ts +15 -0
  67. package/dist/router/virtual-router/stop-message-state-sync.js +57 -0
  68. package/dist/router/virtual-router/types.d.ts +98 -0
  69. package/dist/servertool/clock/config.d.ts +7 -0
  70. package/dist/servertool/clock/config.js +27 -0
  71. package/dist/servertool/clock/daemon.d.ts +3 -0
  72. package/dist/servertool/clock/daemon.js +79 -0
  73. package/dist/servertool/clock/io.d.ts +2 -0
  74. package/dist/servertool/clock/io.js +13 -0
  75. package/dist/servertool/clock/paths.d.ts +4 -0
  76. package/dist/servertool/clock/paths.js +25 -0
  77. package/dist/servertool/clock/session-store.d.ts +3 -0
  78. package/dist/servertool/clock/session-store.js +56 -0
  79. package/dist/servertool/clock/state.d.ts +5 -0
  80. package/dist/servertool/clock/state.js +62 -0
  81. package/dist/servertool/clock/task-store.d.ts +5 -0
  82. package/dist/servertool/clock/task-store.js +4 -0
  83. package/dist/servertool/clock/tasks.d.ts +17 -0
  84. package/dist/servertool/clock/tasks.js +221 -0
  85. package/dist/servertool/clock/types.d.ts +36 -0
  86. package/dist/servertool/clock/types.js +1 -0
  87. package/dist/servertool/engine.d.ts +2 -0
  88. package/dist/servertool/engine.js +161 -7
  89. package/dist/servertool/followup-shadow.d.ts +16 -0
  90. package/dist/servertool/followup-shadow.js +145 -0
  91. package/dist/servertool/handlers/apply-patch-guard.js +1 -265
  92. package/dist/servertool/handlers/clock-auto.d.ts +1 -0
  93. package/dist/servertool/handlers/clock-auto.js +160 -0
  94. package/dist/servertool/handlers/clock.d.ts +1 -0
  95. package/dist/servertool/handlers/clock.js +197 -0
  96. package/dist/servertool/handlers/exec-command-guard.js +7 -555
  97. package/dist/servertool/handlers/followup-request-builder.d.ts +15 -7
  98. package/dist/servertool/handlers/followup-request-builder.js +248 -28
  99. package/dist/servertool/handlers/gemini-empty-reply-continue.js +62 -169
  100. package/dist/servertool/handlers/iflow-model-error-retry.js +18 -28
  101. package/dist/servertool/handlers/recursive-detection-guard.d.ts +1 -0
  102. package/dist/servertool/handlers/recursive-detection-guard.js +333 -0
  103. package/dist/servertool/handlers/stop-message-auto.js +47 -175
  104. package/dist/servertool/handlers/vision.d.ts +7 -1
  105. package/dist/servertool/handlers/vision.js +61 -117
  106. package/dist/servertool/handlers/web-search.d.ts +7 -1
  107. package/dist/servertool/handlers/web-search.js +122 -105
  108. package/dist/servertool/reenter-backend.d.ts +23 -0
  109. package/dist/servertool/reenter-backend.js +18 -0
  110. package/dist/servertool/server-side-tools.d.ts +3 -2
  111. package/dist/servertool/server-side-tools.js +64 -10
  112. package/dist/servertool/types.d.ts +92 -3
  113. package/dist/sse/json-to-sse/event-generators/responses.js +3 -21
  114. package/dist/sse/shared/serializers/responses-event-serializer.d.ts +8 -0
  115. package/dist/sse/shared/serializers/responses-event-serializer.js +19 -0
  116. package/dist/sse/shared/writer.js +24 -7
  117. package/dist/tools/apply-patch/execution-capturer.js +3 -1
  118. package/dist/tools/apply-patch/json/parse-loose.d.ts +3 -0
  119. package/dist/tools/apply-patch/json/parse-loose.js +139 -0
  120. package/dist/tools/apply-patch/patch-text/context-diff.d.ts +1 -0
  121. package/dist/tools/apply-patch/patch-text/context-diff.js +173 -0
  122. package/dist/tools/apply-patch/patch-text/git-diff.d.ts +1 -0
  123. package/dist/tools/apply-patch/patch-text/git-diff.js +138 -0
  124. package/dist/tools/apply-patch/patch-text/looks-like-patch.d.ts +1 -0
  125. package/dist/tools/apply-patch/patch-text/looks-like-patch.js +13 -0
  126. package/dist/tools/apply-patch/patch-text/normalize.d.ts +3 -0
  127. package/dist/tools/apply-patch/patch-text/normalize.js +262 -0
  128. package/dist/tools/apply-patch/structured/coercion.d.ts +3 -0
  129. package/dist/tools/apply-patch/structured/coercion.js +82 -0
  130. package/dist/tools/apply-patch/validation/shared.d.ts +3 -0
  131. package/dist/tools/apply-patch/validation/shared.js +6 -0
  132. package/dist/tools/apply-patch/validator.d.ts +2 -2
  133. package/dist/tools/apply-patch/validator.js +6 -556
  134. package/package.json +1 -1
@@ -7,10 +7,11 @@ import { ContextAdvisor } from './context-advisor.js';
7
7
  import { DEFAULT_ROUTE, ROUTE_PRIORITY, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
8
8
  import { getStatsCenter } from '../../telemetry/stats-center.js';
9
9
  import { parseRoutingInstructions, applyRoutingInstructions, cleanMessagesFromRoutingInstructions } from './routing-instructions.js';
10
- import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync } from './sticky-session-store.js';
10
+ import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync, saveRoutingInstructionStateSync } from './sticky-session-store.js';
11
11
  import { buildHitReason, formatVirtualRouterHit } from './engine-logging.js';
12
12
  import { selectDirectProviderModel, selectFromStickyPool, selectProviderImpl } from './engine-selection.js';
13
13
  import { applyQuotaDepletedImpl, applyQuotaRecoveryImpl, applySeriesCooldownImpl, handleProviderFailureImpl, mapProviderErrorImpl } from './engine-health.js';
14
+ import { mergeStopMessageFromPersisted } from './stop-message-state-sync.js';
14
15
  export class VirtualRouterEngine {
15
16
  routing = {};
16
17
  providerRegistry = new ProviderRegistry();
@@ -29,7 +30,8 @@ export class VirtualRouterEngine {
29
30
  healthStore;
30
31
  routingStateStore = {
31
32
  loadSync: loadRoutingInstructionStateSync,
32
- saveAsync: saveRoutingInstructionStateAsync
33
+ saveAsync: saveRoutingInstructionStateAsync,
34
+ saveSync: saveRoutingInstructionStateSync
33
35
  };
34
36
  routingInstructionState = new Map();
35
37
  quotaView;
@@ -56,7 +58,8 @@ export class VirtualRouterEngine {
56
58
  deps.routingStateStore ??
57
59
  {
58
60
  loadSync: loadRoutingInstructionStateSync,
59
- saveAsync: saveRoutingInstructionStateAsync
61
+ saveAsync: saveRoutingInstructionStateAsync,
62
+ saveSync: saveRoutingInstructionStateSync
60
63
  };
61
64
  // Routing state store changes require clearing in-memory cache to avoid stale reads.
62
65
  this.routingInstructionState.clear();
@@ -106,6 +109,7 @@ export class VirtualRouterEngine {
106
109
  route(request, metadata) {
107
110
  const stickyKey = this.resolveStickyKey(metadata);
108
111
  const sessionScope = this.resolveSessionScope(metadata);
112
+ const stopMessageScope = this.resolveStopMessageScope(metadata);
109
113
  // Routing instructions should be session/conversation-scoped when available (including /v1/responses),
110
114
  // while auto-sticky for Responses remains request-chain scoped via resolveStickyKey().
111
115
  const stateKey = sessionScope || stickyKey || 'default';
@@ -125,8 +129,8 @@ export class VirtualRouterEngine {
125
129
  preferTarget: undefined
126
130
  };
127
131
  }
128
- if (sessionScope) {
129
- const sessionState = this.getRoutingInstructionState(sessionScope);
132
+ if (stopMessageScope) {
133
+ const sessionState = this.getRoutingInstructionState(stopMessageScope);
130
134
  if (typeof sessionState.stopMessageText === 'string' ||
131
135
  typeof sessionState.stopMessageMaxRepeats === 'number') {
132
136
  routingState = {
@@ -141,8 +145,8 @@ export class VirtualRouterEngine {
141
145
  }
142
146
  const parsedInstructions = parseRoutingInstructions(request.messages);
143
147
  let instructions = parsedInstructions;
144
- if (sessionScope && parsedInstructions.length > 0) {
145
- const sessionState = this.getRoutingInstructionState(sessionScope);
148
+ if (stopMessageScope && parsedInstructions.length > 0) {
149
+ const sessionState = this.getRoutingInstructionState(stopMessageScope);
146
150
  const hasStopMessageClear = parsedInstructions.some((entry) => entry.type === 'stopMessageClear');
147
151
  const stopMessageSets = parsedInstructions.filter((entry) => entry.type === 'stopMessageSet');
148
152
  if (!hasStopMessageClear && stopMessageSets.length > 0) {
@@ -162,6 +166,14 @@ export class VirtualRouterEngine {
162
166
  }
163
167
  }
164
168
  }
169
+ // stopMessage must be session-scoped: require explicit sessionId in metadata.
170
+ // This prevents global/default persistence and ensures the trigger matches the setting sessionId.
171
+ if (parsedInstructions.length > 0) {
172
+ const hasStopMessageInstruction = parsedInstructions.some((entry) => entry.type === 'stopMessageSet' || entry.type === 'stopMessageClear');
173
+ if (hasStopMessageInstruction && !stopMessageScope) {
174
+ throw new VirtualRouterError('[stopMessage] requires sessionId (e.g. set x-session-id header or metadata.sessionId).', VirtualRouterErrorCode.CONFIG_ERROR, { requestId: metadata.requestId, entryEndpoint: metadata.entryEndpoint });
175
+ }
176
+ }
165
177
  if (parsedInstructions.length > 0) {
166
178
  request.messages = cleanMessagesFromRoutingInstructions(request.messages);
167
179
  }
@@ -171,21 +183,25 @@ export class VirtualRouterEngine {
171
183
  this.persistRoutingInstructionState(stateKey, routingState);
172
184
  // 对 stopMessage 指令补充一份基于 session/conversation 的持久化状态,
173
185
  // 便于 server-side 工具通过 session:*/conversation:* scope 读取到相同配置。
174
- if (sessionScope) {
186
+ // stopMessage is strictly session-scoped (sessionId only). Persist it under the session scope
187
+ // so servertool triggers always match the setting sessionId.
188
+ if (stopMessageScope) {
175
189
  const hasStopMessageSet = instructions.some((entry) => entry.type === 'stopMessageSet');
176
190
  const hasStopMessageClear = instructions.some((entry) => entry.type === 'stopMessageClear');
177
191
  if (hasStopMessageSet || hasStopMessageClear) {
178
- const sessionState = this.getRoutingInstructionState(sessionScope);
192
+ const sessionState = this.getRoutingInstructionState(stopMessageScope);
179
193
  let nextSessionState = {
180
194
  ...sessionState
181
195
  };
182
196
  let shouldPersistSessionState = false;
183
197
  if (hasStopMessageClear) {
198
+ const clearedAt = Date.now();
184
199
  nextSessionState.stopMessageText = undefined;
185
200
  nextSessionState.stopMessageMaxRepeats = undefined;
186
201
  nextSessionState.stopMessageUsed = undefined;
187
- nextSessionState.stopMessageUpdatedAt = undefined;
188
- nextSessionState.stopMessageLastUsedAt = undefined;
202
+ nextSessionState.stopMessageUpdatedAt = clearedAt;
203
+ nextSessionState.stopMessageLastUsedAt = clearedAt;
204
+ nextSessionState.stopMessageSource = undefined;
189
205
  shouldPersistSessionState = true;
190
206
  }
191
207
  else if (hasStopMessageSet) {
@@ -210,8 +226,8 @@ export class VirtualRouterEngine {
210
226
  }
211
227
  }
212
228
  if (shouldPersistSessionState) {
213
- this.routingInstructionState.set(sessionScope, nextSessionState);
214
- this.persistRoutingInstructionState(sessionScope, nextSessionState);
229
+ this.routingInstructionState.set(stopMessageScope, nextSessionState);
230
+ this.persistRoutingInstructionState(stopMessageScope, nextSessionState);
215
231
  }
216
232
  else {
217
233
  nextSessionState = sessionState;
@@ -228,8 +244,8 @@ export class VirtualRouterEngine {
228
244
  }
229
245
  }
230
246
  }
231
- if (instructions.length === 0 && sessionScope) {
232
- const sessionState = this.getRoutingInstructionState(sessionScope);
247
+ if (instructions.length === 0 && stopMessageScope) {
248
+ const sessionState = this.getRoutingInstructionState(stopMessageScope);
233
249
  if (typeof sessionState.stopMessageText === 'string' ||
234
250
  typeof sessionState.stopMessageMaxRepeats === 'number') {
235
251
  routingState.stopMessageText = sessionState.stopMessageText;
@@ -709,6 +725,13 @@ export class VirtualRouterEngine {
709
725
  }
710
726
  return undefined;
711
727
  }
728
+ resolveStopMessageScope(metadata) {
729
+ const sessionId = typeof metadata.sessionId === 'string' ? metadata.sessionId.trim() : '';
730
+ if (sessionId) {
731
+ return `session:${sessionId}`;
732
+ }
733
+ return undefined;
734
+ }
712
735
  getRoutingInstructionState(stickyKey) {
713
736
  const key = stickyKey || 'default';
714
737
  const existing = this.routingInstructionState.get(key);
@@ -718,22 +741,13 @@ export class VirtualRouterEngine {
718
741
  if (existing && (key.startsWith('session:') || key.startsWith('conversation:'))) {
719
742
  try {
720
743
  const persisted = this.routingStateStore.loadSync(key);
721
- if (persisted) {
722
- // 以持久化状态为准(包括清空后的 undefined),避免 stopMessage 状态“卡死”在内存中。
723
- existing.stopMessageText = persisted.stopMessageText;
724
- existing.stopMessageMaxRepeats = persisted.stopMessageMaxRepeats;
725
- existing.stopMessageUsed = persisted.stopMessageUsed;
726
- existing.stopMessageUpdatedAt = persisted.stopMessageUpdatedAt;
727
- existing.stopMessageLastUsedAt = persisted.stopMessageLastUsedAt;
728
- }
729
- else {
730
- // 文件被删除或无法解析时,将内存中的 stopMessage 状态一并清空。
731
- existing.stopMessageText = undefined;
732
- existing.stopMessageMaxRepeats = undefined;
733
- existing.stopMessageUsed = undefined;
734
- existing.stopMessageUpdatedAt = undefined;
735
- existing.stopMessageLastUsedAt = undefined;
736
- }
744
+ const merged = mergeStopMessageFromPersisted(existing, persisted);
745
+ existing.stopMessageSource = merged.stopMessageSource;
746
+ existing.stopMessageText = merged.stopMessageText;
747
+ existing.stopMessageMaxRepeats = merged.stopMessageMaxRepeats;
748
+ existing.stopMessageUsed = merged.stopMessageUsed;
749
+ existing.stopMessageUpdatedAt = merged.stopMessageUpdatedAt;
750
+ existing.stopMessageLastUsedAt = merged.stopMessageLastUsedAt;
737
751
  }
738
752
  catch {
739
753
  // 刷新失败不影响原有内存状态
@@ -753,6 +767,7 @@ export class VirtualRouterEngine {
753
767
  disabledProviders: new Set(),
754
768
  disabledKeys: new Map(),
755
769
  disabledModels: new Map(),
770
+ stopMessageSource: undefined,
756
771
  stopMessageText: undefined,
757
772
  stopMessageMaxRepeats: undefined,
758
773
  stopMessageUsed: undefined,
@@ -765,6 +780,12 @@ export class VirtualRouterEngine {
765
780
  }
766
781
  buildMetadataInstructions(metadata) {
767
782
  const instructions = [];
783
+ const forcedProviderKeyRaw = metadata
784
+ .__shadowCompareForcedProviderKey;
785
+ const forcedProviderKey = this.parseMetadataForceProviderKey(forcedProviderKeyRaw);
786
+ if (forcedProviderKey) {
787
+ instructions.push({ type: 'force', ...forcedProviderKey });
788
+ }
768
789
  if (Array.isArray(metadata.disabledProviderKeyAliases)) {
769
790
  for (const entry of metadata.disabledProviderKeyAliases) {
770
791
  const parsed = this.parseMetadataDisableDescriptor(entry);
@@ -797,6 +818,76 @@ export class VirtualRouterEngine {
797
818
  }
798
819
  return { provider, keyAlias: alias };
799
820
  }
821
+ parseMetadataForceProviderKey(entry) {
822
+ if (typeof entry !== 'string') {
823
+ return null;
824
+ }
825
+ const trimmed = entry.trim();
826
+ if (!trimmed) {
827
+ return null;
828
+ }
829
+ // Accept the bracket notation used in virtual-router-hit logs: provider[alias].model
830
+ // - provider[].model means provider.model across all aliases
831
+ const bracketMatch = trimmed.match(/^([a-zA-Z0-9_-]+)\[([a-zA-Z0-9_-]*)\](?:\.(.+))?$/);
832
+ if (bracketMatch) {
833
+ const provider = bracketMatch[1]?.trim() || '';
834
+ const keyAlias = bracketMatch[2]?.trim() || '';
835
+ const model = typeof bracketMatch[3] === 'string' ? bracketMatch[3].trim() : '';
836
+ if (!provider) {
837
+ return null;
838
+ }
839
+ if (keyAlias) {
840
+ return {
841
+ provider,
842
+ keyAlias,
843
+ ...(model ? { model } : {}),
844
+ pathLength: 3
845
+ };
846
+ }
847
+ if (model) {
848
+ return {
849
+ provider,
850
+ model,
851
+ pathLength: 2
852
+ };
853
+ }
854
+ return { provider, pathLength: 1 };
855
+ }
856
+ // Accept provider.keyAlias.model and provider.model (model may contain dots when keyAlias is explicit).
857
+ const parts = trimmed.split('.').map((part) => part.trim()).filter(Boolean);
858
+ if (parts.length === 0) {
859
+ return null;
860
+ }
861
+ const provider = parts[0] || '';
862
+ if (!provider) {
863
+ return null;
864
+ }
865
+ if (parts.length === 1) {
866
+ return { provider, pathLength: 1 };
867
+ }
868
+ if (parts.length === 2) {
869
+ const second = parts[1] || '';
870
+ if (!second) {
871
+ return null;
872
+ }
873
+ if (/^\d+$/.test(second)) {
874
+ const keyIndex = Number.parseInt(second, 10);
875
+ return Number.isFinite(keyIndex) && keyIndex > 0 ? { provider, keyIndex, pathLength: 2 } : null;
876
+ }
877
+ return { provider, model: second, pathLength: 2 };
878
+ }
879
+ const keyAlias = parts[1] || '';
880
+ const model = parts.slice(2).join('.').trim();
881
+ if (!keyAlias) {
882
+ return null;
883
+ }
884
+ return {
885
+ provider,
886
+ keyAlias,
887
+ ...(model ? { model } : {}),
888
+ pathLength: 3
889
+ };
890
+ }
800
891
  resolveRoutingMode(instructions, state) {
801
892
  const hasForce = instructions.some((inst) => inst.type === 'force');
802
893
  const hasAllow = instructions.some((inst) => inst.type === 'allow');
@@ -1281,11 +1372,29 @@ export class VirtualRouterEngine {
1281
1372
  if (!key || (!key.startsWith('session:') && !key.startsWith('conversation:'))) {
1282
1373
  return;
1283
1374
  }
1375
+ const supportsSync = typeof this.routingStateStore.saveSync === 'function';
1376
+ const prefersSync = supportsSync &&
1377
+ key.startsWith('session:') &&
1378
+ (Boolean(state.stopMessageText && state.stopMessageText.trim()) ||
1379
+ (typeof state.stopMessageMaxRepeats === 'number' && Number.isFinite(state.stopMessageMaxRepeats)) ||
1380
+ (typeof state.stopMessageUsed === 'number' && Number.isFinite(state.stopMessageUsed)) ||
1381
+ (typeof state.stopMessageUpdatedAt === 'number' && Number.isFinite(state.stopMessageUpdatedAt)) ||
1382
+ (typeof state.stopMessageLastUsedAt === 'number' && Number.isFinite(state.stopMessageLastUsedAt)));
1284
1383
  if (this.isRoutingStateEmpty(state)) {
1285
- this.routingStateStore.saveAsync(key, null);
1384
+ if (prefersSync) {
1385
+ this.routingStateStore.saveSync(key, null);
1386
+ }
1387
+ else {
1388
+ this.routingStateStore.saveAsync(key, null);
1389
+ }
1286
1390
  return;
1287
1391
  }
1288
- this.routingStateStore.saveAsync(key, state);
1392
+ if (prefersSync) {
1393
+ this.routingStateStore.saveSync(key, state);
1394
+ }
1395
+ else {
1396
+ this.routingStateStore.saveAsync(key, state);
1397
+ }
1289
1398
  }
1290
1399
  markProviderCooldown(providerKey, cooldownMs) {
1291
1400
  if (!providerKey) {
@@ -0,0 +1,25 @@
1
+ import type { HealthWeightedLoadBalancingConfig, ProviderQuotaViewEntry } from './types.js';
2
+ export type ResolvedHealthWeightedConfig = Required<{
3
+ enabled: boolean;
4
+ baseWeight: number;
5
+ minMultiplier: number;
6
+ beta: number;
7
+ halfLifeMs: number;
8
+ recoverToBestOnRetry: boolean;
9
+ }>;
10
+ /**
11
+ * AWRR constant table (defaults).
12
+ *
13
+ * Notes:
14
+ * - `minMultiplier=0.5` is the "50% of baseline share" floor: penalties will not reduce a key below ~half of
15
+ * its initial (equal) share within the same pool bucket.
16
+ * - `halfLifeMs=10min` means: if no new errors occur, the effect of the last error decays by 50% every 10 minutes.
17
+ * - `beta` controls how quickly errors reduce share; tune carefully.
18
+ */
19
+ export declare const DEFAULT_HEALTH_WEIGHTED_CONFIG: ResolvedHealthWeightedConfig;
20
+ export declare function resolveHealthWeightedConfig(raw?: HealthWeightedLoadBalancingConfig | null): ResolvedHealthWeightedConfig;
21
+ export declare function computeHealthMultiplier(entry: ProviderQuotaViewEntry | null, nowMs: number, cfg: ResolvedHealthWeightedConfig): number;
22
+ export declare function computeHealthWeight(entry: ProviderQuotaViewEntry | null, nowMs: number, cfg: ResolvedHealthWeightedConfig): {
23
+ weight: number;
24
+ multiplier: number;
25
+ };
@@ -0,0 +1,63 @@
1
+ /**
2
+ * AWRR constant table (defaults).
3
+ *
4
+ * Notes:
5
+ * - `minMultiplier=0.5` is the "50% of baseline share" floor: penalties will not reduce a key below ~half of
6
+ * its initial (equal) share within the same pool bucket.
7
+ * - `halfLifeMs=10min` means: if no new errors occur, the effect of the last error decays by 50% every 10 minutes.
8
+ * - `beta` controls how quickly errors reduce share; tune carefully.
9
+ */
10
+ export const DEFAULT_HEALTH_WEIGHTED_CONFIG = {
11
+ enabled: false,
12
+ baseWeight: 100,
13
+ minMultiplier: 0.5,
14
+ beta: 0.1,
15
+ halfLifeMs: 10 * 60 * 1000,
16
+ recoverToBestOnRetry: true
17
+ };
18
+ export function resolveHealthWeightedConfig(raw) {
19
+ const enabled = raw?.enabled ?? DEFAULT_HEALTH_WEIGHTED_CONFIG.enabled;
20
+ const baseWeight = typeof raw?.baseWeight === 'number' && Number.isFinite(raw.baseWeight) && raw.baseWeight > 0
21
+ ? Math.floor(raw.baseWeight)
22
+ : DEFAULT_HEALTH_WEIGHTED_CONFIG.baseWeight;
23
+ const minMultiplier = typeof raw?.minMultiplier === 'number' && Number.isFinite(raw.minMultiplier) && raw.minMultiplier > 0
24
+ ? Math.min(1, raw.minMultiplier)
25
+ : DEFAULT_HEALTH_WEIGHTED_CONFIG.minMultiplier;
26
+ const beta = typeof raw?.beta === 'number' && Number.isFinite(raw.beta) && raw.beta >= 0
27
+ ? raw.beta
28
+ : DEFAULT_HEALTH_WEIGHTED_CONFIG.beta;
29
+ const halfLifeMs = typeof raw?.halfLifeMs === 'number' && Number.isFinite(raw.halfLifeMs) && raw.halfLifeMs > 0
30
+ ? Math.floor(raw.halfLifeMs)
31
+ : DEFAULT_HEALTH_WEIGHTED_CONFIG.halfLifeMs;
32
+ const recoverToBestOnRetry = raw?.recoverToBestOnRetry ?? DEFAULT_HEALTH_WEIGHTED_CONFIG.recoverToBestOnRetry;
33
+ return {
34
+ enabled,
35
+ baseWeight,
36
+ minMultiplier,
37
+ beta,
38
+ halfLifeMs,
39
+ recoverToBestOnRetry
40
+ };
41
+ }
42
+ export function computeHealthMultiplier(entry, nowMs, cfg) {
43
+ if (!entry) {
44
+ return 1;
45
+ }
46
+ const lastErrorAtMs = typeof entry.lastErrorAtMs === 'number' && Number.isFinite(entry.lastErrorAtMs) ? entry.lastErrorAtMs : null;
47
+ const consecutiveErrorCount = typeof entry.consecutiveErrorCount === 'number' && Number.isFinite(entry.consecutiveErrorCount) && entry.consecutiveErrorCount > 0
48
+ ? Math.floor(entry.consecutiveErrorCount)
49
+ : 0;
50
+ if (!lastErrorAtMs || consecutiveErrorCount <= 0) {
51
+ return 1;
52
+ }
53
+ const elapsedMs = Math.max(0, nowMs - lastErrorAtMs);
54
+ const decay = Math.exp((-Math.log(2) * elapsedMs) / cfg.halfLifeMs);
55
+ const effectiveErrors = consecutiveErrorCount * decay;
56
+ const raw = 1 - cfg.beta * effectiveErrors;
57
+ return Math.max(cfg.minMultiplier, Math.min(1, raw));
58
+ }
59
+ export function computeHealthWeight(entry, nowMs, cfg) {
60
+ const multiplier = computeHealthMultiplier(entry, nowMs, cfg);
61
+ const weight = Math.max(1, Math.round(cfg.baseWeight * multiplier));
62
+ return { weight, multiplier };
63
+ }
@@ -3,6 +3,7 @@ export interface LoadBalancingOptions {
3
3
  routeName: string;
4
4
  candidates: string[];
5
5
  stickyKey?: string;
6
+ weights?: Record<string, number>;
6
7
  availabilityCheck: (providerKey: string) => boolean;
7
8
  }
8
9
  export declare class RouteLoadBalancer {
@@ -10,6 +11,7 @@ export declare class RouteLoadBalancer {
10
11
  private readonly states;
11
12
  constructor(policy?: LoadBalancingPolicy);
12
13
  updatePolicy(policy?: LoadBalancingPolicy): void;
14
+ getPolicy(): LoadBalancingPolicy;
13
15
  select(options: LoadBalancingOptions, strategyOverride?: LoadBalancingPolicy['strategy']): string | null;
14
16
  private selectRoundRobin;
15
17
  private selectWeighted;
@@ -9,6 +9,9 @@ export class RouteLoadBalancer {
9
9
  this.policy = policy;
10
10
  }
11
11
  }
12
+ getPolicy() {
13
+ return this.policy;
14
+ }
12
15
  select(options, strategyOverride) {
13
16
  const available = options.candidates.filter((candidate) => options.availabilityCheck(candidate));
14
17
  if (available.length === 0) {
@@ -17,10 +20,16 @@ export class RouteLoadBalancer {
17
20
  const strategy = strategyOverride ?? this.policy.strategy;
18
21
  switch (strategy) {
19
22
  case 'sticky':
20
- return this.selectSticky(options.routeName, available, options.stickyKey);
23
+ return this.selectSticky(options.routeName, available, options.stickyKey, options.weights ?? this.policy.weights);
21
24
  case 'weighted':
22
- return this.selectWeighted(available);
25
+ return this.selectWeighted(options.routeName, available, options.weights ?? this.policy.weights);
23
26
  default:
27
+ if (options.weights) {
28
+ const distinct = new Set(available.map((candidate) => Math.max(1, options.weights?.[candidate] ?? 1)));
29
+ if (distinct.size > 1) {
30
+ return this.selectWeighted(options.routeName, available, options.weights);
31
+ }
32
+ }
24
33
  return this.selectRoundRobin(options.routeName, available);
25
34
  }
26
35
  }
@@ -30,23 +39,41 @@ export class RouteLoadBalancer {
30
39
  state.pointer = (state.pointer + 1) % candidates.length;
31
40
  return choice;
32
41
  }
33
- selectWeighted(candidates) {
34
- if (!this.policy.weights) {
35
- return candidates[0];
42
+ selectWeighted(routeName, candidates, weights) {
43
+ // Deterministic smooth weighted round-robin (no randomness) so routing behavior is testable and stable.
44
+ // Each candidate with a positive weight is guaranteed to be selected eventually.
45
+ const state = this.getState(routeName);
46
+ const current = state.weighted.currentWeights;
47
+ const candidateSet = new Set(candidates);
48
+ for (const existing of Array.from(current.keys())) {
49
+ if (!candidateSet.has(existing)) {
50
+ current.delete(existing);
51
+ }
52
+ }
53
+ for (const key of candidates) {
54
+ if (!current.has(key)) {
55
+ current.set(key, 0);
56
+ }
36
57
  }
37
- const weights = candidates.map((candidate) => Math.max(1, this.policy.weights?.[candidate] ?? 1));
38
- const total = weights.reduce((sum, weight) => sum + weight, 0);
39
- const threshold = Math.random() * total;
40
- let running = 0;
58
+ const candidateWeights = candidates.map((candidate) => Math.max(1, weights?.[candidate] ?? 1));
59
+ const totalWeight = candidateWeights.reduce((sum, w) => sum + w, 0);
60
+ let bestIndex = 0;
61
+ let bestScore = Number.NEGATIVE_INFINITY;
41
62
  for (let i = 0; i < candidates.length; i += 1) {
42
- running += weights[i];
43
- if (running >= threshold) {
44
- return candidates[i];
63
+ const key = candidates[i];
64
+ const w = candidateWeights[i];
65
+ const next = (current.get(key) ?? 0) + w;
66
+ current.set(key, next);
67
+ if (next > bestScore) {
68
+ bestScore = next;
69
+ bestIndex = i;
45
70
  }
46
71
  }
47
- return candidates[candidates.length - 1];
72
+ const selectedKey = candidates[bestIndex];
73
+ current.set(selectedKey, (current.get(selectedKey) ?? 0) - totalWeight);
74
+ return selectedKey;
48
75
  }
49
- selectSticky(routeName, candidates, stickyKey) {
76
+ selectSticky(routeName, candidates, stickyKey, weights) {
50
77
  if (!stickyKey) {
51
78
  return this.selectRoundRobin(routeName, candidates);
52
79
  }
@@ -55,13 +82,15 @@ export class RouteLoadBalancer {
55
82
  if (pinned && candidates.includes(pinned)) {
56
83
  return pinned;
57
84
  }
58
- const choice = this.selectRoundRobin(routeName, candidates);
85
+ const choice = weights && Object.keys(weights).length > 0
86
+ ? this.selectWeighted(`${routeName}:sticky`, candidates, weights)
87
+ : this.selectRoundRobin(routeName, candidates);
59
88
  state.stickyMap.set(stickyKey, choice);
60
89
  return choice;
61
90
  }
62
91
  getState(routeName) {
63
92
  if (!this.states.has(routeName)) {
64
- this.states.set(routeName, { pointer: 0, stickyMap: new Map() });
93
+ this.states.set(routeName, { pointer: 0, stickyMap: new Map(), weighted: { currentWeights: new Map() } });
65
94
  }
66
95
  return this.states.get(routeName);
67
96
  }
@@ -1,15 +1,21 @@
1
1
  import { extractMessageText } from './message-utils.js';
2
+ import { resolveStopMessageText } from './stop-message-file-resolver.js';
2
3
  export function parseRoutingInstructions(messages) {
3
4
  const instructions = [];
4
5
  // 从最新一条携带路由指令标记(<** ... **>)的 user 消息中解析指令,
5
6
  // 而不是简单地取"最后一条 user 消息"。这样可以在服务重启后,通过完整
6
7
  // 会话历史恢复 sticky/黑名单状态,同时保持"最后一次指令生效"的语义。
7
8
  let sanitized = null;
9
+ let sanitizedIndex = -1;
10
+ let lastUserIndex = -1;
8
11
  for (let idx = messages.length - 1; idx >= 0; idx -= 1) {
9
12
  const message = messages[idx];
10
13
  if (!message || message.role !== 'user') {
11
14
  continue;
12
15
  }
16
+ if (lastUserIndex < 0) {
17
+ lastUserIndex = idx;
18
+ }
13
19
  const content = extractMessageText(message);
14
20
  if (!content) {
15
21
  continue;
@@ -22,6 +28,7 @@ export function parseRoutingInstructions(messages) {
22
28
  continue;
23
29
  }
24
30
  sanitized = candidate;
31
+ sanitizedIndex = idx;
25
32
  break;
26
33
  }
27
34
  if (!sanitized) {
@@ -38,6 +45,15 @@ export function parseRoutingInstructions(messages) {
38
45
  for (const segment of segments) {
39
46
  const parsed = parseSingleInstruction(segment);
40
47
  if (parsed) {
48
+ // stopMessage is a "command" and must only be set/cleared from the *latest* user message.
49
+ // Otherwise, clients that resend full history (including a past "<**stopMessage:...**>" message)
50
+ // would keep re-applying stopMessage after it has been consumed/cleared.
51
+ if ((parsed.type === 'stopMessageSet' || parsed.type === 'stopMessageClear') &&
52
+ lastUserIndex >= 0 &&
53
+ sanitizedIndex >= 0 &&
54
+ sanitizedIndex !== lastUserIndex) {
55
+ continue;
56
+ }
41
57
  instructions.push(parsed);
42
58
  }
43
59
  }
@@ -176,7 +192,7 @@ function parseSingleInstruction(instruction) {
176
192
  }
177
193
  return {
178
194
  type: 'stopMessageSet',
179
- stopMessageText: text,
195
+ stopMessageText: resolveStopMessageText(text),
180
196
  stopMessageMaxRepeats: maxRepeats
181
197
  };
182
198
  }