@jsonstudio/llms 0.6.954 → 0.6.1172
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/hub/operation-table/operation-table-runner.d.ts +18 -0
- package/dist/conversion/hub/operation-table/operation-table-runner.js +158 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.d.ts +8 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.js +303 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/chat-mapper.d.ts +8 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/chat-mapper.js +413 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.d.ts +7 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.js +841 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/responses-mapper.d.ts +21 -0
- package/dist/conversion/hub/operation-table/semantic-mappers/responses-mapper.js +535 -0
- package/dist/conversion/hub/ops/operations.d.ts +19 -0
- package/dist/conversion/hub/ops/operations.js +126 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +9 -0
- package/dist/conversion/hub/pipeline/hub-pipeline.js +489 -19
- package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage2_semantic_map/index.js +6 -0
- package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage1_semantic_map/index.js +11 -0
- package/dist/conversion/hub/policy/policy-engine.js +41 -9
- package/dist/conversion/hub/policy/protocol-spec.d.ts +25 -0
- package/dist/conversion/hub/policy/protocol-spec.js +73 -23
- package/dist/conversion/hub/process/chat-process.js +252 -41
- package/dist/conversion/hub/response/provider-response.js +175 -2
- package/dist/conversion/hub/response/response-runtime.js +1 -1
- package/dist/conversion/hub/semantic-mappers/anthropic-mapper.d.ts +1 -8
- package/dist/conversion/hub/semantic-mappers/anthropic-mapper.js +1 -365
- package/dist/conversion/hub/semantic-mappers/chat-mapper.d.ts +1 -8
- package/dist/conversion/hub/semantic-mappers/chat-mapper.js +1 -467
- package/dist/conversion/hub/semantic-mappers/gemini-mapper.d.ts +1 -7
- package/dist/conversion/hub/semantic-mappers/gemini-mapper.js +1 -903
- package/dist/conversion/hub/semantic-mappers/responses-mapper.d.ts +1 -21
- package/dist/conversion/hub/semantic-mappers/responses-mapper.js +1 -593
- package/dist/conversion/hub/tool-surface/tool-surface-engine.d.ts +18 -0
- package/dist/conversion/hub/tool-surface/tool-surface-engine.js +571 -0
- package/dist/conversion/responses/responses-openai-bridge.js +14 -2
- package/dist/conversion/shared/bridge-message-utils.js +2 -8
- package/dist/conversion/shared/bridge-policies.js +5 -105
- package/dist/conversion/shared/gemini-tool-utils.js +89 -15
- package/dist/conversion/shared/protocol-field-allowlists.d.ts +7 -0
- package/dist/conversion/shared/protocol-field-allowlists.js +145 -0
- package/dist/conversion/shared/reasoning-tool-normalizer.js +4 -2
- package/dist/conversion/shared/snapshot-hooks.js +166 -3
- package/dist/conversion/shared/text-markup-normalizer.d.ts +2 -0
- package/dist/conversion/shared/text-markup-normalizer.js +345 -9
- package/dist/conversion/shared/thought-signature-validator.d.ts +52 -0
- package/dist/conversion/shared/thought-signature-validator.js +170 -0
- package/dist/conversion/shared/tool-argument-repairer.d.ts +39 -0
- package/dist/conversion/shared/tool-argument-repairer.js +56 -0
- package/dist/conversion/shared/tool-call-id-manager.d.ts +113 -0
- package/dist/conversion/shared/tool-call-id-manager.js +231 -0
- package/dist/conversion/shared/tool-canonicalizer.js +2 -11
- package/dist/router/virtual-router/bootstrap.js +70 -5
- package/dist/router/virtual-router/context-advisor.d.ts +4 -0
- package/dist/router/virtual-router/context-advisor.js +3 -0
- package/dist/router/virtual-router/context-weighted.d.ts +31 -0
- package/dist/router/virtual-router/context-weighted.js +54 -0
- package/dist/router/virtual-router/engine-selection.js +284 -47
- package/dist/router/virtual-router/engine.d.ts +3 -0
- package/dist/router/virtual-router/engine.js +142 -33
- package/dist/router/virtual-router/health-weighted.d.ts +25 -0
- package/dist/router/virtual-router/health-weighted.js +63 -0
- package/dist/router/virtual-router/load-balancer.d.ts +2 -0
- package/dist/router/virtual-router/load-balancer.js +45 -16
- package/dist/router/virtual-router/routing-instructions.js +17 -1
- package/dist/router/virtual-router/sticky-session-store.js +136 -24
- package/dist/router/virtual-router/stop-message-file-resolver.d.ts +1 -0
- package/dist/router/virtual-router/stop-message-file-resolver.js +74 -0
- package/dist/router/virtual-router/stop-message-state-sync.d.ts +15 -0
- package/dist/router/virtual-router/stop-message-state-sync.js +57 -0
- package/dist/router/virtual-router/types.d.ts +98 -0
- package/dist/servertool/clock/config.d.ts +7 -0
- package/dist/servertool/clock/config.js +27 -0
- package/dist/servertool/clock/daemon.d.ts +3 -0
- package/dist/servertool/clock/daemon.js +79 -0
- package/dist/servertool/clock/io.d.ts +2 -0
- package/dist/servertool/clock/io.js +13 -0
- package/dist/servertool/clock/paths.d.ts +4 -0
- package/dist/servertool/clock/paths.js +25 -0
- package/dist/servertool/clock/session-store.d.ts +3 -0
- package/dist/servertool/clock/session-store.js +56 -0
- package/dist/servertool/clock/state.d.ts +5 -0
- package/dist/servertool/clock/state.js +62 -0
- package/dist/servertool/clock/task-store.d.ts +5 -0
- package/dist/servertool/clock/task-store.js +4 -0
- package/dist/servertool/clock/tasks.d.ts +17 -0
- package/dist/servertool/clock/tasks.js +221 -0
- package/dist/servertool/clock/types.d.ts +36 -0
- package/dist/servertool/clock/types.js +1 -0
- package/dist/servertool/engine.d.ts +2 -0
- package/dist/servertool/engine.js +161 -7
- package/dist/servertool/followup-shadow.d.ts +16 -0
- package/dist/servertool/followup-shadow.js +145 -0
- package/dist/servertool/handlers/apply-patch-guard.js +1 -265
- package/dist/servertool/handlers/clock-auto.d.ts +1 -0
- package/dist/servertool/handlers/clock-auto.js +160 -0
- package/dist/servertool/handlers/clock.d.ts +1 -0
- package/dist/servertool/handlers/clock.js +197 -0
- package/dist/servertool/handlers/exec-command-guard.js +7 -555
- package/dist/servertool/handlers/followup-request-builder.d.ts +15 -7
- package/dist/servertool/handlers/followup-request-builder.js +248 -28
- package/dist/servertool/handlers/gemini-empty-reply-continue.js +62 -169
- package/dist/servertool/handlers/iflow-model-error-retry.js +18 -28
- package/dist/servertool/handlers/recursive-detection-guard.d.ts +1 -0
- package/dist/servertool/handlers/recursive-detection-guard.js +333 -0
- package/dist/servertool/handlers/stop-message-auto.js +47 -175
- package/dist/servertool/handlers/vision.d.ts +7 -1
- package/dist/servertool/handlers/vision.js +61 -117
- package/dist/servertool/handlers/web-search.d.ts +7 -1
- package/dist/servertool/handlers/web-search.js +122 -105
- package/dist/servertool/reenter-backend.d.ts +23 -0
- package/dist/servertool/reenter-backend.js +18 -0
- package/dist/servertool/server-side-tools.d.ts +3 -2
- package/dist/servertool/server-side-tools.js +64 -10
- package/dist/servertool/types.d.ts +92 -3
- package/dist/sse/json-to-sse/event-generators/responses.js +3 -21
- package/dist/sse/shared/serializers/responses-event-serializer.d.ts +8 -0
- package/dist/sse/shared/serializers/responses-event-serializer.js +19 -0
- package/dist/sse/shared/writer.js +24 -7
- package/dist/tools/apply-patch/execution-capturer.js +3 -1
- package/dist/tools/apply-patch/json/parse-loose.d.ts +3 -0
- package/dist/tools/apply-patch/json/parse-loose.js +139 -0
- package/dist/tools/apply-patch/patch-text/context-diff.d.ts +1 -0
- package/dist/tools/apply-patch/patch-text/context-diff.js +173 -0
- package/dist/tools/apply-patch/patch-text/git-diff.d.ts +1 -0
- package/dist/tools/apply-patch/patch-text/git-diff.js +138 -0
- package/dist/tools/apply-patch/patch-text/looks-like-patch.d.ts +1 -0
- package/dist/tools/apply-patch/patch-text/looks-like-patch.js +13 -0
- package/dist/tools/apply-patch/patch-text/normalize.d.ts +3 -0
- package/dist/tools/apply-patch/patch-text/normalize.js +262 -0
- package/dist/tools/apply-patch/structured/coercion.d.ts +3 -0
- package/dist/tools/apply-patch/structured/coercion.js +82 -0
- package/dist/tools/apply-patch/validation/shared.d.ts +3 -0
- package/dist/tools/apply-patch/validation/shared.js +6 -0
- package/dist/tools/apply-patch/validator.d.ts +2 -2
- package/dist/tools/apply-patch/validator.js +6 -556
- package/package.json +1 -1
|
@@ -7,10 +7,11 @@ import { ContextAdvisor } from './context-advisor.js';
|
|
|
7
7
|
import { DEFAULT_ROUTE, ROUTE_PRIORITY, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
|
|
8
8
|
import { getStatsCenter } from '../../telemetry/stats-center.js';
|
|
9
9
|
import { parseRoutingInstructions, applyRoutingInstructions, cleanMessagesFromRoutingInstructions } from './routing-instructions.js';
|
|
10
|
-
import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync } from './sticky-session-store.js';
|
|
10
|
+
import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync, saveRoutingInstructionStateSync } from './sticky-session-store.js';
|
|
11
11
|
import { buildHitReason, formatVirtualRouterHit } from './engine-logging.js';
|
|
12
12
|
import { selectDirectProviderModel, selectFromStickyPool, selectProviderImpl } from './engine-selection.js';
|
|
13
13
|
import { applyQuotaDepletedImpl, applyQuotaRecoveryImpl, applySeriesCooldownImpl, handleProviderFailureImpl, mapProviderErrorImpl } from './engine-health.js';
|
|
14
|
+
import { mergeStopMessageFromPersisted } from './stop-message-state-sync.js';
|
|
14
15
|
export class VirtualRouterEngine {
|
|
15
16
|
routing = {};
|
|
16
17
|
providerRegistry = new ProviderRegistry();
|
|
@@ -29,7 +30,8 @@ export class VirtualRouterEngine {
|
|
|
29
30
|
healthStore;
|
|
30
31
|
routingStateStore = {
|
|
31
32
|
loadSync: loadRoutingInstructionStateSync,
|
|
32
|
-
saveAsync: saveRoutingInstructionStateAsync
|
|
33
|
+
saveAsync: saveRoutingInstructionStateAsync,
|
|
34
|
+
saveSync: saveRoutingInstructionStateSync
|
|
33
35
|
};
|
|
34
36
|
routingInstructionState = new Map();
|
|
35
37
|
quotaView;
|
|
@@ -56,7 +58,8 @@ export class VirtualRouterEngine {
|
|
|
56
58
|
deps.routingStateStore ??
|
|
57
59
|
{
|
|
58
60
|
loadSync: loadRoutingInstructionStateSync,
|
|
59
|
-
saveAsync: saveRoutingInstructionStateAsync
|
|
61
|
+
saveAsync: saveRoutingInstructionStateAsync,
|
|
62
|
+
saveSync: saveRoutingInstructionStateSync
|
|
60
63
|
};
|
|
61
64
|
// Routing state store changes require clearing in-memory cache to avoid stale reads.
|
|
62
65
|
this.routingInstructionState.clear();
|
|
@@ -106,6 +109,7 @@ export class VirtualRouterEngine {
|
|
|
106
109
|
route(request, metadata) {
|
|
107
110
|
const stickyKey = this.resolveStickyKey(metadata);
|
|
108
111
|
const sessionScope = this.resolveSessionScope(metadata);
|
|
112
|
+
const stopMessageScope = this.resolveStopMessageScope(metadata);
|
|
109
113
|
// Routing instructions should be session/conversation-scoped when available (including /v1/responses),
|
|
110
114
|
// while auto-sticky for Responses remains request-chain scoped via resolveStickyKey().
|
|
111
115
|
const stateKey = sessionScope || stickyKey || 'default';
|
|
@@ -125,8 +129,8 @@ export class VirtualRouterEngine {
|
|
|
125
129
|
preferTarget: undefined
|
|
126
130
|
};
|
|
127
131
|
}
|
|
128
|
-
if (
|
|
129
|
-
const sessionState = this.getRoutingInstructionState(
|
|
132
|
+
if (stopMessageScope) {
|
|
133
|
+
const sessionState = this.getRoutingInstructionState(stopMessageScope);
|
|
130
134
|
if (typeof sessionState.stopMessageText === 'string' ||
|
|
131
135
|
typeof sessionState.stopMessageMaxRepeats === 'number') {
|
|
132
136
|
routingState = {
|
|
@@ -141,8 +145,8 @@ export class VirtualRouterEngine {
|
|
|
141
145
|
}
|
|
142
146
|
const parsedInstructions = parseRoutingInstructions(request.messages);
|
|
143
147
|
let instructions = parsedInstructions;
|
|
144
|
-
if (
|
|
145
|
-
const sessionState = this.getRoutingInstructionState(
|
|
148
|
+
if (stopMessageScope && parsedInstructions.length > 0) {
|
|
149
|
+
const sessionState = this.getRoutingInstructionState(stopMessageScope);
|
|
146
150
|
const hasStopMessageClear = parsedInstructions.some((entry) => entry.type === 'stopMessageClear');
|
|
147
151
|
const stopMessageSets = parsedInstructions.filter((entry) => entry.type === 'stopMessageSet');
|
|
148
152
|
if (!hasStopMessageClear && stopMessageSets.length > 0) {
|
|
@@ -162,6 +166,14 @@ export class VirtualRouterEngine {
|
|
|
162
166
|
}
|
|
163
167
|
}
|
|
164
168
|
}
|
|
169
|
+
// stopMessage must be session-scoped: require explicit sessionId in metadata.
|
|
170
|
+
// This prevents global/default persistence and ensures the trigger matches the setting sessionId.
|
|
171
|
+
if (parsedInstructions.length > 0) {
|
|
172
|
+
const hasStopMessageInstruction = parsedInstructions.some((entry) => entry.type === 'stopMessageSet' || entry.type === 'stopMessageClear');
|
|
173
|
+
if (hasStopMessageInstruction && !stopMessageScope) {
|
|
174
|
+
throw new VirtualRouterError('[stopMessage] requires sessionId (e.g. set x-session-id header or metadata.sessionId).', VirtualRouterErrorCode.CONFIG_ERROR, { requestId: metadata.requestId, entryEndpoint: metadata.entryEndpoint });
|
|
175
|
+
}
|
|
176
|
+
}
|
|
165
177
|
if (parsedInstructions.length > 0) {
|
|
166
178
|
request.messages = cleanMessagesFromRoutingInstructions(request.messages);
|
|
167
179
|
}
|
|
@@ -171,21 +183,25 @@ export class VirtualRouterEngine {
|
|
|
171
183
|
this.persistRoutingInstructionState(stateKey, routingState);
|
|
172
184
|
// 对 stopMessage 指令补充一份基于 session/conversation 的持久化状态,
|
|
173
185
|
// 便于 server-side 工具通过 session:*/conversation:* scope 读取到相同配置。
|
|
174
|
-
|
|
186
|
+
// stopMessage is strictly session-scoped (sessionId only). Persist it under the session scope
|
|
187
|
+
// so servertool triggers always match the setting sessionId.
|
|
188
|
+
if (stopMessageScope) {
|
|
175
189
|
const hasStopMessageSet = instructions.some((entry) => entry.type === 'stopMessageSet');
|
|
176
190
|
const hasStopMessageClear = instructions.some((entry) => entry.type === 'stopMessageClear');
|
|
177
191
|
if (hasStopMessageSet || hasStopMessageClear) {
|
|
178
|
-
const sessionState = this.getRoutingInstructionState(
|
|
192
|
+
const sessionState = this.getRoutingInstructionState(stopMessageScope);
|
|
179
193
|
let nextSessionState = {
|
|
180
194
|
...sessionState
|
|
181
195
|
};
|
|
182
196
|
let shouldPersistSessionState = false;
|
|
183
197
|
if (hasStopMessageClear) {
|
|
198
|
+
const clearedAt = Date.now();
|
|
184
199
|
nextSessionState.stopMessageText = undefined;
|
|
185
200
|
nextSessionState.stopMessageMaxRepeats = undefined;
|
|
186
201
|
nextSessionState.stopMessageUsed = undefined;
|
|
187
|
-
nextSessionState.stopMessageUpdatedAt =
|
|
188
|
-
nextSessionState.stopMessageLastUsedAt =
|
|
202
|
+
nextSessionState.stopMessageUpdatedAt = clearedAt;
|
|
203
|
+
nextSessionState.stopMessageLastUsedAt = clearedAt;
|
|
204
|
+
nextSessionState.stopMessageSource = undefined;
|
|
189
205
|
shouldPersistSessionState = true;
|
|
190
206
|
}
|
|
191
207
|
else if (hasStopMessageSet) {
|
|
@@ -210,8 +226,8 @@ export class VirtualRouterEngine {
|
|
|
210
226
|
}
|
|
211
227
|
}
|
|
212
228
|
if (shouldPersistSessionState) {
|
|
213
|
-
this.routingInstructionState.set(
|
|
214
|
-
this.persistRoutingInstructionState(
|
|
229
|
+
this.routingInstructionState.set(stopMessageScope, nextSessionState);
|
|
230
|
+
this.persistRoutingInstructionState(stopMessageScope, nextSessionState);
|
|
215
231
|
}
|
|
216
232
|
else {
|
|
217
233
|
nextSessionState = sessionState;
|
|
@@ -228,8 +244,8 @@ export class VirtualRouterEngine {
|
|
|
228
244
|
}
|
|
229
245
|
}
|
|
230
246
|
}
|
|
231
|
-
if (instructions.length === 0 &&
|
|
232
|
-
const sessionState = this.getRoutingInstructionState(
|
|
247
|
+
if (instructions.length === 0 && stopMessageScope) {
|
|
248
|
+
const sessionState = this.getRoutingInstructionState(stopMessageScope);
|
|
233
249
|
if (typeof sessionState.stopMessageText === 'string' ||
|
|
234
250
|
typeof sessionState.stopMessageMaxRepeats === 'number') {
|
|
235
251
|
routingState.stopMessageText = sessionState.stopMessageText;
|
|
@@ -709,6 +725,13 @@ export class VirtualRouterEngine {
|
|
|
709
725
|
}
|
|
710
726
|
return undefined;
|
|
711
727
|
}
|
|
728
|
+
resolveStopMessageScope(metadata) {
|
|
729
|
+
const sessionId = typeof metadata.sessionId === 'string' ? metadata.sessionId.trim() : '';
|
|
730
|
+
if (sessionId) {
|
|
731
|
+
return `session:${sessionId}`;
|
|
732
|
+
}
|
|
733
|
+
return undefined;
|
|
734
|
+
}
|
|
712
735
|
getRoutingInstructionState(stickyKey) {
|
|
713
736
|
const key = stickyKey || 'default';
|
|
714
737
|
const existing = this.routingInstructionState.get(key);
|
|
@@ -718,22 +741,13 @@ export class VirtualRouterEngine {
|
|
|
718
741
|
if (existing && (key.startsWith('session:') || key.startsWith('conversation:'))) {
|
|
719
742
|
try {
|
|
720
743
|
const persisted = this.routingStateStore.loadSync(key);
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
}
|
|
729
|
-
else {
|
|
730
|
-
// 文件被删除或无法解析时,将内存中的 stopMessage 状态一并清空。
|
|
731
|
-
existing.stopMessageText = undefined;
|
|
732
|
-
existing.stopMessageMaxRepeats = undefined;
|
|
733
|
-
existing.stopMessageUsed = undefined;
|
|
734
|
-
existing.stopMessageUpdatedAt = undefined;
|
|
735
|
-
existing.stopMessageLastUsedAt = undefined;
|
|
736
|
-
}
|
|
744
|
+
const merged = mergeStopMessageFromPersisted(existing, persisted);
|
|
745
|
+
existing.stopMessageSource = merged.stopMessageSource;
|
|
746
|
+
existing.stopMessageText = merged.stopMessageText;
|
|
747
|
+
existing.stopMessageMaxRepeats = merged.stopMessageMaxRepeats;
|
|
748
|
+
existing.stopMessageUsed = merged.stopMessageUsed;
|
|
749
|
+
existing.stopMessageUpdatedAt = merged.stopMessageUpdatedAt;
|
|
750
|
+
existing.stopMessageLastUsedAt = merged.stopMessageLastUsedAt;
|
|
737
751
|
}
|
|
738
752
|
catch {
|
|
739
753
|
// 刷新失败不影响原有内存状态
|
|
@@ -753,6 +767,7 @@ export class VirtualRouterEngine {
|
|
|
753
767
|
disabledProviders: new Set(),
|
|
754
768
|
disabledKeys: new Map(),
|
|
755
769
|
disabledModels: new Map(),
|
|
770
|
+
stopMessageSource: undefined,
|
|
756
771
|
stopMessageText: undefined,
|
|
757
772
|
stopMessageMaxRepeats: undefined,
|
|
758
773
|
stopMessageUsed: undefined,
|
|
@@ -765,6 +780,12 @@ export class VirtualRouterEngine {
|
|
|
765
780
|
}
|
|
766
781
|
buildMetadataInstructions(metadata) {
|
|
767
782
|
const instructions = [];
|
|
783
|
+
const forcedProviderKeyRaw = metadata
|
|
784
|
+
.__shadowCompareForcedProviderKey;
|
|
785
|
+
const forcedProviderKey = this.parseMetadataForceProviderKey(forcedProviderKeyRaw);
|
|
786
|
+
if (forcedProviderKey) {
|
|
787
|
+
instructions.push({ type: 'force', ...forcedProviderKey });
|
|
788
|
+
}
|
|
768
789
|
if (Array.isArray(metadata.disabledProviderKeyAliases)) {
|
|
769
790
|
for (const entry of metadata.disabledProviderKeyAliases) {
|
|
770
791
|
const parsed = this.parseMetadataDisableDescriptor(entry);
|
|
@@ -797,6 +818,76 @@ export class VirtualRouterEngine {
|
|
|
797
818
|
}
|
|
798
819
|
return { provider, keyAlias: alias };
|
|
799
820
|
}
|
|
821
|
+
parseMetadataForceProviderKey(entry) {
|
|
822
|
+
if (typeof entry !== 'string') {
|
|
823
|
+
return null;
|
|
824
|
+
}
|
|
825
|
+
const trimmed = entry.trim();
|
|
826
|
+
if (!trimmed) {
|
|
827
|
+
return null;
|
|
828
|
+
}
|
|
829
|
+
// Accept the bracket notation used in virtual-router-hit logs: provider[alias].model
|
|
830
|
+
// - provider[].model means provider.model across all aliases
|
|
831
|
+
const bracketMatch = trimmed.match(/^([a-zA-Z0-9_-]+)\[([a-zA-Z0-9_-]*)\](?:\.(.+))?$/);
|
|
832
|
+
if (bracketMatch) {
|
|
833
|
+
const provider = bracketMatch[1]?.trim() || '';
|
|
834
|
+
const keyAlias = bracketMatch[2]?.trim() || '';
|
|
835
|
+
const model = typeof bracketMatch[3] === 'string' ? bracketMatch[3].trim() : '';
|
|
836
|
+
if (!provider) {
|
|
837
|
+
return null;
|
|
838
|
+
}
|
|
839
|
+
if (keyAlias) {
|
|
840
|
+
return {
|
|
841
|
+
provider,
|
|
842
|
+
keyAlias,
|
|
843
|
+
...(model ? { model } : {}),
|
|
844
|
+
pathLength: 3
|
|
845
|
+
};
|
|
846
|
+
}
|
|
847
|
+
if (model) {
|
|
848
|
+
return {
|
|
849
|
+
provider,
|
|
850
|
+
model,
|
|
851
|
+
pathLength: 2
|
|
852
|
+
};
|
|
853
|
+
}
|
|
854
|
+
return { provider, pathLength: 1 };
|
|
855
|
+
}
|
|
856
|
+
// Accept provider.keyAlias.model and provider.model (model may contain dots when keyAlias is explicit).
|
|
857
|
+
const parts = trimmed.split('.').map((part) => part.trim()).filter(Boolean);
|
|
858
|
+
if (parts.length === 0) {
|
|
859
|
+
return null;
|
|
860
|
+
}
|
|
861
|
+
const provider = parts[0] || '';
|
|
862
|
+
if (!provider) {
|
|
863
|
+
return null;
|
|
864
|
+
}
|
|
865
|
+
if (parts.length === 1) {
|
|
866
|
+
return { provider, pathLength: 1 };
|
|
867
|
+
}
|
|
868
|
+
if (parts.length === 2) {
|
|
869
|
+
const second = parts[1] || '';
|
|
870
|
+
if (!second) {
|
|
871
|
+
return null;
|
|
872
|
+
}
|
|
873
|
+
if (/^\d+$/.test(second)) {
|
|
874
|
+
const keyIndex = Number.parseInt(second, 10);
|
|
875
|
+
return Number.isFinite(keyIndex) && keyIndex > 0 ? { provider, keyIndex, pathLength: 2 } : null;
|
|
876
|
+
}
|
|
877
|
+
return { provider, model: second, pathLength: 2 };
|
|
878
|
+
}
|
|
879
|
+
const keyAlias = parts[1] || '';
|
|
880
|
+
const model = parts.slice(2).join('.').trim();
|
|
881
|
+
if (!keyAlias) {
|
|
882
|
+
return null;
|
|
883
|
+
}
|
|
884
|
+
return {
|
|
885
|
+
provider,
|
|
886
|
+
keyAlias,
|
|
887
|
+
...(model ? { model } : {}),
|
|
888
|
+
pathLength: 3
|
|
889
|
+
};
|
|
890
|
+
}
|
|
800
891
|
resolveRoutingMode(instructions, state) {
|
|
801
892
|
const hasForce = instructions.some((inst) => inst.type === 'force');
|
|
802
893
|
const hasAllow = instructions.some((inst) => inst.type === 'allow');
|
|
@@ -1281,11 +1372,29 @@ export class VirtualRouterEngine {
|
|
|
1281
1372
|
if (!key || (!key.startsWith('session:') && !key.startsWith('conversation:'))) {
|
|
1282
1373
|
return;
|
|
1283
1374
|
}
|
|
1375
|
+
const supportsSync = typeof this.routingStateStore.saveSync === 'function';
|
|
1376
|
+
const prefersSync = supportsSync &&
|
|
1377
|
+
key.startsWith('session:') &&
|
|
1378
|
+
(Boolean(state.stopMessageText && state.stopMessageText.trim()) ||
|
|
1379
|
+
(typeof state.stopMessageMaxRepeats === 'number' && Number.isFinite(state.stopMessageMaxRepeats)) ||
|
|
1380
|
+
(typeof state.stopMessageUsed === 'number' && Number.isFinite(state.stopMessageUsed)) ||
|
|
1381
|
+
(typeof state.stopMessageUpdatedAt === 'number' && Number.isFinite(state.stopMessageUpdatedAt)) ||
|
|
1382
|
+
(typeof state.stopMessageLastUsedAt === 'number' && Number.isFinite(state.stopMessageLastUsedAt)));
|
|
1284
1383
|
if (this.isRoutingStateEmpty(state)) {
|
|
1285
|
-
|
|
1384
|
+
if (prefersSync) {
|
|
1385
|
+
this.routingStateStore.saveSync(key, null);
|
|
1386
|
+
}
|
|
1387
|
+
else {
|
|
1388
|
+
this.routingStateStore.saveAsync(key, null);
|
|
1389
|
+
}
|
|
1286
1390
|
return;
|
|
1287
1391
|
}
|
|
1288
|
-
|
|
1392
|
+
if (prefersSync) {
|
|
1393
|
+
this.routingStateStore.saveSync(key, state);
|
|
1394
|
+
}
|
|
1395
|
+
else {
|
|
1396
|
+
this.routingStateStore.saveAsync(key, state);
|
|
1397
|
+
}
|
|
1289
1398
|
}
|
|
1290
1399
|
markProviderCooldown(providerKey, cooldownMs) {
|
|
1291
1400
|
if (!providerKey) {
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { HealthWeightedLoadBalancingConfig, ProviderQuotaViewEntry } from './types.js';
|
|
2
|
+
export type ResolvedHealthWeightedConfig = Required<{
|
|
3
|
+
enabled: boolean;
|
|
4
|
+
baseWeight: number;
|
|
5
|
+
minMultiplier: number;
|
|
6
|
+
beta: number;
|
|
7
|
+
halfLifeMs: number;
|
|
8
|
+
recoverToBestOnRetry: boolean;
|
|
9
|
+
}>;
|
|
10
|
+
/**
|
|
11
|
+
* AWRR constant table (defaults).
|
|
12
|
+
*
|
|
13
|
+
* Notes:
|
|
14
|
+
* - `minMultiplier=0.5` is the "50% of baseline share" floor: penalties will not reduce a key below ~half of
|
|
15
|
+
* its initial (equal) share within the same pool bucket.
|
|
16
|
+
* - `halfLifeMs=10min` means: if no new errors occur, the effect of the last error decays by 50% every 10 minutes.
|
|
17
|
+
* - `beta` controls how quickly errors reduce share; tune carefully.
|
|
18
|
+
*/
|
|
19
|
+
export declare const DEFAULT_HEALTH_WEIGHTED_CONFIG: ResolvedHealthWeightedConfig;
|
|
20
|
+
export declare function resolveHealthWeightedConfig(raw?: HealthWeightedLoadBalancingConfig | null): ResolvedHealthWeightedConfig;
|
|
21
|
+
export declare function computeHealthMultiplier(entry: ProviderQuotaViewEntry | null, nowMs: number, cfg: ResolvedHealthWeightedConfig): number;
|
|
22
|
+
export declare function computeHealthWeight(entry: ProviderQuotaViewEntry | null, nowMs: number, cfg: ResolvedHealthWeightedConfig): {
|
|
23
|
+
weight: number;
|
|
24
|
+
multiplier: number;
|
|
25
|
+
};
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AWRR constant table (defaults).
|
|
3
|
+
*
|
|
4
|
+
* Notes:
|
|
5
|
+
* - `minMultiplier=0.5` is the "50% of baseline share" floor: penalties will not reduce a key below ~half of
|
|
6
|
+
* its initial (equal) share within the same pool bucket.
|
|
7
|
+
* - `halfLifeMs=10min` means: if no new errors occur, the effect of the last error decays by 50% every 10 minutes.
|
|
8
|
+
* - `beta` controls how quickly errors reduce share; tune carefully.
|
|
9
|
+
*/
|
|
10
|
+
export const DEFAULT_HEALTH_WEIGHTED_CONFIG = {
|
|
11
|
+
enabled: false,
|
|
12
|
+
baseWeight: 100,
|
|
13
|
+
minMultiplier: 0.5,
|
|
14
|
+
beta: 0.1,
|
|
15
|
+
halfLifeMs: 10 * 60 * 1000,
|
|
16
|
+
recoverToBestOnRetry: true
|
|
17
|
+
};
|
|
18
|
+
export function resolveHealthWeightedConfig(raw) {
|
|
19
|
+
const enabled = raw?.enabled ?? DEFAULT_HEALTH_WEIGHTED_CONFIG.enabled;
|
|
20
|
+
const baseWeight = typeof raw?.baseWeight === 'number' && Number.isFinite(raw.baseWeight) && raw.baseWeight > 0
|
|
21
|
+
? Math.floor(raw.baseWeight)
|
|
22
|
+
: DEFAULT_HEALTH_WEIGHTED_CONFIG.baseWeight;
|
|
23
|
+
const minMultiplier = typeof raw?.minMultiplier === 'number' && Number.isFinite(raw.minMultiplier) && raw.minMultiplier > 0
|
|
24
|
+
? Math.min(1, raw.minMultiplier)
|
|
25
|
+
: DEFAULT_HEALTH_WEIGHTED_CONFIG.minMultiplier;
|
|
26
|
+
const beta = typeof raw?.beta === 'number' && Number.isFinite(raw.beta) && raw.beta >= 0
|
|
27
|
+
? raw.beta
|
|
28
|
+
: DEFAULT_HEALTH_WEIGHTED_CONFIG.beta;
|
|
29
|
+
const halfLifeMs = typeof raw?.halfLifeMs === 'number' && Number.isFinite(raw.halfLifeMs) && raw.halfLifeMs > 0
|
|
30
|
+
? Math.floor(raw.halfLifeMs)
|
|
31
|
+
: DEFAULT_HEALTH_WEIGHTED_CONFIG.halfLifeMs;
|
|
32
|
+
const recoverToBestOnRetry = raw?.recoverToBestOnRetry ?? DEFAULT_HEALTH_WEIGHTED_CONFIG.recoverToBestOnRetry;
|
|
33
|
+
return {
|
|
34
|
+
enabled,
|
|
35
|
+
baseWeight,
|
|
36
|
+
minMultiplier,
|
|
37
|
+
beta,
|
|
38
|
+
halfLifeMs,
|
|
39
|
+
recoverToBestOnRetry
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
export function computeHealthMultiplier(entry, nowMs, cfg) {
|
|
43
|
+
if (!entry) {
|
|
44
|
+
return 1;
|
|
45
|
+
}
|
|
46
|
+
const lastErrorAtMs = typeof entry.lastErrorAtMs === 'number' && Number.isFinite(entry.lastErrorAtMs) ? entry.lastErrorAtMs : null;
|
|
47
|
+
const consecutiveErrorCount = typeof entry.consecutiveErrorCount === 'number' && Number.isFinite(entry.consecutiveErrorCount) && entry.consecutiveErrorCount > 0
|
|
48
|
+
? Math.floor(entry.consecutiveErrorCount)
|
|
49
|
+
: 0;
|
|
50
|
+
if (!lastErrorAtMs || consecutiveErrorCount <= 0) {
|
|
51
|
+
return 1;
|
|
52
|
+
}
|
|
53
|
+
const elapsedMs = Math.max(0, nowMs - lastErrorAtMs);
|
|
54
|
+
const decay = Math.exp((-Math.log(2) * elapsedMs) / cfg.halfLifeMs);
|
|
55
|
+
const effectiveErrors = consecutiveErrorCount * decay;
|
|
56
|
+
const raw = 1 - cfg.beta * effectiveErrors;
|
|
57
|
+
return Math.max(cfg.minMultiplier, Math.min(1, raw));
|
|
58
|
+
}
|
|
59
|
+
export function computeHealthWeight(entry, nowMs, cfg) {
|
|
60
|
+
const multiplier = computeHealthMultiplier(entry, nowMs, cfg);
|
|
61
|
+
const weight = Math.max(1, Math.round(cfg.baseWeight * multiplier));
|
|
62
|
+
return { weight, multiplier };
|
|
63
|
+
}
|
|
@@ -3,6 +3,7 @@ export interface LoadBalancingOptions {
|
|
|
3
3
|
routeName: string;
|
|
4
4
|
candidates: string[];
|
|
5
5
|
stickyKey?: string;
|
|
6
|
+
weights?: Record<string, number>;
|
|
6
7
|
availabilityCheck: (providerKey: string) => boolean;
|
|
7
8
|
}
|
|
8
9
|
export declare class RouteLoadBalancer {
|
|
@@ -10,6 +11,7 @@ export declare class RouteLoadBalancer {
|
|
|
10
11
|
private readonly states;
|
|
11
12
|
constructor(policy?: LoadBalancingPolicy);
|
|
12
13
|
updatePolicy(policy?: LoadBalancingPolicy): void;
|
|
14
|
+
getPolicy(): LoadBalancingPolicy;
|
|
13
15
|
select(options: LoadBalancingOptions, strategyOverride?: LoadBalancingPolicy['strategy']): string | null;
|
|
14
16
|
private selectRoundRobin;
|
|
15
17
|
private selectWeighted;
|
|
@@ -9,6 +9,9 @@ export class RouteLoadBalancer {
|
|
|
9
9
|
this.policy = policy;
|
|
10
10
|
}
|
|
11
11
|
}
|
|
12
|
+
getPolicy() {
|
|
13
|
+
return this.policy;
|
|
14
|
+
}
|
|
12
15
|
select(options, strategyOverride) {
|
|
13
16
|
const available = options.candidates.filter((candidate) => options.availabilityCheck(candidate));
|
|
14
17
|
if (available.length === 0) {
|
|
@@ -17,10 +20,16 @@ export class RouteLoadBalancer {
|
|
|
17
20
|
const strategy = strategyOverride ?? this.policy.strategy;
|
|
18
21
|
switch (strategy) {
|
|
19
22
|
case 'sticky':
|
|
20
|
-
return this.selectSticky(options.routeName, available, options.stickyKey);
|
|
23
|
+
return this.selectSticky(options.routeName, available, options.stickyKey, options.weights ?? this.policy.weights);
|
|
21
24
|
case 'weighted':
|
|
22
|
-
return this.selectWeighted(available);
|
|
25
|
+
return this.selectWeighted(options.routeName, available, options.weights ?? this.policy.weights);
|
|
23
26
|
default:
|
|
27
|
+
if (options.weights) {
|
|
28
|
+
const distinct = new Set(available.map((candidate) => Math.max(1, options.weights?.[candidate] ?? 1)));
|
|
29
|
+
if (distinct.size > 1) {
|
|
30
|
+
return this.selectWeighted(options.routeName, available, options.weights);
|
|
31
|
+
}
|
|
32
|
+
}
|
|
24
33
|
return this.selectRoundRobin(options.routeName, available);
|
|
25
34
|
}
|
|
26
35
|
}
|
|
@@ -30,23 +39,41 @@ export class RouteLoadBalancer {
|
|
|
30
39
|
state.pointer = (state.pointer + 1) % candidates.length;
|
|
31
40
|
return choice;
|
|
32
41
|
}
|
|
33
|
-
selectWeighted(candidates) {
|
|
34
|
-
|
|
35
|
-
|
|
42
|
+
selectWeighted(routeName, candidates, weights) {
|
|
43
|
+
// Deterministic smooth weighted round-robin (no randomness) so routing behavior is testable and stable.
|
|
44
|
+
// Each candidate with a positive weight is guaranteed to be selected eventually.
|
|
45
|
+
const state = this.getState(routeName);
|
|
46
|
+
const current = state.weighted.currentWeights;
|
|
47
|
+
const candidateSet = new Set(candidates);
|
|
48
|
+
for (const existing of Array.from(current.keys())) {
|
|
49
|
+
if (!candidateSet.has(existing)) {
|
|
50
|
+
current.delete(existing);
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
for (const key of candidates) {
|
|
54
|
+
if (!current.has(key)) {
|
|
55
|
+
current.set(key, 0);
|
|
56
|
+
}
|
|
36
57
|
}
|
|
37
|
-
const
|
|
38
|
-
const
|
|
39
|
-
|
|
40
|
-
let
|
|
58
|
+
const candidateWeights = candidates.map((candidate) => Math.max(1, weights?.[candidate] ?? 1));
|
|
59
|
+
const totalWeight = candidateWeights.reduce((sum, w) => sum + w, 0);
|
|
60
|
+
let bestIndex = 0;
|
|
61
|
+
let bestScore = Number.NEGATIVE_INFINITY;
|
|
41
62
|
for (let i = 0; i < candidates.length; i += 1) {
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
63
|
+
const key = candidates[i];
|
|
64
|
+
const w = candidateWeights[i];
|
|
65
|
+
const next = (current.get(key) ?? 0) + w;
|
|
66
|
+
current.set(key, next);
|
|
67
|
+
if (next > bestScore) {
|
|
68
|
+
bestScore = next;
|
|
69
|
+
bestIndex = i;
|
|
45
70
|
}
|
|
46
71
|
}
|
|
47
|
-
|
|
72
|
+
const selectedKey = candidates[bestIndex];
|
|
73
|
+
current.set(selectedKey, (current.get(selectedKey) ?? 0) - totalWeight);
|
|
74
|
+
return selectedKey;
|
|
48
75
|
}
|
|
49
|
-
selectSticky(routeName, candidates, stickyKey) {
|
|
76
|
+
selectSticky(routeName, candidates, stickyKey, weights) {
|
|
50
77
|
if (!stickyKey) {
|
|
51
78
|
return this.selectRoundRobin(routeName, candidates);
|
|
52
79
|
}
|
|
@@ -55,13 +82,15 @@ export class RouteLoadBalancer {
|
|
|
55
82
|
if (pinned && candidates.includes(pinned)) {
|
|
56
83
|
return pinned;
|
|
57
84
|
}
|
|
58
|
-
const choice =
|
|
85
|
+
const choice = weights && Object.keys(weights).length > 0
|
|
86
|
+
? this.selectWeighted(`${routeName}:sticky`, candidates, weights)
|
|
87
|
+
: this.selectRoundRobin(routeName, candidates);
|
|
59
88
|
state.stickyMap.set(stickyKey, choice);
|
|
60
89
|
return choice;
|
|
61
90
|
}
|
|
62
91
|
getState(routeName) {
|
|
63
92
|
if (!this.states.has(routeName)) {
|
|
64
|
-
this.states.set(routeName, { pointer: 0, stickyMap: new Map() });
|
|
93
|
+
this.states.set(routeName, { pointer: 0, stickyMap: new Map(), weighted: { currentWeights: new Map() } });
|
|
65
94
|
}
|
|
66
95
|
return this.states.get(routeName);
|
|
67
96
|
}
|
|
@@ -1,15 +1,21 @@
|
|
|
1
1
|
import { extractMessageText } from './message-utils.js';
|
|
2
|
+
import { resolveStopMessageText } from './stop-message-file-resolver.js';
|
|
2
3
|
export function parseRoutingInstructions(messages) {
|
|
3
4
|
const instructions = [];
|
|
4
5
|
// 从最新一条携带路由指令标记(<** ... **>)的 user 消息中解析指令,
|
|
5
6
|
// 而不是简单地取"最后一条 user 消息"。这样可以在服务重启后,通过完整
|
|
6
7
|
// 会话历史恢复 sticky/黑名单状态,同时保持"最后一次指令生效"的语义。
|
|
7
8
|
let sanitized = null;
|
|
9
|
+
let sanitizedIndex = -1;
|
|
10
|
+
let lastUserIndex = -1;
|
|
8
11
|
for (let idx = messages.length - 1; idx >= 0; idx -= 1) {
|
|
9
12
|
const message = messages[idx];
|
|
10
13
|
if (!message || message.role !== 'user') {
|
|
11
14
|
continue;
|
|
12
15
|
}
|
|
16
|
+
if (lastUserIndex < 0) {
|
|
17
|
+
lastUserIndex = idx;
|
|
18
|
+
}
|
|
13
19
|
const content = extractMessageText(message);
|
|
14
20
|
if (!content) {
|
|
15
21
|
continue;
|
|
@@ -22,6 +28,7 @@ export function parseRoutingInstructions(messages) {
|
|
|
22
28
|
continue;
|
|
23
29
|
}
|
|
24
30
|
sanitized = candidate;
|
|
31
|
+
sanitizedIndex = idx;
|
|
25
32
|
break;
|
|
26
33
|
}
|
|
27
34
|
if (!sanitized) {
|
|
@@ -38,6 +45,15 @@ export function parseRoutingInstructions(messages) {
|
|
|
38
45
|
for (const segment of segments) {
|
|
39
46
|
const parsed = parseSingleInstruction(segment);
|
|
40
47
|
if (parsed) {
|
|
48
|
+
// stopMessage is a "command" and must only be set/cleared from the *latest* user message.
|
|
49
|
+
// Otherwise, clients that resend full history (including a past "<**stopMessage:...**>" message)
|
|
50
|
+
// would keep re-applying stopMessage after it has been consumed/cleared.
|
|
51
|
+
if ((parsed.type === 'stopMessageSet' || parsed.type === 'stopMessageClear') &&
|
|
52
|
+
lastUserIndex >= 0 &&
|
|
53
|
+
sanitizedIndex >= 0 &&
|
|
54
|
+
sanitizedIndex !== lastUserIndex) {
|
|
55
|
+
continue;
|
|
56
|
+
}
|
|
41
57
|
instructions.push(parsed);
|
|
42
58
|
}
|
|
43
59
|
}
|
|
@@ -176,7 +192,7 @@ function parseSingleInstruction(instruction) {
|
|
|
176
192
|
}
|
|
177
193
|
return {
|
|
178
194
|
type: 'stopMessageSet',
|
|
179
|
-
stopMessageText: text,
|
|
195
|
+
stopMessageText: resolveStopMessageText(text),
|
|
180
196
|
stopMessageMaxRepeats: maxRepeats
|
|
181
197
|
};
|
|
182
198
|
}
|