@jsonstudio/llms 0.6.1172 → 0.6.1397

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. package/dist/conversion/codecs/gemini-openai-codec.d.ts +3 -1
  2. package/dist/conversion/codecs/gemini-openai-codec.js +10 -4
  3. package/dist/conversion/compat/actions/gemini-web-search.d.ts +1 -1
  4. package/dist/conversion/compat/actions/gemini-web-search.js +5 -2
  5. package/dist/conversion/compat/actions/iflow-tool-text-fallback.d.ts +12 -0
  6. package/dist/conversion/compat/actions/iflow-tool-text-fallback.js +199 -0
  7. package/dist/conversion/compat/actions/iflow-web-search.d.ts +1 -1
  8. package/dist/conversion/compat/actions/iflow-web-search.js +5 -2
  9. package/dist/conversion/compat/profiles/chat-gemini.json +5 -0
  10. package/dist/conversion/hub/operation-table/semantic-mappers/anthropic-mapper.js +47 -56
  11. package/dist/conversion/hub/operation-table/semantic-mappers/chat-mapper.js +1 -13
  12. package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.js +748 -52
  13. package/dist/conversion/hub/operation-table/semantic-mappers/responses-mapper.js +18 -38
  14. package/dist/conversion/hub/pipeline/compat/compat-pipeline-executor.js +6 -0
  15. package/dist/conversion/hub/pipeline/compat/compat-types.d.ts +3 -0
  16. package/dist/conversion/hub/pipeline/hub-pipeline/adapter-context.d.ts +10 -0
  17. package/dist/conversion/hub/pipeline/hub-pipeline/adapter-context.js +142 -0
  18. package/dist/conversion/hub/pipeline/hub-pipeline/anthropic-alias-map.d.ts +6 -0
  19. package/dist/conversion/hub/pipeline/hub-pipeline/anthropic-alias-map.js +79 -0
  20. package/dist/conversion/hub/pipeline/hub-pipeline/apply-patch-tool-mode.d.ts +3 -0
  21. package/dist/conversion/hub/pipeline/hub-pipeline/apply-patch-tool-mode.js +46 -0
  22. package/dist/conversion/hub/pipeline/hub-pipeline/execute-chat-process-entry.d.ts +8 -0
  23. package/dist/conversion/hub/pipeline/hub-pipeline/execute-chat-process-entry.js +366 -0
  24. package/dist/conversion/hub/pipeline/hub-pipeline/execute-request-stage.d.ts +9 -0
  25. package/dist/conversion/hub/pipeline/hub-pipeline/execute-request-stage.js +390 -0
  26. package/dist/conversion/hub/pipeline/hub-pipeline/node-results.d.ts +3 -0
  27. package/dist/conversion/hub/pipeline/hub-pipeline/node-results.js +14 -0
  28. package/dist/conversion/hub/pipeline/hub-pipeline/payload-normalize.d.ts +2 -0
  29. package/dist/conversion/hub/pipeline/hub-pipeline/payload-normalize.js +144 -0
  30. package/dist/conversion/hub/pipeline/hub-pipeline/policy.d.ts +4 -0
  31. package/dist/conversion/hub/pipeline/hub-pipeline/policy.js +32 -0
  32. package/dist/conversion/hub/pipeline/hub-pipeline/protocol.d.ts +8 -0
  33. package/dist/conversion/hub/pipeline/hub-pipeline/protocol.js +63 -0
  34. package/dist/conversion/hub/pipeline/hub-pipeline/resolve-protocol-hooks.d.ts +2 -0
  35. package/dist/conversion/hub/pipeline/hub-pipeline/resolve-protocol-hooks.js +43 -0
  36. package/dist/conversion/hub/pipeline/hub-pipeline/semantic-gate.d.ts +1 -0
  37. package/dist/conversion/hub/pipeline/hub-pipeline/semantic-gate.js +29 -0
  38. package/dist/conversion/hub/pipeline/hub-pipeline/servertool-runtime-config.d.ts +2 -0
  39. package/dist/conversion/hub/pipeline/hub-pipeline/servertool-runtime-config.js +16 -0
  40. package/dist/conversion/hub/pipeline/hub-pipeline/types.d.ts +116 -0
  41. package/dist/conversion/hub/pipeline/hub-pipeline/types.js +1 -0
  42. package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +3 -95
  43. package/dist/conversion/hub/pipeline/hub-pipeline.js +19 -1281
  44. package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage1_format_parse/index.js +1 -1
  45. package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage2_semantic_map/index.d.ts +7 -0
  46. package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage2_semantic_map/index.js +65 -1
  47. package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage3_context_capture/index.js +25 -22
  48. package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage1_semantic_map/index.js +1 -1
  49. package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage2_format_build/index.d.ts +1 -1
  50. package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage2_format_build/index.js +2 -2
  51. package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage2_thought_signature_inject/index.d.ts +10 -0
  52. package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage2_thought_signature_inject/index.js +172 -0
  53. package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage3_compat/index.js +2 -2
  54. package/dist/conversion/hub/pipeline/stages/req_process/req_process_stage1_tool_governance/index.js +1 -1
  55. package/dist/conversion/hub/pipeline/stages/req_process/req_process_stage2_route_select/index.js +1 -1
  56. package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage1_sse_decode/index.js +11 -11
  57. package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage2_format_parse/index.js +1 -1
  58. package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage3_semantic_map/index.d.ts +1 -0
  59. package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage3_semantic_map/index.js +4 -2
  60. package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage3_thought_signature_capture/index.d.ts +10 -0
  61. package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage3_thought_signature_capture/index.js +71 -0
  62. package/dist/conversion/hub/pipeline/stages/resp_outbound/resp_outbound_stage1_client_remap/index.d.ts +1 -0
  63. package/dist/conversion/hub/pipeline/stages/resp_outbound/resp_outbound_stage1_client_remap/index.js +17 -9
  64. package/dist/conversion/hub/pipeline/stages/resp_outbound/resp_outbound_stage2_sse_stream/index.js +2 -2
  65. package/dist/conversion/hub/pipeline/stages/resp_process/resp_process_stage1_tool_governance/index.js +40 -2
  66. package/dist/conversion/hub/pipeline/stages/resp_process/resp_process_stage2_finalize/index.js +1 -1
  67. package/dist/conversion/hub/pipeline/target-utils.js +9 -5
  68. package/dist/conversion/hub/pipeline/thought-signature/thought-signature-center.d.ts +14 -0
  69. package/dist/conversion/hub/pipeline/thought-signature/thought-signature-center.js +289 -0
  70. package/dist/conversion/hub/process/chat-process.js +256 -16
  71. package/dist/conversion/hub/response/provider-response.d.ts +8 -0
  72. package/dist/conversion/hub/response/provider-response.js +91 -27
  73. package/dist/conversion/hub/response/response-mappers.d.ts +10 -3
  74. package/dist/conversion/hub/response/response-mappers.js +30 -6
  75. package/dist/conversion/hub/response/response-runtime.js +4 -38
  76. package/dist/conversion/hub/snapshot-recorder.js +5 -1
  77. package/dist/conversion/hub/standardized-bridge.js +23 -15
  78. package/dist/conversion/pipeline/codecs/v2/anthropic-openai-pipeline.js +36 -5
  79. package/dist/conversion/responses/responses-openai-bridge.js +20 -4
  80. package/dist/conversion/shared/gemini-tool-utils.d.ts +8 -1
  81. package/dist/conversion/shared/gemini-tool-utils.js +580 -108
  82. package/dist/conversion/shared/jsonish.js +1 -1
  83. package/dist/conversion/shared/mcp-injection.js +67 -33
  84. package/dist/conversion/shared/openai-finalizer.js +2 -1
  85. package/dist/conversion/shared/openai-message-normalize.js +76 -21
  86. package/dist/conversion/shared/responses-output-builder.js +6 -0
  87. package/dist/conversion/shared/runtime-metadata.d.ts +7 -0
  88. package/dist/conversion/shared/runtime-metadata.js +23 -0
  89. package/dist/conversion/shared/text-markup-normalizer.d.ts +2 -0
  90. package/dist/conversion/shared/text-markup-normalizer.js +284 -4
  91. package/dist/conversion/shared/tool-canonicalizer.js +2 -1
  92. package/dist/conversion/shared/tool-governor.js +3 -3
  93. package/dist/filters/engine.js +5 -5
  94. package/dist/filters/special/request-tool-list-filter.js +194 -60
  95. package/dist/filters/special/request-tools-normalize.js +1 -1
  96. package/dist/filters/special/response-tool-text-canonicalize.d.ts +4 -7
  97. package/dist/filters/special/response-tool-text-canonicalize.js +7 -35
  98. package/dist/filters/special/tool-filter-hooks.js +58 -62
  99. package/dist/guidance/index.js +5 -1
  100. package/dist/http/sse-response.js +6 -6
  101. package/dist/router/virtual-router/bootstrap.js +54 -4
  102. package/dist/router/virtual-router/engine-health.d.ts +1 -1
  103. package/dist/router/virtual-router/engine-health.js +11 -110
  104. package/dist/router/virtual-router/engine-selection/alias-selection.d.ts +30 -0
  105. package/dist/router/virtual-router/engine-selection/alias-selection.js +237 -0
  106. package/dist/router/virtual-router/engine-selection/context-weight-multipliers.d.ts +11 -0
  107. package/dist/router/virtual-router/engine-selection/context-weight-multipliers.js +23 -0
  108. package/dist/router/virtual-router/engine-selection/direct-provider-model.d.ts +9 -0
  109. package/dist/router/virtual-router/engine-selection/direct-provider-model.js +49 -0
  110. package/dist/router/virtual-router/engine-selection/instruction-target.d.ts +6 -0
  111. package/dist/router/virtual-router/engine-selection/instruction-target.js +54 -0
  112. package/dist/router/virtual-router/engine-selection/key-parsing.d.ts +8 -0
  113. package/dist/router/virtual-router/engine-selection/key-parsing.js +64 -0
  114. package/dist/router/virtual-router/engine-selection/route-utils.d.ts +12 -0
  115. package/dist/router/virtual-router/engine-selection/route-utils.js +150 -0
  116. package/dist/router/virtual-router/engine-selection/routing-state-filter.d.ts +4 -0
  117. package/dist/router/virtual-router/engine-selection/routing-state-filter.js +50 -0
  118. package/dist/router/virtual-router/engine-selection/selection-deps.d.ts +39 -0
  119. package/dist/router/virtual-router/engine-selection/selection-deps.js +1 -0
  120. package/dist/router/virtual-router/engine-selection/sticky-pool.d.ts +11 -0
  121. package/dist/router/virtual-router/engine-selection/sticky-pool.js +109 -0
  122. package/dist/router/virtual-router/engine-selection/tier-priority.d.ts +12 -0
  123. package/dist/router/virtual-router/engine-selection/tier-priority.js +55 -0
  124. package/dist/router/virtual-router/engine-selection/tier-selection-select.d.ts +22 -0
  125. package/dist/router/virtual-router/engine-selection/tier-selection-select.js +423 -0
  126. package/dist/router/virtual-router/engine-selection/tier-selection.d.ts +3 -0
  127. package/dist/router/virtual-router/engine-selection/tier-selection.js +228 -0
  128. package/dist/router/virtual-router/engine-selection.d.ts +4 -30
  129. package/dist/router/virtual-router/engine-selection.js +10 -962
  130. package/dist/router/virtual-router/engine.d.ts +1 -0
  131. package/dist/router/virtual-router/engine.js +64 -11
  132. package/dist/router/virtual-router/routing-instructions.js +6 -1
  133. package/dist/router/virtual-router/stop-message-state-sync.d.ts +5 -0
  134. package/dist/router/virtual-router/stop-message-state-sync.js +6 -14
  135. package/dist/router/virtual-router/types.d.ts +38 -1
  136. package/dist/servertool/clock/config.d.ts +8 -0
  137. package/dist/servertool/clock/config.js +22 -0
  138. package/dist/servertool/clock/log.d.ts +3 -0
  139. package/dist/servertool/clock/log.js +13 -0
  140. package/dist/servertool/clock/task-store.d.ts +1 -1
  141. package/dist/servertool/clock/task-store.js +1 -1
  142. package/dist/servertool/clock/tasks.js +1 -1
  143. package/dist/servertool/engine.js +146 -21
  144. package/dist/servertool/handlers/clock-auto.js +11 -6
  145. package/dist/servertool/handlers/clock.js +36 -10
  146. package/dist/servertool/handlers/followup-request-builder.js +8 -2
  147. package/dist/servertool/handlers/gemini-empty-reply-continue.js +15 -9
  148. package/dist/servertool/handlers/iflow-model-error-retry.js +6 -4
  149. package/dist/servertool/handlers/recursive-detection-guard.js +4 -2
  150. package/dist/servertool/handlers/stop-message-auto.js +100 -10
  151. package/dist/servertool/handlers/vision.js +4 -1
  152. package/dist/servertool/handlers/web-search.js +3 -1
  153. package/dist/servertool/pending-session.d.ts +19 -0
  154. package/dist/servertool/pending-session.js +97 -0
  155. package/dist/servertool/reenter-backend.js +5 -3
  156. package/dist/servertool/server-side-tools.js +235 -6
  157. package/dist/servertool/types.d.ts +13 -0
  158. package/dist/sse/json-to-sse/event-generators/responses.js +1 -1
  159. package/dist/sse/shared/chat-serializer.js +2 -2
  160. package/dist/sse/shared/constants.js +1 -1
  161. package/dist/sse/sse-to-json/anthropic-sse-to-json-converter.d.ts +7 -1
  162. package/dist/sse/sse-to-json/builders/response-builder.js +16 -0
  163. package/dist/sse/sse-to-json/responses-sse-to-json-converter.d.ts +1 -1
  164. package/dist/tools/apply-patch/execution-capturer.js +1 -1
  165. package/dist/tools/exec-command/normalize.js +4 -0
  166. package/dist/tools/exec-command/regression-capturer.js +1 -1
  167. package/package.json +10 -5
@@ -0,0 +1,423 @@
1
+ import { computeContextMultiplier } from '../context-weighted.js';
2
+ import { computeHealthWeight } from '../health-weighted.js';
3
+ import { pinCandidatesByAliasQueue, pinCandidatesByBestQuota, resolveAliasSelectionStrategy } from './alias-selection.js';
4
+ import { computeContextWeightMultipliers } from './context-weight-multipliers.js';
5
+ import { extractKeyAlias, extractProviderId, getProviderModelId } from './key-parsing.js';
6
+ import { pickPriorityGroup } from './tier-priority.js';
7
+ const ANTIGRAVITY_COOLDOWN_ALIAS_THRESHOLD_MS = 30_000;
8
/**
 * Narrows a candidate list according to the configured alias-selection strategy.
 *
 * Candidates may span several providers. Keys are grouped by
 * (providerId, modelId); a group is only pinned when it holds at least two keys
 * with at least two distinct aliases and its provider resolves to the
 * 'sticky-queue' or 'best-quota' strategy. Keys that belong to no pinned group
 * pass through untouched, so cross-provider competition is preserved.
 *
 * @param {object} opts
 * @param {string[]} opts.candidates - Provider keys to narrow.
 * @param {string[]} opts.orderedTargets - Forwarded unchanged to the pinning helpers.
 * @param {object} opts.deps - Reads `aliasQueueStore`, `loadBalancer`, `providerRegistry`,
 *   `quotaView` and `healthManager`.
 * @param {Set<string>} [opts.excludedKeys] - Forwarded as `excludedProviderKeys` to the queue pinning helper.
 * @param {number} opts.now - Forwarded to the best-quota pinning helper.
 * @returns {string[]} The original `candidates` reference when nothing was pinned,
 *   otherwise a filtered copy that keeps the original order.
 */
function applyAliasStickyQueuePinning(opts) {
    const { candidates, orderedTargets, deps, excludedKeys, now } = opts;
    if (!Array.isArray(candidates) || candidates.length < 2) {
        return candidates;
    }
    const store = deps.aliasQueueStore;
    if (!store) {
        return candidates;
    }
    // The strategy depends only on the provider id, so resolve it lazily once per
    // provider instead of once per key and again per group.
    const strategyByProvider = new Map();
    const strategyFor = (providerId) => {
        if (!strategyByProvider.has(providerId)) {
            strategyByProvider.set(providerId, resolveAliasSelectionStrategy(providerId, deps.loadBalancer.getPolicy().aliasSelection));
        }
        return strategyByProvider.get(providerId);
    };
    const modelIdOfKey = (key) => getProviderModelId(key, deps.providerRegistry);
    const groups = new Map();
    const keyToGroup = new Map();
    for (const key of candidates) {
        if (!key || typeof key !== 'string') {
            continue;
        }
        const providerId = extractProviderId(key) ?? '';
        if (!providerId) {
            continue;
        }
        const strategy = strategyFor(providerId);
        if (strategy !== 'sticky-queue' && strategy !== 'best-quota') {
            continue;
        }
        const modelId = modelIdOfKey(key) ?? '';
        if (!modelId) {
            continue;
        }
        const groupId = `${providerId}::${modelId}`;
        const entry = groups.get(groupId) ?? { providerId, modelId, strategy, keys: [] };
        entry.keys.push(key);
        groups.set(groupId, entry);
        keyToGroup.set(key, groupId);
    }
    if (groups.size === 0) {
        return candidates;
    }
    const pinnedByGroup = new Map();
    for (const [groupId, group] of groups.entries()) {
        if (group.keys.length < 2) {
            continue;
        }
        // Only pin when we have multiple aliases for the same provider+model.
        const aliases = new Set();
        for (const key of group.keys) {
            const alias = extractKeyAlias(key);
            if (alias) {
                aliases.add(alias);
            }
        }
        if (aliases.size < 2) {
            continue;
        }
        let pinned = null;
        if (group.strategy === 'best-quota') {
            pinned = pinCandidatesByBestQuota({
                providerId: group.providerId,
                modelId: group.modelId,
                candidates: group.keys,
                orderedTargets,
                aliasOfKey: extractKeyAlias,
                modelIdOfKey,
                quotaView: deps.quotaView,
                now
            });
        }
        // Sticky-queue groups land here directly; best-quota groups fall back to the
        // queue only when the quota helper returned a falsy result.
        if (!pinned) {
            pinned = pinCandidatesByAliasQueue({
                queueStore: store,
                providerId: group.providerId,
                modelId: group.modelId,
                candidates: group.keys,
                orderedTargets,
                excludedProviderKeys: excludedKeys,
                aliasOfKey: extractKeyAlias,
                modelIdOfKey,
                availabilityCheck: (key) => deps.healthManager.isAvailable(key)
            });
        }
        if (pinned && pinned.length) {
            pinnedByGroup.set(groupId, new Set(pinned));
        }
    }
    if (pinnedByGroup.size === 0) {
        return candidates;
    }
    return candidates.filter((key) => {
        const groupId = keyToGroup.get(key);
        if (!groupId) {
            return true;
        }
        const pinned = pinnedByGroup.get(groupId);
        return pinned ? pinned.has(key) : true;
    });
}
105
/**
 * On a retry, restricts the candidates to Antigravity keys that serve a model
 * for which an Antigravity key has already been excluded, so the retry rotates
 * aliases inside Antigravity before falling back to other pool targets.
 *
 * The preference only applies when the alias-selection strategy resolved for
 * 'antigravity' is 'sticky-queue'.
 *
 * @param {object} opts
 * @param {string[]} opts.candidates - Provider keys still eligible for selection.
 * @param {Set<string>} [opts.excludedKeys] - Provider keys excluded from this attempt.
 * @param {object} opts.deps - Reads `loadBalancer` and `providerRegistry`.
 * @returns {string[]} The narrowed list, or the original `candidates` reference
 *   when the preference does not apply or would leave nothing to select.
 */
function preferAntigravityAliasesOnRetry(opts) {
    const { candidates, excludedKeys, deps } = opts;
    if (!Array.isArray(candidates) || candidates.length < 2) {
        return candidates;
    }
    if (!excludedKeys || excludedKeys.size === 0) {
        return candidates;
    }
    // Only apply this stronger retry preference for Antigravity.
    const antigravityId = 'antigravity';
    const strategy = resolveAliasSelectionStrategy(antigravityId, deps.loadBalancer.getPolicy().aliasSelection);
    if (strategy !== 'sticky-queue') {
        return candidates;
    }
    // Model id of an Antigravity key, or '' for anything else (non-string keys,
    // other providers, keys whose model lookup throws or yields nothing).
    const antigravityModelIdOf = (key) => {
        if (!key || typeof key !== 'string') {
            return '';
        }
        if ((extractProviderId(key) ?? '') !== antigravityId) {
            return '';
        }
        try {
            return getProviderModelId(key, deps.providerRegistry) ?? '';
        }
        catch {
            // Best effort: a key whose model id cannot be resolved simply does not take part.
            return '';
        }
    };
    const excludedModels = new Set();
    for (const excluded of excludedKeys) {
        const modelId = antigravityModelIdOf(excluded);
        if (modelId) {
            excludedModels.add(modelId);
        }
    }
    if (excludedModels.size === 0) {
        return candidates;
    }
    const preferred = candidates.filter((key) => {
        const modelId = antigravityModelIdOf(key);
        return Boolean(modelId) && excludedModels.has(modelId);
    });
    // If we still have any Antigravity candidates for the failing model, keep retrying
    // within Antigravity (rotate aliases) before falling back to other pool targets.
    return preferred.length > 0 ? preferred : candidates;
}
154
/**
 * Returns the first key the health manager reports as available, or null.
 */
function firstAvailableKey(keys, healthManager) {
    for (const key of keys) {
        if (healthManager.isAvailable(key)) {
            return key;
        }
    }
    return null;
}
/**
 * Returns the available key with the strictly greatest multiplier (first one wins
 * ties; a missing multiplier counts as 1), or null when no key is available.
 */
function bestAvailableKeyByMultiplier(keys, multipliers, healthManager) {
    let best = null;
    let bestMultiplier = Number.NEGATIVE_INFINITY;
    for (const key of keys) {
        if (!healthManager.isAvailable(key)) {
            continue;
        }
        const multiplier = multipliers[key] ?? 1;
        if (multiplier > bestMultiplier) {
            bestMultiplier = multiplier;
            best = key;
        }
    }
    return best;
}
/**
 * Computes the context multiplier for every key, in order, as `[key, multiplier]`
 * pairs. Returns null when `computeContextWeightMultipliers` yields nothing.
 */
function contextMultiplierPairs(keys, usage, warnRatio, cfg) {
    const ctx = computeContextWeightMultipliers({ candidates: keys, usage, warnRatio, cfg });
    if (!ctx) {
        return null;
    }
    const pairs = [];
    for (const key of keys) {
        const multiplier = computeContextMultiplier({
            effectiveSafeRefTokens: ctx.ref,
            effectiveSafeTokens: ctx.eff[key] ?? 1,
            cfg
        });
        pairs.push([key, multiplier]);
    }
    return pairs;
}
/**
 * Picks one provider key out of a tier's candidate pool.
 *
 * Without `deps.quotaView` the whole pool is handled at once: a single candidate
 * is returned as-is, otherwise the pool goes through retry preference and alias
 * pinning and is then handed to the load balancer (or, for a 'priority' tier on
 * a recovery attempt, the first available key is returned).
 *
 * With `deps.quotaView` the candidates are first filtered by their quota entry
 * (not in pool, cooling down, blacklisted) and split into buckets by
 * `priorityTier` (100 when absent). Buckets are tried in ascending priority;
 * inside a bucket, keys are ordered by selection penalty and then by input order.
 *
 * @param {object} opts
 * @param {string} opts.routeName - Prefix of the route name given to the load balancer.
 * @param {object} opts.tier - Reads `id`, `mode` and `targets`.
 * @param {string} [opts.stickyKey] - Passed to the load balancer unless `options.allowAliasRotation` is truthy.
 * @param {string[]} opts.candidates - Provider keys eligible for this tier.
 * @param {boolean} opts.isSafePool - Context weighting is only considered when truthy.
 * @param {object} opts.deps - Reads `quotaView`, `healthManager`, `loadBalancer`, `providerRegistry`.
 * @param {object} opts.options - Reads `allowAliasRotation`.
 * @param {object} opts.contextResult - Reads `usage` when context weighting applies.
 * @param {number} opts.warnRatio - Forwarded to `computeContextWeightMultipliers`.
 * @param {Set<string>} [opts.excludedKeys] - Forwarded to the retry-preference and pinning helpers.
 * @param {boolean} opts.isRecoveryAttempt - Switches to the retry selection rules.
 * @param {number} opts.now - Reference time for cooldown and blacklist checks.
 * @param {number} opts.nowForWeights - Reference time passed to `computeHealthWeight`.
 * @param {object} opts.healthWeightedCfg - Reads `enabled` and `recoverToBestOnRetry`.
 * @param {object} opts.contextWeightedCfg - Reads `enabled`; forwarded to the context helpers.
 * @returns {string|null} The selected provider key, or null when nothing could be selected.
 */
export function selectProviderKeyFromCandidatePool(opts) {
    const { routeName, tier, stickyKey, candidates, isSafePool, deps, options, contextResult, warnRatio, excludedKeys, isRecoveryAttempt, now, nowForWeights, healthWeightedCfg, contextWeightedCfg } = opts;
    const quotaView = deps.quotaView;
    const isAvailable = (key) => deps.healthManager.isAvailable(key);
    const resolveStickyKey = () => (options.allowAliasRotation ? undefined : stickyKey);
    const tierStrategy = () => (tier.mode === 'round-robin' ? 'round-robin' : undefined);
    const contextWeightingActive = () => Boolean(isSafePool && contextWeightedCfg.enabled);
    if (!quotaView) {
        // Single-provider pool should never be "emptied" by health/cooldown.
        // If there's only one possible target, we must return it even if it's currently unhealthy,
        // otherwise context routing can incorrectly fall back to a smaller-context route.
        if (candidates.length === 1) {
            return candidates[0] ?? null;
        }
        const retryPreferredCandidates = isRecoveryAttempt
            ? preferAntigravityAliasesOnRetry({ candidates, excludedKeys, deps })
            : candidates;
        // Alias-level selection strategy (config-driven).
        // Apply sticky-queue pinning per provider/model group (candidates can be mixed providers).
        const pinnedCandidates = applyAliasStickyQueuePinning({
            candidates: retryPreferredCandidates,
            orderedTargets: tier.targets,
            deps,
            excludedKeys,
            now
        });
        // Context weights on a 0..100 scale (never below 1), or undefined when
        // context weighting is off or yields nothing.
        const contextWeightsFor = (keys) => {
            if (!contextWeightingActive()) {
                return undefined;
            }
            const pairs = contextMultiplierPairs(keys, contextResult.usage, warnRatio, contextWeightedCfg);
            if (!pairs) {
                return undefined;
            }
            const weights = {};
            for (const [key, multiplier] of pairs) {
                weights[key] = Math.max(1, Math.round(100 * multiplier));
            }
            return weights;
        };
        if (tier.mode === 'priority') {
            if (isRecoveryAttempt) {
                return firstAvailableKey(pinnedCandidates, deps.healthManager);
            }
            const group = pickPriorityGroup({
                candidates: pinnedCandidates,
                orderedTargets: tier.targets,
                providerRegistry: deps.providerRegistry,
                healthManager: deps.healthManager
            });
            if (!group) {
                return null;
            }
            // Context weighting honours contextWeightedCfg.enabled here as well, in line
            // with the non-priority and quota-aware selection paths.
            return deps.loadBalancer.select({
                routeName: `${routeName}:${tier.id}:priority:group:${group.groupId}`,
                candidates: group.groupCandidates,
                stickyKey: resolveStickyKey(),
                weights: contextWeightsFor(group.groupCandidates),
                availabilityCheck: isAvailable
            }, 'round-robin');
        }
        return deps.loadBalancer.select({
            routeName: `${routeName}:${tier.id}`,
            candidates: pinnedCandidates,
            stickyKey: resolveStickyKey(),
            weights: contextWeightsFor(pinnedCandidates),
            availabilityCheck: isAvailable
        }, tierStrategy());
    }
    // Quota-aware path: bucket the eligible keys by priority tier.
    const buckets = new Map();
    let order = 0;
    const addToBucket = (priority, key, penalty) => {
        const list = buckets.get(priority) ?? [];
        list.push({ key, penalty, order: order++ });
        buckets.set(priority, list);
    };
    for (const key of candidates) {
        const entry = quotaView(key);
        if (!entry) {
            // No quota entry: keep the key, in the default bucket, without penalty.
            addToBucket(100, key, 0);
            continue;
        }
        if (!entry.inPool) {
            continue;
        }
        if (entry.cooldownUntil && entry.cooldownUntil > now) {
            const providerId = extractProviderId(key) ?? '';
            const cooldownMs = entry.cooldownUntil - now;
            // Cooling keys are dropped, except Antigravity keys whose remaining cooldown
            // is below the alias threshold.
            if (providerId !== 'antigravity' || cooldownMs >= ANTIGRAVITY_COOLDOWN_ALIAS_THRESHOLD_MS) {
                continue;
            }
        }
        if (entry.blacklistUntil && entry.blacklistUntil > now) {
            continue;
        }
        const tierPriority = typeof entry.priorityTier === 'number' && Number.isFinite(entry.priorityTier) ? entry.priorityTier : 100;
        const penaltyRaw = entry.selectionPenalty;
        const penalty = typeof penaltyRaw === 'number' && Number.isFinite(penaltyRaw) && penaltyRaw > 0 ? Math.floor(penaltyRaw) : 0;
        addToBucket(tierPriority, key, penalty);
    }
    const sortedPriorities = Array.from(buckets.keys()).sort((a, b) => a - b);
    for (const priority of sortedPriorities) {
        const bucket = buckets.get(priority) ?? [];
        if (!bucket.length) {
            continue;
        }
        bucket.sort((a, b) => (a.penalty - b.penalty) || (a.order - b.order));
        let bucketCandidates = bucket.map((item) => item.key);
        // Single-provider pool should never be "emptied" by health/cooldown.
        // NOTE(review): this shortcut fires per priority bucket, not per pool, and skips
        // the health check even when later buckets hold other keys. Confirm this is intended.
        if (bucketCandidates.length === 1) {
            return bucketCandidates[0] ?? null;
        }
        if (isRecoveryAttempt) {
            bucketCandidates = preferAntigravityAliasesOnRetry({ candidates: bucketCandidates, excludedKeys, deps });
        }
        // Alias-level selection strategy (config-driven).
        // For configured providers, we stick to one alias globally and rotate only on error/unavailability.
        bucketCandidates = applyAliasStickyQueuePinning({
            candidates: bucketCandidates,
            orderedTargets: tier.targets,
            deps,
            excludedKeys,
            now
        });
        const bucketPenaltyMap = {};
        const bucketWeights = {};
        const bucketMultipliers = {};
        for (const item of bucket) {
            bucketPenaltyMap[item.key] = item.penalty;
            if (healthWeightedCfg.enabled) {
                const { weight, multiplier } = computeHealthWeight(quotaView(item.key), nowForWeights, healthWeightedCfg);
                bucketWeights[item.key] = weight;
                bucketMultipliers[item.key] = multiplier;
            }
            else {
                // Legacy: penalty => lower weight, but never zero (unhealthy should still get a chance).
                bucketWeights[item.key] = Math.max(1, Math.floor(100 / (1 + Math.max(0, item.penalty))));
                bucketMultipliers[item.key] = 1;
            }
        }
        if (contextWeightingActive()) {
            const pairs = contextMultiplierPairs(bucketCandidates, contextResult.usage, warnRatio, contextWeightedCfg);
            if (pairs) {
                for (const [key, multiplier] of pairs) {
                    bucketWeights[key] = Math.max(1, Math.round((bucketWeights[key] ?? 1) * multiplier));
                }
            }
        }
        if (isRecoveryAttempt) {
            // Retry rules are the same for every tier mode: take the healthiest available
            // key when configured to do so, otherwise the first available one.
            const recovered = healthWeightedCfg.enabled && healthWeightedCfg.recoverToBestOnRetry
                ? bestAvailableKeyByMultiplier(bucketCandidates, bucketMultipliers, deps.healthManager)
                : firstAvailableKey(bucketCandidates, deps.healthManager);
            if (recovered) {
                return recovered;
            }
            continue;
        }
        if (tier.mode === 'priority') {
            const group = pickPriorityGroup({
                candidates: bucketCandidates,
                orderedTargets: tier.targets,
                providerRegistry: deps.providerRegistry,
                healthManager: deps.healthManager,
                penalties: bucketPenaltyMap
            });
            if (!group) {
                continue;
            }
            const groupWeights = {};
            for (const key of group.groupCandidates) {
                groupWeights[key] = bucketWeights[key] ?? 1;
            }
            const selectedFromGroup = deps.loadBalancer.select({
                routeName: `${routeName}:${tier.id}:priority:${priority}:group:${group.groupId}`,
                candidates: group.groupCandidates,
                stickyKey: resolveStickyKey(),
                weights: groupWeights,
                availabilityCheck: isAvailable
            }, 'round-robin');
            if (selectedFromGroup) {
                return selectedFromGroup;
            }
            continue;
        }
        const selected = deps.loadBalancer.select({
            routeName: `${routeName}:${tier.id}`,
            candidates: bucketCandidates,
            stickyKey: resolveStickyKey(),
            weights: bucketWeights,
            availabilityCheck: isAvailable
        }, tierStrategy());
        if (selected) {
            return selected;
        }
    }
    return null;
}
@@ -0,0 +1,3 @@
1
+ import type { RoutePoolTier, RoutingFeatures } from '../types.js';
2
+ import type { SelectionDeps, TrySelectFromTierOptions, SelectionResult } from './selection-deps.js';
3
+ export declare function trySelectFromTier(routeName: string, tier: RoutePoolTier, stickyKey: string | undefined, estimatedTokens: number, features: RoutingFeatures, deps: SelectionDeps, options: TrySelectFromTierOptions): SelectionResult;
@@ -0,0 +1,228 @@
1
+ import { DEFAULT_ROUTE } from '../types.js';
2
+ import { resolveContextWeightedConfig } from '../context-weighted.js';
3
+ import { resolveHealthWeightedConfig } from '../health-weighted.js';
4
+ import { pinCandidatesByAliasQueue, resolveAliasSelectionStrategy } from './alias-selection.js';
5
+ import { extractKeyAlias, extractKeyIndex, extractProviderId, getProviderModelId } from './key-parsing.js';
6
+ import { selectProviderKeyFromCandidatePool } from './tier-selection-select.js';
7
/**
 * Attempts to select one provider key from a single routing tier.
 *
 * The tier's targets are narrowed step by step: keys excluded by the request
 * metadata, keys reported as cooling down, the allow/deny/required options,
 * the server-tool requirement and the image-attachment preference. Whatever
 * survives is classified by `deps.contextAdvisor` and each resulting pool is
 * offered to `selectProviderKeyFromCandidatePool` until one yields a key.
 *
 * @param {string} routeName - Route being resolved; used in failure hints and
 *   for the image-attachment preference.
 * @param {object} tier - Tier descriptor; `tier.id` and `tier.targets` are read here.
 * @param {string|undefined} stickyKey - Forwarded unchanged to pool selection.
 * @param {number} estimatedTokens - Forwarded to `deps.contextAdvisor.classify`.
 * @param {object} features - Request features; reads `metadata.excludedProviderKeys`,
 *   `metadata.serverToolRequired` and `hasImageAttachment`.
 * @param {object} deps - Collaborators: `isProviderCoolingDown`, `providerRegistry`,
 *   `contextAdvisor`, `loadBalancer`, `quotaView` and (optionally) `aliasQueueStore`.
 * @param {object} options - Per-call filters (`disabledProviders`, `disabledKeysMap`,
 *   `allowedProviders`, `disabledModels`, `requiredProviderKeys`); also forwarded
 *   as-is to pool selection.
 * @returns {object} `{ providerKey, poolTargets, tierId }` on success, or the same
 *   shape with `providerKey: null` and a `failureHint` string when nothing was selected.
 */
export function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features, deps, options) {
    const { disabledProviders, disabledKeysMap, allowedProviders, disabledModels, requiredProviderKeys } = options;
    let targets = Array.isArray(tier.targets) ? tier.targets : [];
    const excludedKeys = readExcludedProviderKeys(features.metadata);
    // Any excluded key marks this call as a retry after an earlier failure.
    const isRecoveryAttempt = excludedKeys.size > 0;
    if (isRecoveryAttempt) {
        // The alias queue is given the full, unfiltered tier order.
        recordAliasQueueFailuresFromExcludedKeys(excludedKeys, tier.targets, deps);
        targets = targets.filter((key) => !excludedKeys.has(key));
    }
    if (targets.length > 0) {
        // Always respect cooldown signals. If a route/tier is depleted due to cooldown,
        // routing is expected to fall back to other tiers/routes (e.g. longcontext → default),
        // rather than repeatedly selecting the cooled-down provider.
        targets = targets.filter((key) => !deps.isProviderCoolingDown(key));
    }
    if (allowedProviders && allowedProviders.size > 0) {
        targets = targets.filter((key) => {
            const providerId = extractProviderId(key);
            return providerId && allowedProviders.has(providerId);
        });
    }
    if (disabledProviders && disabledProviders.size > 0) {
        targets = targets.filter((key) => {
            const providerId = extractProviderId(key);
            return providerId && !disabledProviders.has(providerId);
        });
    }
    if (disabledKeysMap && disabledKeysMap.size > 0) {
        targets = targets.filter((key) => !isProviderKeyDisabled(key, disabledKeysMap));
    }
    if (disabledModels && disabledModels.size > 0) {
        targets = targets.filter((key) => !isProviderModelDisabled(key, disabledModels, deps));
    }
    if (requiredProviderKeys && requiredProviderKeys.size > 0) {
        targets = targets.filter((key) => requiredProviderKeys.has(key));
    }
    if (features.metadata?.serverToolRequired === true) {
        targets = keepServerToolCapableTargets(targets, deps);
    }
    if (features.hasImageAttachment && (routeName === DEFAULT_ROUTE || routeName === 'thinking')) {
        targets = preferImageCapableTargets(targets, deps);
    }
    if (!targets.length) {
        return { providerKey: null, poolTargets: [], tierId: tier.id, failureHint: `${routeName}:${tier.id}:empty` };
    }
    const contextResult = deps.contextAdvisor.classify(targets, estimatedTokens, (key) => deps.providerRegistry.get(key));
    const prioritizedPools = buildContextCandidatePools(contextResult, routeName);
    const now = deps.quotaView ? Date.now() : 0;
    const policy = deps.loadBalancer.getPolicy();
    const healthWeightedCfg = resolveHealthWeightedConfig(policy.healthWeighted);
    const contextWeightedCfg = resolveContextWeightedConfig(policy.contextWeighted);
    const warnRatio = deps.contextAdvisor.getConfig().warnRatio;
    const nowForWeights = Date.now();
    for (const candidatePool of prioritizedPools) {
        const providerKey = selectProviderKeyFromCandidatePool({
            routeName,
            tier,
            stickyKey,
            candidates: candidatePool,
            // Identity comparison: the pool list holds the classifier's own arrays.
            isSafePool: candidatePool === contextResult.safe,
            deps,
            options,
            contextResult,
            warnRatio,
            excludedKeys,
            isRecoveryAttempt,
            now,
            nowForWeights,
            healthWeightedCfg,
            contextWeightedCfg
        });
        if (providerKey) {
            return { providerKey, poolTargets: tier.targets, tierId: tier.id };
        }
    }
    return {
        providerKey: null,
        poolTargets: tier.targets,
        tierId: tier.id,
        failureHint: describeAttempt(routeName, tier.id, contextResult)
    };
}

/** Builds the set of trimmed, non-empty string keys listed in `metadata.excludedProviderKeys`. */
function readExcludedProviderKeys(metadata) {
    const raw = metadata?.excludedProviderKeys;
    if (!Array.isArray(raw)) {
        return new Set();
    }
    return new Set(raw
        .filter((val) => typeof val === 'string')
        .map((val) => val.trim())
        .filter((val) => val.length > 0));
}

/** True when the key's alias, or its index plus one, is listed in the provider's disabled-key set. */
function isProviderKeyDisabled(key, disabledKeysMap) {
    const providerId = extractProviderId(key);
    if (!providerId) {
        return false;
    }
    const disabledKeys = disabledKeysMap.get(providerId);
    if (!disabledKeys || disabledKeys.size === 0) {
        return false;
    }
    const keyAlias = extractKeyAlias(key);
    const keyIndex = extractKeyIndex(key);
    if (keyAlias && disabledKeys.has(keyAlias)) {
        return true;
    }
    // The set is checked against `keyIndex + 1`, not the raw index.
    return keyIndex !== undefined && disabledKeys.has(keyIndex + 1);
}

/** True when the key's model id is listed in the provider's disabled-model set. */
function isProviderModelDisabled(key, disabledModels, deps) {
    const providerId = extractProviderId(key);
    if (!providerId) {
        return false;
    }
    const disabled = disabledModels.get(providerId);
    if (!disabled || disabled.size === 0) {
        return false;
    }
    const modelId = getProviderModelId(key, deps.providerRegistry);
    if (!modelId) {
        return false;
    }
    return disabled.has(modelId);
}

/** Keeps keys whose registry profile does not set `serverToolsDisabled`; keys whose lookup throws are dropped. */
function keepServerToolCapableTargets(targets, deps) {
    return targets.filter((key) => {
        try {
            return !deps.providerRegistry.get(key).serverToolsDisabled;
        }
        catch {
            // ignore unknown providers when filtering for servertools
            return false;
        }
    });
}

/** Narrows targets to 'responses'/'gemini' provider types when any exist; otherwise returns targets unchanged. */
function preferImageCapableTargets(targets, deps) {
    const prioritized = targets.filter((key) => {
        try {
            const profile = deps.providerRegistry.get(key);
            return profile.providerType === 'responses' || profile.providerType === 'gemini';
        }
        catch {
            // A key whose profile cannot be read is never prioritized.
            return false;
        }
    });
    return prioritized.length ? prioritized : targets;
}
162
/**
 * Reports each excluded provider key to the alias queue so that providers
 * using the 'sticky-queue' alias strategy can rotate past the failed alias.
 *
 * Does nothing without an alias queue store, without excluded keys, or
 * without an ordered target list. Errors raised by the queue are swallowed:
 * rotation is best-effort and must never block selection.
 *
 * @param {Set<string>} excludedKeys - Provider keys excluded for this request.
 * @param {string[]} orderedTargets - The tier's targets in their configured order.
 * @param {object} deps - Reads `aliasQueueStore`, `loadBalancer` and `providerRegistry`.
 * @returns {void}
 */
function recordAliasQueueFailuresFromExcludedKeys(excludedKeys, orderedTargets, deps) {
    const queueStore = deps.aliasQueueStore;
    if (!queueStore) {
        return;
    }
    if (!excludedKeys || excludedKeys.size === 0) {
        return;
    }
    if (!Array.isArray(orderedTargets) || orderedTargets.length === 0) {
        return;
    }
    const modelIdOf = (key) => getProviderModelId(key, deps.providerRegistry);
    for (const excludedKey of excludedKeys) {
        if (typeof excludedKey !== 'string' || !excludedKey) {
            continue;
        }
        const providerId = extractProviderId(excludedKey);
        if (!providerId) {
            continue;
        }
        const aliasPolicy = deps.loadBalancer.getPolicy().aliasSelection;
        if (resolveAliasSelectionStrategy(providerId, aliasPolicy) !== 'sticky-queue') {
            continue;
        }
        const modelId = modelIdOf(excludedKey);
        if (!modelId) {
            continue;
        }
        // Siblings of the excluded key: same provider and same model.
        const siblings = orderedTargets.filter((key) => extractProviderId(key) === providerId && modelIdOf(key) === modelId);
        if (siblings.length < 2) {
            // A single alias leaves nothing to rotate to.
            continue;
        }
        try {
            pinCandidatesByAliasQueue({
                queueStore,
                providerId,
                modelId,
                candidates: siblings,
                orderedTargets,
                excludedProviderKeys: new Set([excludedKey]),
                aliasOfKey: extractKeyAlias,
                modelIdOfKey: (key) => getProviderModelId(key, deps.providerRegistry),
                availabilityCheck: () => true
            });
        }
        catch {
            // best-effort: alias queue rotation must not block selection
        }
    }
}
203
/**
 * Lists the non-empty candidate pools of a context classification in the
 * order they should be tried: safe, then risky, then (for the 'longcontext'
 * route only) overflow.
 *
 * The returned entries are the classification's own arrays, not copies, so
 * callers may compare them by identity against `result.safe`.
 *
 * @param {{safe: Array, risky: Array, overflow: Array}} result - Classified pools.
 * @param {string} routeName - Route being resolved.
 * @returns {Array<Array>} Non-empty pools in priority order.
 */
function buildContextCandidatePools(result, routeName) {
    const { safe, risky, overflow } = result;
    const pools = routeName === 'longcontext' ? [safe, risky, overflow] : [safe, risky];
    return pools.filter((pool) => pool.length);
}
216
/**
 * Builds the failure hint for a tier that had candidates but produced no
 * selection. The suffix names the first non-empty pool, checked in the order
 * safe, risky, overflow; with all pools empty the bare prefix is returned.
 *
 * @param {string} routeName - Route being resolved.
 * @param {string|undefined} poolId - Tier/pool id; omitted from the prefix when falsy.
 * @param {{safe: Array, risky: Array, overflow: Array}} result - Classified pools.
 * @returns {string} `<route>[:<pool>]` optionally followed by a reason suffix.
 */
function describeAttempt(routeName, poolId, result) {
    const label = poolId ? `${routeName}:${poolId}` : routeName;
    const suffixByPool = [
        ['safe', ':health'],
        ['risky', ':context_risky'],
        ['overflow', ':max_context_window']
    ];
    for (const [poolName, suffix] of suffixByPool) {
        if (result[poolName].length > 0) {
            return `${label}${suffix}`;
        }
    }
    return label;
}