@jsonstudio/llms 0.6.1354 → 0.6.1399

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. package/dist/conversion/compat/profiles/chat-gemini.json +5 -0
  2. package/dist/conversion/hub/operation-table/semantic-mappers/gemini-mapper.js +310 -87
  3. package/dist/conversion/hub/pipeline/hub-pipeline/adapter-context.js +8 -0
  4. package/dist/conversion/hub/pipeline/hub-pipeline/execute-request-stage.js +6 -0
  5. package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage2_thought_signature_inject/index.d.ts +10 -0
  6. package/dist/conversion/hub/pipeline/stages/req_outbound/req_outbound_stage2_thought_signature_inject/index.js +172 -0
  7. package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage3_thought_signature_capture/index.d.ts +10 -0
  8. package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage3_thought_signature_capture/index.js +71 -0
  9. package/dist/conversion/hub/pipeline/thought-signature/thought-signature-center.d.ts +14 -0
  10. package/dist/conversion/hub/pipeline/thought-signature/thought-signature-center.js +289 -0
  11. package/dist/conversion/hub/response/provider-response.js +6 -0
  12. package/dist/router/virtual-router/bootstrap.js +6 -0
  13. package/dist/router/virtual-router/engine-selection/alias-selection.d.ts +15 -0
  14. package/dist/router/virtual-router/engine-selection/alias-selection.js +85 -4
  15. package/dist/router/virtual-router/engine-selection/tier-selection-select.js +40 -17
  16. package/dist/router/virtual-router/engine-selection/tier-selection.js +5 -2
  17. package/dist/router/virtual-router/engine.js +9 -1
  18. package/dist/router/virtual-router/types.d.ts +14 -1
  19. package/dist/servertool/engine.js +6 -6
  20. package/package.json +1 -1
@@ -1,8 +1,9 @@
1
+ const COOLDOWN_EMPTY_THRESHOLD_MS = 30_000;
1
2
  // Default provider-level strategy table.
2
3
  // This is a data-only default; callers can override via `loadBalancing.aliasSelection.providers`.
3
4
  export const DEFAULT_PROVIDER_ALIAS_SELECTION = {
4
- // Antigravity: upstream gateway may reject rapid cross-key switching; stick to one alias until error.
5
- antigravity: 'sticky-queue'
5
+ // Antigravity: prefer the alias with highest remaining quota; fall back to sticky-queue when quota is unknown.
6
+ antigravity: 'best-quota'
6
7
  };
7
8
  export function resolveAliasSelectionStrategy(providerId, cfg) {
8
9
  if (!providerId)
@@ -11,11 +12,11 @@ export function resolveAliasSelectionStrategy(providerId, cfg) {
11
12
  return 'none';
12
13
  const overrides = cfg?.providers ?? {};
13
14
  const override = overrides[providerId];
14
- if (override === 'none' || override === 'sticky-queue') {
15
+ if (override === 'none' || override === 'sticky-queue' || override === 'best-quota') {
15
16
  return override;
16
17
  }
17
18
  const def = cfg?.defaultStrategy;
18
- if (def === 'none' || def === 'sticky-queue') {
19
+ if (def === 'none' || def === 'sticky-queue' || def === 'best-quota') {
19
20
  return def;
20
21
  }
21
22
  const table = DEFAULT_PROVIDER_ALIAS_SELECTION[providerId];
@@ -100,6 +101,86 @@ export function pinCandidatesByAliasQueue(opts) {
100
101
  const selectedSet = new Set(selectedKeys);
101
102
  return candidates.filter((key) => selectedSet.has(key));
102
103
  }
104
+ export function pinCandidatesByBestQuota(opts) {
105
+ const { providerId, modelId, candidates, orderedTargets, aliasOfKey, modelIdOfKey, quotaView, now } = opts;
106
+ if (!quotaView)
107
+ return null;
108
+ if (!providerId || !modelId)
109
+ return null;
110
+ if (!Array.isArray(candidates) || candidates.length < 2)
111
+ return null;
112
+ const aliasBuckets = new Map();
113
+ const aliasOrder = new Map();
114
+ let order = 0;
115
+ for (const key of candidates) {
116
+ if (!key || typeof key !== 'string')
117
+ continue;
118
+ if (!key.startsWith(`${providerId}.`))
119
+ return null;
120
+ const m = modelIdOfKey(key);
121
+ if (!m || m !== modelId)
122
+ return null;
123
+ const alias = aliasOfKey(key);
124
+ if (!alias)
125
+ return null;
126
+ const list = aliasBuckets.get(alias) ?? [];
127
+ list.push(key);
128
+ aliasBuckets.set(alias, list);
129
+ if (!aliasOrder.has(alias)) {
130
+ aliasOrder.set(alias, order++);
131
+ }
132
+ }
133
+ if (aliasBuckets.size <= 1)
134
+ return null;
135
+ const preferredOrder = resolveAliasOrderFromTargets({
136
+ orderedTargets,
137
+ providerId,
138
+ modelId,
139
+ aliasOfKey,
140
+ modelIdOfKey,
141
+ allowedAliases: new Set(aliasBuckets.keys())
142
+ });
143
+ for (const alias of preferredOrder) {
144
+ if (!aliasOrder.has(alias)) {
145
+ aliasOrder.set(alias, order++);
146
+ }
147
+ }
148
+ const eligible = [];
149
+ for (const [alias, keys] of aliasBuckets.entries()) {
150
+ const entry = quotaView(keys[0] ?? '');
151
+ if (!entry || entry.inPool === false) {
152
+ continue;
153
+ }
154
+ if (entry.blacklistUntil && entry.blacklistUntil > now) {
155
+ continue;
156
+ }
157
+ if (entry.cooldownUntil && entry.cooldownUntil - now >= COOLDOWN_EMPTY_THRESHOLD_MS) {
158
+ continue;
159
+ }
160
+ const remainingRaw = entry.remainingFraction;
161
+ const remaining = typeof remainingRaw === 'number' && Number.isFinite(remainingRaw) ? remainingRaw : 0;
162
+ if (remaining <= 0) {
163
+ continue;
164
+ }
165
+ eligible.push({
166
+ alias,
167
+ score: remaining,
168
+ order: aliasOrder.get(alias) ?? Number.MAX_SAFE_INTEGER
169
+ });
170
+ }
171
+ if (!eligible.length) {
172
+ return null;
173
+ }
174
+ eligible.sort((a, b) => (b.score - a.score) || (a.order - b.order));
175
+ const selectedAlias = eligible[0]?.alias;
176
+ if (!selectedAlias)
177
+ return null;
178
+ const selectedKeys = aliasBuckets.get(selectedAlias) ?? [];
179
+ if (!selectedKeys.length)
180
+ return null;
181
+ const selectedSet = new Set(selectedKeys);
182
+ return candidates.filter((key) => selectedSet.has(key));
183
+ }
103
184
  function resolveAliasOrderFromTargets(opts) {
104
185
  const { orderedTargets, providerId, modelId, aliasOfKey, modelIdOfKey, allowedAliases } = opts;
105
186
  if (!Array.isArray(orderedTargets) || orderedTargets.length === 0) {
@@ -1,11 +1,12 @@
1
1
  import { computeContextMultiplier } from '../context-weighted.js';
2
2
  import { computeHealthWeight } from '../health-weighted.js';
3
- import { pinCandidatesByAliasQueue, resolveAliasSelectionStrategy } from './alias-selection.js';
3
+ import { pinCandidatesByAliasQueue, pinCandidatesByBestQuota, resolveAliasSelectionStrategy } from './alias-selection.js';
4
4
  import { computeContextWeightMultipliers } from './context-weight-multipliers.js';
5
5
  import { extractKeyAlias, extractProviderId, getProviderModelId } from './key-parsing.js';
6
6
  import { pickPriorityGroup } from './tier-priority.js';
7
+ const ANTIGRAVITY_COOLDOWN_ALIAS_THRESHOLD_MS = 30_000;
7
8
  function applyAliasStickyQueuePinning(opts) {
8
- const { candidates, orderedTargets, deps, excludedKeys } = opts;
9
+ const { candidates, orderedTargets, deps, excludedKeys, now } = opts;
9
10
  if (!Array.isArray(candidates) || candidates.length < 2) {
10
11
  return candidates;
11
12
  }
@@ -26,7 +27,7 @@ function applyAliasStickyQueuePinning(opts) {
26
27
  continue;
27
28
  }
28
29
  const strategy = resolveAliasSelectionStrategy(providerId, deps.loadBalancer.getPolicy().aliasSelection);
29
- if (strategy !== 'sticky-queue') {
30
+ if (strategy !== 'sticky-queue' && strategy !== 'best-quota') {
30
31
  continue;
31
32
  }
32
33
  const modelId = getProviderModelId(key, deps.providerRegistry) ?? '';
@@ -58,17 +59,33 @@ function applyAliasStickyQueuePinning(opts) {
58
59
  if (aliases.size < 2) {
59
60
  continue;
60
61
  }
61
- const pinned = pinCandidatesByAliasQueue({
62
- queueStore: store,
63
- providerId: group.providerId,
64
- modelId: group.modelId,
65
- candidates: group.keys,
66
- orderedTargets,
67
- excludedProviderKeys: excludedKeys,
68
- aliasOfKey: extractKeyAlias,
69
- modelIdOfKey: (key) => getProviderModelId(key, deps.providerRegistry),
70
- availabilityCheck: (key) => deps.healthManager.isAvailable(key)
71
- });
62
+ const strategy = resolveAliasSelectionStrategy(group.providerId, deps.loadBalancer.getPolicy().aliasSelection);
63
+ let pinned = null;
64
+ if (strategy === 'best-quota') {
65
+ pinned = pinCandidatesByBestQuota({
66
+ providerId: group.providerId,
67
+ modelId: group.modelId,
68
+ candidates: group.keys,
69
+ orderedTargets,
70
+ aliasOfKey: extractKeyAlias,
71
+ modelIdOfKey: (key) => getProviderModelId(key, deps.providerRegistry),
72
+ quotaView: deps.quotaView,
73
+ now
74
+ });
75
+ }
76
+ if (!pinned) {
77
+ pinned = pinCandidatesByAliasQueue({
78
+ queueStore: store,
79
+ providerId: group.providerId,
80
+ modelId: group.modelId,
81
+ candidates: group.keys,
82
+ orderedTargets,
83
+ excludedProviderKeys: excludedKeys,
84
+ aliasOfKey: extractKeyAlias,
85
+ modelIdOfKey: (key) => getProviderModelId(key, deps.providerRegistry),
86
+ availabilityCheck: (key) => deps.healthManager.isAvailable(key)
87
+ });
88
+ }
72
89
  if (pinned && pinned.length) {
73
90
  pinnedByGroup.set(groupId, new Set(pinned));
74
91
  }
@@ -161,7 +178,8 @@ export function selectProviderKeyFromCandidatePool(opts) {
161
178
  candidates: retryPreferredCandidates,
162
179
  orderedTargets: tier.targets,
163
180
  deps,
164
- excludedKeys
181
+ excludedKeys,
182
+ now
165
183
  });
166
184
  if (tier.mode === 'priority') {
167
185
  if (isRecoveryAttempt) {
@@ -240,7 +258,11 @@ export function selectProviderKeyFromCandidatePool(opts) {
240
258
  continue;
241
259
  }
242
260
  if (entry.cooldownUntil && entry.cooldownUntil > now) {
243
- continue;
261
+ const providerId = extractProviderId(key) ?? '';
262
+ const cooldownMs = entry.cooldownUntil - now;
263
+ if (providerId !== 'antigravity' || cooldownMs >= ANTIGRAVITY_COOLDOWN_ALIAS_THRESHOLD_MS) {
264
+ continue;
265
+ }
244
266
  }
245
267
  if (entry.blacklistUntil && entry.blacklistUntil > now) {
246
268
  continue;
@@ -273,7 +295,8 @@ export function selectProviderKeyFromCandidatePool(opts) {
273
295
  candidates: bucketCandidates,
274
296
  orderedTargets: tier.targets,
275
297
  deps,
276
- excludedKeys
298
+ excludedKeys,
299
+ now
277
300
  });
278
301
  const bucketPenaltyMap = {};
279
302
  for (const item of bucket) {
@@ -123,7 +123,7 @@ export function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, f
123
123
  return { providerKey: null, poolTargets: [], tierId: tier.id, failureHint: `${routeName}:${tier.id}:empty` };
124
124
  }
125
125
  const contextResult = deps.contextAdvisor.classify(targets, estimatedTokens, (key) => deps.providerRegistry.get(key));
126
- const prioritizedPools = buildContextCandidatePools(contextResult);
126
+ const prioritizedPools = buildContextCandidatePools(contextResult, routeName);
127
127
  const quotaView = deps.quotaView;
128
128
  const now = quotaView ? Date.now() : 0;
129
129
  const healthWeightedCfg = resolveHealthWeightedConfig(deps.loadBalancer.getPolicy().healthWeighted);
@@ -200,7 +200,7 @@ function recordAliasQueueFailuresFromExcludedKeys(excludedKeys, orderedTargets,
200
200
  }
201
201
  }
202
202
  }
203
- function buildContextCandidatePools(result) {
203
+ function buildContextCandidatePools(result, routeName) {
204
204
  const ordered = [];
205
205
  if (result.safe.length) {
206
206
  ordered.push(result.safe);
@@ -208,6 +208,9 @@ function buildContextCandidatePools(result) {
208
208
  if (result.risky.length) {
209
209
  ordered.push(result.risky);
210
210
  }
211
+ if (routeName === 'longcontext' && result.overflow.length) {
212
+ ordered.push(result.overflow);
213
+ }
211
214
  return ordered;
212
215
  }
213
216
  function describeAttempt(routeName, poolId, result) {
@@ -12,6 +12,7 @@ import { buildHitReason, formatVirtualRouterHit } from './engine-logging.js';
12
12
  import { selectDirectProviderModel, selectFromStickyPool, selectProviderImpl } from './engine-selection.js';
13
13
  import { applyQuotaDepletedImpl, applyQuotaRecoveryImpl, handleProviderFailureImpl, mapProviderErrorImpl } from './engine-health.js';
14
14
  import { mergeStopMessageFromPersisted } from './stop-message-state-sync.js';
15
+ const ANTIGRAVITY_COOLDOWN_ALIAS_THRESHOLD_MS = 30_000;
15
16
  export class VirtualRouterEngine {
16
17
  routing = {};
17
18
  providerRegistry = new ProviderRegistry();
@@ -1468,10 +1469,17 @@ export class VirtualRouterEngine {
1468
1469
  if (!expiry) {
1469
1470
  return false;
1470
1471
  }
1471
- if (Date.now() >= expiry) {
1472
+ const now = Date.now();
1473
+ if (now >= expiry) {
1472
1474
  this.providerCooldowns.delete(providerKey);
1473
1475
  return false;
1474
1476
  }
1477
+ if (providerKey.startsWith('antigravity.')) {
1478
+ const remaining = expiry - now;
1479
+ if (remaining < ANTIGRAVITY_COOLDOWN_ALIAS_THRESHOLD_MS) {
1480
+ return false;
1481
+ }
1482
+ }
1475
1483
  return true;
1476
1484
  }
1477
1485
  restoreHealthFromStore() {
@@ -152,7 +152,7 @@ export interface HealthWeightedLoadBalancingConfig {
152
152
  */
153
153
  recoverToBestOnRetry?: boolean;
154
154
  }
155
- export type AliasSelectionStrategy = 'none' | 'sticky-queue';
155
+ export type AliasSelectionStrategy = 'none' | 'sticky-queue' | 'best-quota';
156
156
  export interface AliasSelectionConfig {
157
157
  /**
158
158
  * Global on/off switch. When false, no alias-level selection is applied.
@@ -514,6 +514,19 @@ export interface ProviderQuotaViewEntry {
514
514
  inPool: boolean;
515
515
  reason?: string;
516
516
  priorityTier?: number;
517
+ /**
518
+ * Optional remaining quota fraction for the provider key (0..1).
519
+ * Used by alias-selection strategies that prefer higher remaining quota.
520
+ */
521
+ remainingFraction?: number | null;
522
+ /**
523
+ * Optional quota reset timestamp (ms since epoch) for the provider key.
524
+ */
525
+ quotaResetAtMs?: number | null;
526
+ /**
527
+ * Optional quota fetch timestamp (ms since epoch) for the provider key.
528
+ */
529
+ quotaFetchedAtMs?: number | null;
517
530
  /**
518
531
  * Optional soft penalty hint for selection ordering.
519
532
  * - 0 / undefined means no penalty
@@ -93,10 +93,8 @@ function coerceFollowupPayloadStream(payload, stream) {
93
93
  if (!payload || typeof payload !== 'object') {
94
94
  return payload;
95
95
  }
96
- // ServerTool followup requests must be non-streaming to keep parsing deterministic and avoid
97
- // provider-side SSE wrappers leaking into internal reenter calls.
98
- if (stream === false) {
99
- payload.stream = false;
96
+ if (typeof stream === 'boolean') {
97
+ payload.stream = stream;
100
98
  }
101
99
  return payload;
102
100
  }
@@ -440,7 +438,6 @@ export async function runServerToolOrchestration(options) {
440
438
  };
441
439
  }
442
440
  const metadata = {
443
- stream: false,
444
441
  ...(engineResult.execution.followup.metadata ?? {})
445
442
  };
446
443
  const rt = ensureRuntimeMetadata(metadata);
@@ -476,7 +473,10 @@ export async function runServerToolOrchestration(options) {
476
473
  const retryEmptyFollowupOnce = isStopMessageFlow || isGeminiEmptyReplyContinue;
477
474
  const maxAttempts = retryEmptyFollowupOnce ? 2 : 1;
478
475
  const followupRequestId = buildFollowupRequestId(options.requestId, engineResult.execution.followup.requestIdSuffix);
479
- let followupPayload = coerceFollowupPayloadStream(followupSeedPayload, metadata.stream === true);
476
+ const followupStream = typeof metadata.stream === 'boolean'
477
+ ? (metadata.stream)
478
+ : undefined;
479
+ let followupPayload = coerceFollowupPayloadStream(followupSeedPayload, followupStream);
480
480
  if (isGeminiEmptyReplyContinue) {
481
481
  // For gemini_empty_reply_continue, the goal is to recover text output from an empty/malformed reply.
482
482
  // Force the followup to be non-tool-calling to avoid repeated MALFORMED_FUNCTION_CALL loops.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jsonstudio/llms",
3
- "version": "0.6.1354",
3
+ "version": "0.6.1399",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",