@jsonstudio/llms 0.6.743 → 0.6.749

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -135,6 +135,7 @@ function expandRoutingTable(routingSource, aliasIndex) {
135
135
  const expandedPools = [];
136
136
  for (const pool of pools) {
137
137
  const expandedTargets = [];
138
+ let orderCounter = 0;
138
139
  for (const entry of pool.targets) {
139
140
  const parsed = parseRouteEntry(entry, aliasIndex);
140
141
  if (!parsed) {
@@ -150,16 +151,35 @@ function expandRoutingTable(routingSource, aliasIndex) {
150
151
  for (const alias of aliases) {
151
152
  const runtimeKey = buildRuntimeKey(parsed.providerId, alias);
152
153
  const targetKey = `${runtimeKey}.${parsed.modelId}`;
153
- pushUnique(expandedTargets, targetKey);
154
+ const existing = expandedTargets.find((candidate) => candidate.key === targetKey);
155
+ if (existing) {
156
+ if (parsed.priority > existing.priority) {
157
+ existing.priority = parsed.priority;
158
+ }
159
+ continue;
160
+ }
161
+ expandedTargets.push({ key: targetKey, priority: parsed.priority, order: orderCounter });
162
+ orderCounter += 1;
154
163
  targetKeys.add(targetKey);
155
164
  }
156
165
  }
157
166
  if (expandedTargets.length) {
167
+ const sortedTargets = pool.mode === 'priority'
168
+ ? [...expandedTargets]
169
+ .sort((a, b) => {
170
+ if (a.priority !== b.priority) {
171
+ return b.priority - a.priority;
172
+ }
173
+ return a.order - b.order;
174
+ })
175
+ .map((candidate) => candidate.key)
176
+ : expandedTargets.map((candidate) => candidate.key);
158
177
  expandedPools.push({
159
178
  id: pool.id,
160
179
  priority: pool.priority,
161
180
  backup: pool.backup,
162
- targets: expandedTargets,
181
+ targets: sortedTargets,
182
+ ...(pool.mode ? { mode: pool.mode } : {}),
163
183
  ...(pool.force ? { force: true } : {})
164
184
  });
165
185
  }
@@ -277,6 +297,7 @@ function normalizeRoutePoolEntry(routeName, entry, index, total) {
277
297
  (typeof record.type === 'string' && record.type.toLowerCase() === 'backup');
278
298
  const priority = normalizePriorityValue(record.priority, total - index);
279
299
  const targets = normalizeRouteTargets(record);
300
+ const mode = normalizeRoutePoolMode(record.mode ?? record?.strategy ?? record?.routingMode);
280
301
  const force = record.force === true ||
281
302
  (typeof record.force === 'string' && record.force.trim().toLowerCase() === 'true');
282
303
  return targets.length
@@ -285,10 +306,30 @@ function normalizeRoutePoolEntry(routeName, entry, index, total) {
285
306
  priority,
286
307
  backup,
287
308
  targets,
309
+ ...(mode ? { mode } : {}),
288
310
  ...(force ? { force: true } : {})
289
311
  }
290
312
  : null;
291
313
  }
314
/**
 * Canonicalizes a pool-mode value read from a route pool record.
 *
 * Matching ignores letter case and surrounding whitespace. The spellings
 * `round-robin`, `round_robin`, `roundrobin` and `rr` all collapse to
 * `'round-robin'`.
 *
 * @param value - Raw mode value; anything that is not a string is rejected.
 * @returns `'priority'` or `'round-robin'` for a recognized spelling,
 *   otherwise `undefined` (including for empty / whitespace-only strings).
 */
function normalizeRoutePoolMode(value) {
    if (typeof value !== 'string') {
        return undefined;
    }
    switch (value.trim().toLowerCase()) {
        case 'priority':
            return 'priority';
        case 'round-robin':
        case 'round_robin':
        case 'roundrobin':
        case 'rr':
            return 'round-robin';
        default:
            // Covers the empty string as well as any unknown mode name.
            return undefined;
    }
}
292
333
  function normalizeRouteTargets(record) {
293
334
  const buckets = [
294
335
  record.targets,
@@ -867,15 +908,38 @@ function parseRouteEntry(entry, aliasIndex) {
867
908
  if (secondDot > 0 && secondDot < remainder.length - 1) {
868
909
  const aliasCandidate = remainder.slice(0, secondDot);
869
910
  if (aliases.includes(aliasCandidate)) {
911
+ const parsed = splitModelPriority(remainder.slice(secondDot + 1));
870
912
  return {
871
913
  providerId,
872
914
  keyAlias: aliasCandidate,
873
- modelId: remainder.slice(secondDot + 1)
915
+ modelId: parsed.modelId,
916
+ priority: parsed.priority
874
917
  };
875
918
  }
876
919
  }
877
920
  }
878
- return { providerId, modelId: remainder };
921
+ const parsed = splitModelPriority(remainder);
922
+ return { providerId, modelId: parsed.modelId, priority: parsed.priority };
923
+ }
924
/**
 * Splits an optional `:<digits>` priority suffix off a model identifier.
 *
 * The suffix is only recognized at the very end of the trimmed input and must
 * consist solely of decimal digits; everything before the last such colon is
 * the model id. When no usable suffix is present the priority defaults to 100.
 *
 * @param raw - Model portion of a route entry; non-strings are treated as ''.
 * @returns `{ modelId, priority }`. If the text before the colon is blank, the
 *   whole trimmed input is returned as `modelId` with priority 100. If the
 *   digits do not convert to a finite number, the stripped model id is
 *   returned with priority 100.
 */
function splitModelPriority(raw) {
    const text = typeof raw === 'string' ? raw.trim() : '';
    // Result used whenever the input cannot be split into "<model>:<digits>".
    const unsplit = { modelId: text, priority: 100 };
    const pieces = text ? /^(.*):(\d+)$/.exec(text) : null;
    if (pieces === null) {
        return unsplit;
    }
    const [, head = '', digits = ''] = pieces;
    const modelId = head.trim();
    if (modelId === '') {
        return unsplit;
    }
    const priority = Number(digits.trim());
    // An extremely long digit run converts to Infinity; fall back to 100 then.
    return { modelId, priority: Number.isFinite(priority) ? priority : 100 };
}
880
944
  function parseTargetKey(targetKey) {
881
945
  const value = typeof targetKey === 'string' ? targetKey.trim() : '';
@@ -22,6 +22,12 @@ export declare function selectProviderImpl(requestedRoute: string, metadata: Rou
22
22
  pool: string[];
23
23
  poolId?: string;
24
24
  };
25
+ export declare function selectDirectProviderModel(providerId: string, modelId: string, metadata: RouterMetadataInput, features: RoutingFeatures, activeState: RoutingInstructionState, deps: SelectionDeps): {
26
+ providerKey: string;
27
+ routeUsed: string;
28
+ pool: string[];
29
+ poolId?: string;
30
+ } | null;
25
31
  export declare function selectFromStickyPool(stickyKeySet: Set<string>, metadata: RouterMetadataInput, features: RoutingFeatures, state: RoutingInstructionState, deps: SelectionDeps, options: {
26
32
  allowAliasRotation?: boolean;
27
33
  }): {
@@ -110,6 +110,54 @@ export function selectProviderImpl(requestedRoute, metadata, classification, fea
110
110
  allowAliasRotation
111
111
  });
112
112
  }
113
/**
 * Picks a provider key for an explicit `<provider>.<model>` request, bypassing
 * the configured routing table.
 *
 * All keys registered for the provider whose profile reports the requested
 * model id are gathered into a synthetic tier (id `direct:<provider>.<model>`,
 * priority 100, mode `round-robin`, not a backup) and handed to
 * `trySelectFromTier` under the route name `direct`, with no sticky key and
 * alias rotation enabled.
 *
 * @param providerId - Provider identifier; trimmed before use.
 * @param modelId - Model identifier; trimmed before use.
 * @param metadata - Router metadata. Not read by this function; kept for
 *   signature compatibility with callers.
 * @param features - Routing features; only `estimatedTokens` is read directly
 *   here (clamped to >= 0, or 0 when it is not a finite number).
 * @param activeState - Routing instruction state whose disabled/allowed
 *   collections are copied before being passed down.
 * @param deps - Selection dependencies (`providerRegistry` is used here; the
 *   whole object is forwarded to `trySelectFromTier`).
 * @returns `{ providerKey, routeUsed: 'direct', pool, poolId }` on success, or
 *   `null` when an id is blank, no registered key serves the model, or the
 *   tier selection yields no provider key.
 */
export function selectDirectProviderModel(providerId, modelId, metadata, features, activeState, deps) {
    const normalizedProvider = typeof providerId === 'string' ? providerId.trim() : '';
    const normalizedModel = typeof modelId === 'string' ? modelId.trim() : '';
    if (!normalizedProvider || !normalizedModel) {
        return null;
    }
    const matchingKeys = deps.providerRegistry
        .listProviderKeys(normalizedProvider)
        .filter((key) => {
            try {
                return deps.providerRegistry.get(key)?.modelId === normalizedModel;
            }
            catch {
                // A key whose profile cannot be loaded is treated as not serving the model.
                return false;
            }
        });
    if (matchingKeys.length === 0) {
        return null;
    }
    const estimatedTokens = typeof features.estimatedTokens === 'number' && Number.isFinite(features.estimatedTokens)
        ? Math.max(0, features.estimatedTokens)
        : 0;
    const tier = {
        id: `direct:${normalizedProvider}.${normalizedModel}`,
        targets: matchingKeys,
        priority: 100,
        mode: 'round-robin',
        backup: false
    };
    // trySelectFromTier can also report a failureHint. This function's result
    // has no field to carry it, so it is not read (the previous version only
    // copied it into a local array that was never used).
    const { providerKey, poolTargets, tierId } = trySelectFromTier('direct', tier, undefined, estimatedTokens, features, deps, {
        disabledProviders: new Set(activeState.disabledProviders),
        disabledKeysMap: new Map(activeState.disabledKeys),
        allowedProviders: new Set(activeState.allowedProviders),
        disabledModels: new Map(activeState.disabledModels),
        allowAliasRotation: true
    });
    if (!providerKey) {
        return null;
    }
    return { providerKey, routeUsed: 'direct', pool: poolTargets, poolId: tierId };
}
113
161
  function selectFromCandidates(routes, metadata, classification, features, state, deps, options) {
114
162
  const allowedProviders = new Set(state.allowedProviders);
115
163
  const disabledProviders = new Set(state.disabledProviders);
@@ -272,14 +320,25 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
272
320
  const prioritizedPools = buildContextCandidatePools(contextResult);
273
321
  const quotaView = deps.quotaView;
274
322
  const now = quotaView ? Date.now() : 0;
323
+ const selectFirstAvailable = (candidates) => {
324
+ for (const key of candidates) {
325
+ if (deps.healthManager.isAvailable(key)) {
326
+ return key;
327
+ }
328
+ }
329
+ return null;
330
+ };
275
331
  const selectWithQuota = (candidates) => {
276
332
  if (!quotaView) {
333
+ if (tier.mode === 'priority') {
334
+ return selectFirstAvailable(candidates);
335
+ }
277
336
  return deps.loadBalancer.select({
278
337
  routeName: `${routeName}:${tier.id}`,
279
338
  candidates,
280
339
  stickyKey: options.allowAliasRotation ? undefined : stickyKey,
281
340
  availabilityCheck: (key) => deps.healthManager.isAvailable(key)
282
- });
341
+ }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
283
342
  }
284
343
  const buckets = new Map();
285
344
  for (const key of candidates) {
@@ -312,14 +371,22 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
312
371
  if (!bucketCandidates.length) {
313
372
  continue;
314
373
  }
315
- const selected = deps.loadBalancer.select({
316
- routeName: `${routeName}:${tier.id}`,
317
- candidates: bucketCandidates,
318
- stickyKey: options.allowAliasRotation ? undefined : stickyKey,
319
- availabilityCheck: (key) => deps.healthManager.isAvailable(key)
320
- });
321
- if (selected) {
322
- return selected;
374
+ if (tier.mode === 'priority') {
375
+ const selected = selectFirstAvailable(bucketCandidates);
376
+ if (selected) {
377
+ return selected;
378
+ }
379
+ }
380
+ else {
381
+ const selected = deps.loadBalancer.select({
382
+ routeName: `${routeName}:${tier.id}`,
383
+ candidates: bucketCandidates,
384
+ stickyKey: options.allowAliasRotation ? undefined : stickyKey,
385
+ availabilityCheck: (key) => deps.healthManager.isAvailable(key)
386
+ }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
387
+ if (selected) {
388
+ return selected;
389
+ }
323
390
  }
324
391
  }
325
392
  return null;
@@ -29,6 +29,7 @@ export declare class VirtualRouterEngine {
29
29
  routingStateStore?: RoutingInstructionStateStore;
30
30
  quotaView?: ProviderQuotaView;
31
31
  });
32
+ private parseDirectProviderModel;
32
33
  initialize(config: VirtualRouterConfig): void;
33
34
  route(request: StandardizedRequest | ProcessedRequest, metadata: RouterMetadataInput): {
34
35
  target: TargetMetadata;
@@ -9,7 +9,7 @@ import { getStatsCenter } from '../../telemetry/stats-center.js';
9
9
  import { parseRoutingInstructions, applyRoutingInstructions, cleanMessagesFromRoutingInstructions } from './routing-instructions.js';
10
10
  import { loadRoutingInstructionStateSync, saveRoutingInstructionStateAsync } from './sticky-session-store.js';
11
11
  import { buildHitReason, formatVirtualRouterHit } from './engine-logging.js';
12
- import { selectProviderImpl } from './engine-selection.js';
12
+ import { selectDirectProviderModel, selectProviderImpl } from './engine-selection.js';
13
13
  import { applyQuotaDepletedImpl, applyQuotaRecoveryImpl, applySeriesCooldownImpl, handleProviderFailureImpl, mapProviderErrorImpl } from './engine-health.js';
14
14
  export class VirtualRouterEngine {
15
15
  routing = {};
@@ -44,6 +44,25 @@ export class VirtualRouterEngine {
44
44
  this.quotaView = deps.quotaView;
45
45
  }
46
46
  }
47
+ parseDirectProviderModel(model) {
48
+ const raw = typeof model === 'string' ? model.trim() : '';
49
+ if (!raw) {
50
+ return null;
51
+ }
52
+ const firstDot = raw.indexOf('.');
53
+ if (firstDot <= 0 || firstDot === raw.length - 1) {
54
+ return null;
55
+ }
56
+ const providerId = raw.slice(0, firstDot).trim();
57
+ const modelId = raw.slice(firstDot + 1).trim();
58
+ if (!providerId || !modelId) {
59
+ return null;
60
+ }
61
+ if (this.providerRegistry.listProviderKeys(providerId).length === 0) {
62
+ return null;
63
+ }
64
+ return { providerId, modelId };
65
+ }
47
66
  initialize(config) {
48
67
  this.validateConfig(config);
49
68
  this.routing = config.routing;
@@ -198,17 +217,64 @@ export class VirtualRouterEngine {
198
217
  }
199
218
  const routingMode = this.resolveRoutingMode([...metadataInstructions, ...instructions], routingState);
200
219
  const features = buildRoutingFeatures(request, metadata);
201
- const classification = metadata.routeHint && metadata.routeHint.trim()
202
- ? {
203
- routeName: metadata.routeHint.trim(),
220
+ const directProviderModel = this.parseDirectProviderModel(request?.model);
221
+ let classification;
222
+ let requestedRoute;
223
+ let selection;
224
+ if (directProviderModel) {
225
+ const providerKeys = this.providerRegistry.listProviderKeys(directProviderModel.providerId);
226
+ let hasModel = false;
227
+ for (const key of providerKeys) {
228
+ try {
229
+ const profile = this.providerRegistry.get(key);
230
+ if (profile?.modelId === directProviderModel.modelId) {
231
+ hasModel = true;
232
+ break;
233
+ }
234
+ }
235
+ catch {
236
+ continue;
237
+ }
238
+ }
239
+ if (!hasModel) {
240
+ throw new VirtualRouterError(`Unknown model ${directProviderModel.modelId} for provider ${directProviderModel.providerId}`, VirtualRouterErrorCode.CONFIG_ERROR, { providerId: directProviderModel.providerId, modelId: directProviderModel.modelId });
241
+ }
242
+ classification = {
243
+ routeName: 'direct',
204
244
  confidence: 1,
205
- reasoning: `route_hint:${metadata.routeHint.trim()}`,
245
+ reasoning: `direct_model:${directProviderModel.providerId}.${directProviderModel.modelId}`,
206
246
  fallback: false,
207
- candidates: [metadata.routeHint.trim()]
247
+ candidates: ['direct']
248
+ };
249
+ requestedRoute = 'direct';
250
+ const directSelection = selectDirectProviderModel(directProviderModel.providerId, directProviderModel.modelId, metadata, features, routingState, {
251
+ routing: this.routing,
252
+ providerRegistry: this.providerRegistry,
253
+ healthManager: this.healthManager,
254
+ contextAdvisor: this.contextAdvisor,
255
+ loadBalancer: this.loadBalancer,
256
+ isProviderCoolingDown: (key) => this.isProviderCoolingDown(key),
257
+ resolveStickyKey: (m) => this.resolveStickyKey(m),
258
+ quotaView: this.quotaView
259
+ });
260
+ if (!directSelection) {
261
+ throw new VirtualRouterError(`All providers unavailable for model ${directProviderModel.providerId}.${directProviderModel.modelId}`, VirtualRouterErrorCode.PROVIDER_NOT_AVAILABLE, { providerId: directProviderModel.providerId, modelId: directProviderModel.modelId });
208
262
  }
209
- : this.classifier.classify(features);
210
- const requestedRoute = this.normalizeRouteAlias(classification.routeName || DEFAULT_ROUTE);
211
- const selection = this.selectProvider(requestedRoute, metadata, classification, features, routingState);
263
+ selection = directSelection;
264
+ }
265
+ else {
266
+ classification = metadata.routeHint && metadata.routeHint.trim()
267
+ ? {
268
+ routeName: metadata.routeHint.trim(),
269
+ confidence: 1,
270
+ reasoning: `route_hint:${metadata.routeHint.trim()}`,
271
+ fallback: false,
272
+ candidates: [metadata.routeHint.trim()]
273
+ }
274
+ : this.classifier.classify(features);
275
+ requestedRoute = this.normalizeRouteAlias(classification.routeName || DEFAULT_ROUTE);
276
+ selection = this.selectProvider(requestedRoute, metadata, classification, features, routingState);
277
+ }
212
278
  const baseTarget = this.providerRegistry.buildTarget(selection.providerKey);
213
279
  const forceVision = this.routeHasForceFlag('vision');
214
280
  const target = {
@@ -0,0 +1,18 @@
1
+ import type { LoadBalancingPolicy } from './types.js';
2
+ export interface LoadBalancingOptions {
3
+ routeName: string;
4
+ candidates: string[];
5
+ stickyKey?: string;
6
+ availabilityCheck: (providerKey: string) => boolean;
7
+ }
8
+ export declare class RouteLoadBalancer {
9
+ private policy;
10
+ private readonly states;
11
+ constructor(policy?: LoadBalancingPolicy);
12
+ updatePolicy(policy?: LoadBalancingPolicy): void;
13
+ select(options: LoadBalancingOptions, strategyOverride?: LoadBalancingPolicy['strategy']): string | null;
14
+ private selectRoundRobin;
15
+ private selectWeighted;
16
+ private selectSticky;
17
+ private getState;
18
+ }
@@ -9,12 +9,13 @@ export class RouteLoadBalancer {
9
9
  this.policy = policy;
10
10
  }
11
11
  }
12
- select(options) {
12
+ select(options, strategyOverride) {
13
13
  const available = options.candidates.filter((candidate) => options.availabilityCheck(candidate));
14
14
  if (available.length === 0) {
15
15
  return null;
16
16
  }
17
- switch (this.policy.strategy) {
17
+ const strategy = strategyOverride ?? this.policy.strategy;
18
+ switch (strategy) {
18
19
  case 'sticky':
19
20
  return this.selectSticky(options.routeName, available, options.stickyKey);
20
21
  case 'weighted':
@@ -6,10 +6,17 @@ export declare const DEFAULT_MODEL_CONTEXT_TOKENS = 200000;
6
6
  export declare const DEFAULT_ROUTE = "default";
7
7
  export declare const ROUTE_PRIORITY: string[];
8
8
  export type RoutingInstructionMode = 'force' | 'sticky' | 'none';
9
+ export type RoutePoolMode = 'round-robin' | 'priority';
9
10
  export interface RoutePoolTier {
10
11
  id: string;
11
12
  targets: string[];
12
13
  priority: number;
14
+ /**
15
+ * Pool-level routing mode:
16
+ * - round-robin: force round-robin selection inside this pool (ignores global loadBalancing strategy)
17
+ * - priority: always pick highest-priority key first, only fallback when unavailable
18
+ */
19
+ mode?: RoutePoolMode;
13
20
  backup?: boolean;
14
21
  /**
15
22
  * Optional force flag for this route pool.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jsonstudio/llms",
3
- "version": "0.6.743",
3
+ "version": "0.6.749",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",