npm - @jsonstudio/llms - Versions diffs - 0.6.1164 → 0.6.1172 - Mend

@jsonstudio/llms 0.6.1164 → 0.6.1172

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/router/virtual-router/bootstrap.js +17 -1
package/dist/router/virtual-router/context-advisor.d.ts +4 -0
package/dist/router/virtual-router/context-advisor.js +3 -0
package/dist/router/virtual-router/context-weighted.d.ts +31 -0
package/dist/router/virtual-router/context-weighted.js +54 -0
package/dist/router/virtual-router/engine-selection.js +162 -15
package/dist/router/virtual-router/types.d.ts +28 -0
package/package.json +1 -1

package/dist/router/virtual-router/bootstrap.js CHANGED Viewed

@@ -1177,10 +1177,26 @@ function normalizeLoadBalancing(input) {
                 : {})
         }
         : undefined;
+    const contextWeightedRaw = asRecord(record.contextWeighted);
+    const contextWeighted = Object.keys(contextWeightedRaw).length > 0
+        ? {
+            ...(typeof contextWeightedRaw.enabled === 'boolean' ? { enabled: contextWeightedRaw.enabled } : {}),
+            ...(typeof contextWeightedRaw.clientCapTokens === 'number' && Number.isFinite(contextWeightedRaw.clientCapTokens)
+                ? { clientCapTokens: contextWeightedRaw.clientCapTokens }
+                : {}),
+            ...(typeof contextWeightedRaw.gamma === 'number' && Number.isFinite(contextWeightedRaw.gamma)
+                ? { gamma: contextWeightedRaw.gamma }
+                : {}),
+            ...(typeof contextWeightedRaw.maxMultiplier === 'number' && Number.isFinite(contextWeightedRaw.maxMultiplier)
+                ? { maxMultiplier: contextWeightedRaw.maxMultiplier }
+                : {})
+        }
+        : undefined;
     return {
         strategy,
         ...(Object.keys(weightsEntries).length ? { weights: weightsEntries } : {}),
-        ...(healthWeighted ? { healthWeighted } : {})
+        ...(healthWeighted ? { healthWeighted } : {}),
+        ...(contextWeighted ? { contextWeighted } : {})
     };
 }
 function coerceRatio(value) {

package/dist/router/virtual-router/context-advisor.d.ts CHANGED Viewed

@@ -16,4 +16,8 @@ export declare class ContextAdvisor {
     private hardLimit;
     configure(config?: VirtualRouterContextRoutingConfig | null): void;
     classify(pool: string[], estimatedTokens: number, resolveProfile: (key: string) => ProviderProfile): ContextAdvisorResult;
+    getConfig(): {
+        warnRatio: number;
+        hardLimit: boolean;
+    };
 }

package/dist/router/virtual-router/context-advisor.js CHANGED Viewed

@@ -55,6 +55,9 @@ export class ContextAdvisor {
             allOverflow: safe.length === 0 && risky.length === 0 && overflow.length > 0
         };
     }
+    getConfig() {
+        return { warnRatio: this.warnRatio, hardLimit: this.hardLimit };
+    }
 }
 function clampWarnRatio(value) {
     if (!Number.isFinite(value)) {

package/dist/router/virtual-router/context-weighted.d.ts ADDED Viewed

@@ -0,0 +1,31 @@
+import type { ContextWeightedLoadBalancingConfig } from './types.js';
+export type ResolvedContextWeightedConfig = Required<{
+    enabled: boolean;
+    clientCapTokens: number;
+    gamma: number;
+    maxMultiplier: number;
+}>;
+/**
+ * Context-weighted constant table (defaults).
+ *
+ * Intended behavior:
+ * - Prefer smaller effective safe context windows early, so that larger windows remain available later.
+ * - Compensation is proportional by default (`gamma=1`), but capped by `maxMultiplier`.
+ *
+ * Notes:
+ * - `clientCapTokens` is the maximum effective context the client can consume, even if the model supports more.
+ * - The effective safe window is computed using ContextAdvisor's `warnRatio` and model "slack" above the client cap.
+ *   - If a model has slack >= the reserved margin, it effectively gets the full client cap as safe window.
+ */
+export declare const DEFAULT_CONTEXT_WEIGHTED_CONFIG: ResolvedContextWeightedConfig;
+export declare function resolveContextWeightedConfig(raw?: ContextWeightedLoadBalancingConfig | null): ResolvedContextWeightedConfig;
+export declare function computeEffectiveSafeWindowTokens(options: {
+    modelMaxTokens: number;
+    warnRatio: number;
+    clientCapTokens: number;
+}): number;
+export declare function computeContextMultiplier(options: {
+    effectiveSafeRefTokens: number;
+    effectiveSafeTokens: number;
+    cfg: ResolvedContextWeightedConfig;
+}): number;

package/dist/router/virtual-router/context-weighted.js ADDED Viewed

@@ -0,0 +1,54 @@
+/**
+ * Context-weighted constant table (defaults).
+ *
+ * Intended behavior:
+ * - Prefer smaller effective safe context windows early, so that larger windows remain available later.
+ * - Compensation is proportional by default (`gamma=1`), but capped by `maxMultiplier`.
+ *
+ * Notes:
+ * - `clientCapTokens` is the maximum effective context the client can consume, even if the model supports more.
+ * - The effective safe window is computed using ContextAdvisor's `warnRatio` and model "slack" above the client cap.
+ *   - If a model has slack >= the reserved margin, it effectively gets the full client cap as safe window.
+ */
+export const DEFAULT_CONTEXT_WEIGHTED_CONFIG = {
+    enabled: false,
+    clientCapTokens: 200_000,
+    gamma: 1,
+    maxMultiplier: 2
+};
+export function resolveContextWeightedConfig(raw) {
+    const enabled = raw?.enabled ?? DEFAULT_CONTEXT_WEIGHTED_CONFIG.enabled;
+    const clientCapTokens = typeof raw?.clientCapTokens === 'number' && Number.isFinite(raw.clientCapTokens) && raw.clientCapTokens > 0
+        ? Math.floor(raw.clientCapTokens)
+        : DEFAULT_CONTEXT_WEIGHTED_CONFIG.clientCapTokens;
+    const gamma = typeof raw?.gamma === 'number' && Number.isFinite(raw.gamma) && raw.gamma > 0
+        ? raw.gamma
+        : DEFAULT_CONTEXT_WEIGHTED_CONFIG.gamma;
+    const maxMultiplier = typeof raw?.maxMultiplier === 'number' && Number.isFinite(raw.maxMultiplier) && raw.maxMultiplier >= 1
+        ? raw.maxMultiplier
+        : DEFAULT_CONTEXT_WEIGHTED_CONFIG.maxMultiplier;
+    return { enabled, clientCapTokens, gamma, maxMultiplier };
+}
+export function computeEffectiveSafeWindowTokens(options) {
+    const modelMaxTokens = typeof options.modelMaxTokens === 'number' && Number.isFinite(options.modelMaxTokens) && options.modelMaxTokens > 0
+        ? Math.floor(options.modelMaxTokens)
+        : 1;
+    const clientCapTokens = typeof options.clientCapTokens === 'number' && Number.isFinite(options.clientCapTokens) && options.clientCapTokens > 0
+        ? Math.floor(options.clientCapTokens)
+        : DEFAULT_CONTEXT_WEIGHTED_CONFIG.clientCapTokens;
+    const warnRatio = typeof options.warnRatio === 'number' && Number.isFinite(options.warnRatio) && options.warnRatio > 0 && options.warnRatio < 1
+        ? options.warnRatio
+        : 0.9;
+    const effectiveMax = Math.min(modelMaxTokens, clientCapTokens);
+    const reserve = Math.ceil(effectiveMax * (1 - warnRatio));
+    const slack = Math.max(0, modelMaxTokens - clientCapTokens);
+    const reserveEff = Math.max(0, reserve - slack);
+    return Math.max(1, effectiveMax - reserveEff);
+}
+export function computeContextMultiplier(options) {
+    const ref = Math.max(1, Math.floor(options.effectiveSafeRefTokens));
+    const cur = Math.max(1, Math.floor(options.effectiveSafeTokens));
+    const ratio = ref / cur;
+    const raw = Math.pow(Math.max(1, ratio), options.cfg.gamma);
+    return Math.min(options.cfg.maxMultiplier, raw);
+}

package/dist/router/virtual-router/engine-selection.js CHANGED Viewed

@@ -1,3 +1,4 @@
+import { computeContextMultiplier, computeEffectiveSafeWindowTokens, resolveContextWeightedConfig } from './context-weighted.js';
 import { computeHealthWeight, resolveHealthWeightedConfig } from './health-weighted.js';
 import { DEFAULT_ROUTE, ROUTE_PRIORITY, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
 export function selectProviderImpl(requestedRoute, metadata, classification, features, activeState, deps, options = {}) {
@@ -402,6 +403,9 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
     const quotaView = deps.quotaView;
     const now = quotaView ? Date.now() : 0;
     const healthWeightedCfg = resolveHealthWeightedConfig(deps.loadBalancer.getPolicy().healthWeighted);
+    const contextWeightedCfg = resolveContextWeightedConfig(deps.loadBalancer.getPolicy().contextWeighted);
+    const warnRatio = deps.contextAdvisor.getConfig().warnRatio;
+    const nowForWeights = Date.now();
     const selectFirstAvailable = (candidates) => {
         for (const key of candidates) {
             if (deps.healthManager.isAvailable(key)) {
@@ -410,23 +414,137 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
         }
         return null;
     };
-    const selectWithQuota = (candidates) => {
+    const resolvePriorityMeta = (orderedTargets) => {
+        // Priority mode semantics (strict group priority + alias-level balancing):
+        // - Targets are interpreted as ordered (providerId, modelId) groups.
+        // - Group base priorities: 100, 90, 80, ... (step=10) by appearance order.
+        // - Within a group (different auth aliases), base scores: 100, 99, 98, ... (step=1).
+        //
+        // Group selection is strict: always use the best group until it is unavailable.
+        // Alias selection is balanced within the chosen group (RR / health-weighted / context-weighted).
+        const meta = new Map();
+        if (!Array.isArray(orderedTargets) || orderedTargets.length === 0) {
+            return meta;
+        }
+        let groupIndex = -1;
+        let aliasOffset = 0;
+        let lastGroupKey = '';
+        for (const key of orderedTargets) {
+            const providerId = extractProviderId(key) ?? '';
+            const modelId = getProviderModelId(key, deps.providerRegistry) ?? '';
+            const groupKey = `${providerId}::${modelId}`;
+            if (groupKey !== lastGroupKey) {
+                groupIndex += 1;
+                aliasOffset = 0;
+                lastGroupKey = groupKey;
+            }
+            const groupBase = 100 - groupIndex * 10;
+            const base = groupBase - aliasOffset;
+            meta.set(key, { groupId: `${providerId}.${modelId}`, groupBase, base });
+            aliasOffset += 1;
+        }
+        return meta;
+    };
+    const pickPriorityGroup = (candidates, orderedTargets, penalties) => {
+        const meta = resolvePriorityMeta(orderedTargets);
+        let bestGroupId = null;
+        let bestScore = Number.NEGATIVE_INFINITY;
+        for (const key of candidates) {
+            if (!deps.healthManager.isAvailable(key))
+                continue;
+            const m = meta.get(key);
+            if (!m)
+                continue;
+            const penalty = penalties ? Math.max(0, Math.floor(penalties[key] ?? 0)) : 0;
+            const score = m.base - penalty;
+            if (score > bestScore) {
+                bestScore = score;
+                bestGroupId = m.groupId;
+            }
+        }
+        if (!bestGroupId)
+            return null;
+        const groupCandidates = candidates.filter((key) => meta.get(key)?.groupId === bestGroupId);
+        return groupCandidates.length ? { groupId: bestGroupId, groupCandidates } : null;
+    };
+    const computeContextWeightMultipliers = (candidates) => {
+        if (!contextWeightedCfg.enabled) {
+            return null;
+        }
+        const eff = {};
+        let ref = 1;
+        for (const key of candidates) {
+            const usage = contextResult.usage?.[key];
+            const limit = usage && typeof usage.limit === 'number' && Number.isFinite(usage.limit) ? Math.floor(usage.limit) : 0;
+            const safeEff = computeEffectiveSafeWindowTokens({
+                modelMaxTokens: Math.max(1, limit),
+                warnRatio,
+                clientCapTokens: contextWeightedCfg.clientCapTokens
+            });
+            eff[key] = safeEff;
+            if (safeEff > ref) {
+                ref = safeEff;
+            }
+        }
+        return { ref, eff };
+    };
+    const selectWithQuota = (candidates, isSafePool) => {
         if (!quotaView) {
             if (tier.mode === 'priority') {
                 if (isRecoveryAttempt) {
                     return selectFirstAvailable(candidates);
                 }
+                const group = pickPriorityGroup(candidates, tier.targets);
+                if (!group) {
+                    return null;
+                }
+                const weights = (() => {
+                    if (!isSafePool)
+                        return undefined;
+                    const ctx = computeContextWeightMultipliers(group.groupCandidates);
+                    if (!ctx)
+                        return undefined;
+                    const out = {};
+                    for (const key of group.groupCandidates) {
+                        const m = computeContextMultiplier({
+                            effectiveSafeRefTokens: ctx.ref,
+                            effectiveSafeTokens: ctx.eff[key] ?? 1,
+                            cfg: contextWeightedCfg
+                        });
+                        out[key] = Math.max(1, Math.round(100 * m));
+                    }
+                    return out;
+                })();
                 return deps.loadBalancer.select({
-                    routeName: `${routeName}:${tier.id}:priority`,
-                    candidates,
+                    routeName: `${routeName}:${tier.id}:priority:group:${group.groupId}`,
+                    candidates: group.groupCandidates,
                     stickyKey: options.allowAliasRotation ? undefined : stickyKey,
+                    weights,
                     availabilityCheck: (key) => deps.healthManager.isAvailable(key)
                 }, 'round-robin');
             }
+            const weights = (() => {
+                if (!isSafePool || !contextWeightedCfg.enabled)
+                    return undefined;
+                const ctx = computeContextWeightMultipliers(candidates);
+                if (!ctx)
+                    return undefined;
+                const out = {};
+                for (const key of candidates) {
+                    const m = computeContextMultiplier({
+                        effectiveSafeRefTokens: ctx.ref,
+                        effectiveSafeTokens: ctx.eff[key] ?? 1,
+                        cfg: contextWeightedCfg
+                    });
+                    out[key] = Math.max(1, Math.round(100 * m));
+                }
+                return out;
+            })();
             const selected = deps.loadBalancer.select({
                 routeName: `${routeName}:${tier.id}`,
                 candidates,
                 stickyKey: options.allowAliasRotation ? undefined : stickyKey,
+                weights,
                 availabilityCheck: (key) => deps.healthManager.isAvailable(key)
             }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
             return selected;
@@ -508,12 +626,16 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
                     return pinned;
                 }
             }
+            const bucketPenaltyMap = {};
+            for (const item of bucket) {
+                bucketPenaltyMap[item.key] = item.penalty;
+            }
             const bucketWeights = {};
             const bucketMultipliers = {};
             for (const item of bucket) {
                 if (healthWeightedCfg.enabled) {
                     const entry = quotaView(item.key);
-                    const { weight, multiplier } = computeHealthWeight(entry, now, healthWeightedCfg);
+                    const { weight, multiplier } = computeHealthWeight(entry, nowForWeights, healthWeightedCfg);
                     bucketWeights[item.key] = weight;
                     bucketMultipliers[item.key] = multiplier;
                 }
@@ -523,7 +645,41 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
                     bucketMultipliers[item.key] = 1;
                 }
             }
+            if (isSafePool && contextWeightedCfg.enabled) {
+                const ctx = computeContextWeightMultipliers(bucketCandidates);
+                if (ctx) {
+                    for (const key of bucketCandidates) {
+                        const m = computeContextMultiplier({
+                            effectiveSafeRefTokens: ctx.ref,
+                            effectiveSafeTokens: ctx.eff[key] ?? 1,
+                            cfg: contextWeightedCfg
+                        });
+                        bucketWeights[key] = Math.max(1, Math.round((bucketWeights[key] ?? 1) * m));
+                    }
+                }
+            }
             if (tier.mode === 'priority') {
+                if (!isRecoveryAttempt) {
+                    const group = pickPriorityGroup(bucketCandidates, tier.targets, bucketPenaltyMap);
+                    if (!group) {
+                        continue;
+                    }
+                    const groupWeights = {};
+                    for (const key of group.groupCandidates) {
+                        groupWeights[key] = bucketWeights[key] ?? 1;
+                    }
+                    const selected = deps.loadBalancer.select({
+                        routeName: `${routeName}:${tier.id}:priority:${priority}:group:${group.groupId}`,
+                        candidates: group.groupCandidates,
+                        stickyKey: options.allowAliasRotation ? undefined : stickyKey,
+                        weights: groupWeights,
+                        availabilityCheck: (key) => deps.healthManager.isAvailable(key)
+                    }, 'round-robin');
+                    if (selected) {
+                        return selected;
+                    }
+                    continue;
+                }
                 if (isRecoveryAttempt && healthWeightedCfg.enabled && healthWeightedCfg.recoverToBestOnRetry) {
                     let best = null;
                     let bestM = Number.NEGATIVE_INFINITY;
@@ -547,16 +703,7 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
                         return recovered;
                     continue;
                 }
-                const selected = deps.loadBalancer.select({
-                    routeName: `${routeName}:${tier.id}:priority:${priority}`,
-                    candidates: bucketCandidates,
-                    stickyKey: options.allowAliasRotation ? undefined : stickyKey,
-                    weights: bucketWeights,
-                    availabilityCheck: (key) => deps.healthManager.isAvailable(key)
-                }, 'round-robin');
-                if (selected) {
-                    return selected;
-                }
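+                // NOTE: still reachable on recovery attempts when health-weighted recoverToBestOnRetry is not active; no provider is selected in this priority branch in that case.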
             }
             else {
                 if (isRecoveryAttempt && healthWeightedCfg.enabled && healthWeightedCfg.recoverToBestOnRetry) {
@@ -597,7 +744,7 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
         return null;
     };
     for (const candidatePool of prioritizedPools) {
-        const providerKey = selectWithQuota(candidatePool);
+        const providerKey = selectWithQuota(candidatePool, candidatePool === contextResult.safe);
         if (providerKey) {
             return { providerKey, poolTargets: tier.targets, tierId: tier.id };
         }

package/dist/router/virtual-router/types.d.ts CHANGED Viewed

@@ -107,6 +107,13 @@ export interface LoadBalancingPolicy {
      * - Gradually recovers weights as time passes without errors
      */
     healthWeighted?: HealthWeightedLoadBalancingConfig;
+    /**
+     * Context-aware weighting (best-fit under safe window):
+     * - Prefer smaller effective context windows early, to preserve larger windows for later.
+     * - Uses ContextAdvisor's warnRatio to compute an "effective safe window" per model.
+     * - Caps comparisons by client context (e.g. 200k).
+     */
+    contextWeighted?: ContextWeightedLoadBalancingConfig;
 }
 export interface HealthWeightedLoadBalancingConfig {
     /**
@@ -136,6 +143,27 @@ export interface HealthWeightedLoadBalancingConfig {
      */
     recoverToBestOnRetry?: boolean;
 }
+export interface ContextWeightedLoadBalancingConfig {
+    /**
+     * When false or omitted, context-weighted logic is disabled (the resolved default is `false`).
+     * When true, context-weighted logic applies within the same pool bucket,
+     * and only for candidates that are considered "safe" by ContextAdvisor.
+     */
+    enabled?: boolean;
+    /**
+     * Client-side maximum usable context (tokens). Models above this are capped.
+     * Example: 200000 for Codex/Claude Code style clients.
+     */
+    clientCapTokens?: number;
+    /**
+     * Exponent for the compensation ratio. Use 1 for proportional compensation.
+     */
+    gamma?: number;
+    /**
+     * Upper bound for the multiplier to avoid extreme skew.
+     */
+    maxMultiplier?: number;
+}
 export interface ProviderHealthConfig {
     failureThreshold: number;
     cooldownMs: number;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@jsonstudio/llms",
-  "version": "0.6.1164",
+  "version": "0.6.1172",
   "type": "module",
   "main": "dist/index.js",
   "module": "dist/index.js",