adaptive-memory-multi-model-router 2.14.44 → 2.14.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,15 +46,35 @@ function buildModelProfiles(): Record<string, ModelProfile> {
46
46
  for (const [providerId, provider] of Object.entries(available)) {
47
47
  for (const model of provider.models) {
48
48
  const modelKey = model.includes('/') ? model : providerId + '/' + model;
49
- const costPerKInput = provider.costPerK ? provider.costPerK.input : 0;
50
- const costPerKOutput = provider.costPerK ? provider.costPerK.output : 0;
49
+ let costPerKInput = provider.costPerK ? provider.costPerK.input : 0;
50
+ let costPerKOutput = provider.costPerK ? provider.costPerK.output : 0;
51
+
52
+ // OpenRouter: per-model cost overrides for paid models
53
+ if (provider.name === 'OpenRouter') {
54
+ const orCosts: Record<string, [number, number]> = {
55
+ 'openai/gpt-4o': [2.5, 10],
56
+ 'anthropic/claude-3.5-sonnet': [3, 15],
57
+ 'google/gemini-pro-1.5': [1.25, 5],
58
+ 'meta-llama/llama-3.1-70b-instruct': [0.18, 0.18],
59
+ 'mistralai/mistral-large': [2, 6],
60
+ };
61
+ const orKey = model.includes('/') ? model : 'openrouter/' + model;
62
+ // Try matching by full key or by model name
63
+ for (const [pattern, cost] of Object.entries(orCosts)) {
64
+ if (orKey.includes(pattern) || model.includes(pattern.split('/')[1] || pattern)) {
65
+ costPerKInput = cost[0];
66
+ costPerKOutput = cost[1];
67
+ break;
68
+ }
69
+ }
70
+ }
51
71
 
52
72
  // Assign strengths based on model characteristics
53
73
  const strengths: string[] = [];
54
74
  if (provider.type === 'cli') {
55
75
  strengths.push('free', 'local');
56
76
  }
57
- if (costPerKInput < 0.3) {
77
+ if (costPerKInput < 0.3 && provider.name !== 'OpenRouter') {
58
78
  strengths.push('budget', 'fast');
59
79
  } else if (costPerKInput > 2) {
60
80
  strengths.push('premium', 'reasoning');
@@ -62,6 +82,29 @@ function buildModelProfiles(): Record<string, ModelProfile> {
62
82
  if (provider.name === 'Mistral' || provider.name === 'Groq' || provider.name === 'Cerebras') {
63
83
  strengths.push('fast', 'coding');
64
84
  }
85
+ // OpenRouter models get extra strength tags chosen by a model-name tier (premium / mid / budget)
86
+ if (provider.name === 'OpenRouter') {
87
+ const modelLower = modelKey.toLowerCase();
88
+ // Premium-tier free models (large, high-context)
89
+ if (modelLower.includes('kimi') || modelLower.includes('qwen3-coder') ||
90
+ modelLower.includes('nemotron-3-ultra') || modelLower.includes('nemotron-3-super') ||
91
+ modelLower.includes('hermes-3') || modelLower.includes('gemma-4')) {
92
+ strengths.push('reasoning', 'long-context', 'premium');
93
+ }
94
+ // Mid-tier free models (good quality, smaller)
95
+ else if (modelLower.includes('gpt-oss') || modelLower.includes('qwen3-next') ||
96
+ modelLower.includes('gemma-4') || modelLower.includes('llama-3.3')) {
97
+ strengths.push('fast', 'reasoning');
98
+ }
99
+ // Budget free models
100
+ else {
101
+ strengths.push('fast');
102
+ }
103
+ }
104
+ // MiniMax gets the 'fast' and 'reasoning' strengths (capable model, cheap pricing)
105
+ if (provider.name === 'MiniMax') {
106
+ strengths.push('fast', 'reasoning');
107
+ }
65
108
  if (provider.name === 'CommandCode') {
66
109
  strengths.push('code-aware', 'context-rich');
67
110
  }
@@ -407,6 +450,30 @@ export function extractQueryFeatures(prompt: string): QueryFeatures {
407
450
  const isDevops = /docker|kubernetes|terraform|ansible|ci.cd|pipeline|sql|nosql|database|sqlserver|mysql|postgres|deploy|container|orchestrat/i.test(lower);
408
451
  const isMultimodal = /image|video|audio|generate.*picture|generate.*image|transcribe|voice/i.test(lower);
409
452
 
453
+ // === ARCHITECTURAL COMPLEXITY SIGNALS ===
454
+ // Boost complexity for system design / architecture queries
455
+ // These indicate the query needs a capable model
456
+ const archPatterns = [
457
+ /architect\s+(a\s+)?(distributed|real-time|high-availability|fault-tolerant|scalable|multi-region|global)/i,
458
+ /design\s+(a\s+)?(distributed|real-time|high-availability|fault-tolerant|scalable|multi-region|global)\s+(system|architecture|platform|infrastructure)/i,
459
+ /design\s+a\s+system\s+that\s+handles/i,
460
+ /data\s+warehouse\s+architecture/i,
461
+ /security\s+architecture\s+for/i,
462
+ /multi-cloud\s+hybrid/i,
463
+ /disaster\s+recovery\s+strategy/i,
464
+ /zero-downtime\s+deployment/i,
465
+ /petabyte-scale/i,
466
+ /billion\s+events?/i,
467
+ /million\s+transactions?/i,
468
+ /sensor\s+fusion/i,
469
+ /autonomous\s+vehicle/i,
470
+ /fraud\s+detection\s+system/i,
471
+ /privacy-preserving\s+analytics/i,
472
+ /real-time\s+(anomaly|fraud|video)\s+detection/i,
473
+ ];
474
+ const archMatches = archPatterns.filter(p => p.test(prompt)).length;
475
+ if (archMatches > 0) complexity += 0.25 + (archMatches * 0.08);
476
+
410
477
  // Cap at 1.0
411
478
  complexity = Math.min(complexity, 1.0);
412
479
 
@@ -456,6 +523,50 @@ export function extractQueryFeatures(prompt: string): QueryFeatures {
456
523
  function scoreModelFit(model: ModelProfile, features: QueryFeatures): number {
457
524
  let score = model.quality_score * 0.6;
458
525
 
526
+ // === ADAPTIVE TIER DETECTION ===
527
+ // Uses cost percentiles computed from actually available providers.
528
+ // If user has only free models → everything is "free" tier.
529
+ // If user has free + groq + openai → quartiles split them naturally.
530
+ const modelCost = (model.cost_per_1k_input + model.cost_per_1k_output) / 2;
531
+ let tierFromModel: string;
532
+ if (model.strengths.includes('free') || modelCost === 0) {
533
+ tierFromModel = 'free';
534
+ } else if (modelCost <= _costPercentiles.p25) {
535
+ tierFromModel = 'cheap';
536
+ } else if (modelCost <= _costPercentiles.p75) {
537
+ tierFromModel = 'mid';
538
+ } else {
539
+ tierFromModel = 'premium';
540
+ }
541
+
542
+ // === ADAPTIVE TIER SCORING ===
543
+ // Boost/penalty scales with how well the model's tier matches the query complexity
544
+ // Simple queries (0-0.3): strongly prefer free/cheap
545
+ // Medium queries (0.3-0.5): prefer cheap, then free, then mid; slight penalty for premium
546
+ // Complex queries (0.5-0.65): prefer mid
547
+ // Very complex (>0.65): prefer premium/mid
548
+ if (features.complexity < 0.3) {
549
+ if (tierFromModel === 'free') score += 0.20;
550
+ else if (tierFromModel === 'cheap') score += 0.10;
551
+ else if (tierFromModel === 'mid') score -= 0.05;
552
+ else if (tierFromModel === 'premium') score -= 0.15;
553
+ } else if (features.complexity < 0.5) {
554
+ if (tierFromModel === 'cheap') score += 0.20;
555
+ else if (tierFromModel === 'free') score += 0.10;
556
+ else if (tierFromModel === 'mid') score += 0.05;
557
+ else if (tierFromModel === 'premium') score -= 0.05;
558
+ } else if (features.complexity <= 0.65) {
559
+ if (tierFromModel === 'mid') score += 0.30;
560
+ else if (tierFromModel === 'cheap') score += 0.10;
561
+ else if (tierFromModel === 'premium') score += 0.10;
562
+ else if (tierFromModel === 'free') score -= 0.20;
563
+ } else {
564
+ if (tierFromModel === 'premium') score += 0.35;
565
+ else if (tierFromModel === 'mid') score += 0.15;
566
+ else if (tierFromModel === 'cheap') score -= 0.15;
567
+ else if (tierFromModel === 'free') score -= 0.30;
568
+ }
569
+
459
570
  // Domain match (reduced for budget models)
460
571
  // Premium models get +0.2 for domain match
461
572
  // Budget/free models get only +0.05 (they lack capability for complex domains)
@@ -561,10 +672,26 @@ export interface RouteDecision {
561
672
  provider_type?: string;
562
673
  }
563
674
 
675
+ // Cost percentile cache for adaptive tier detection (updated on each routeQuery call)
676
+ let _costPercentiles: { p25: number; p50: number; p75: number } = { p25: 0, p50: 0.5, p75: 1.0 };
677
+
678
+ // ============================================================
679
+ // ADAPTIVE SCORING CORE
680
+ // ============================================================
681
+
564
682
  export function routeQuery(prompt: string, available_models?: string[], budget_multiplier: number = 1.0): RouteDecision {
565
683
  // Use cached profiles instead of rebuilding every time (5-10ms savings)
566
684
  const profiles = getModelProfiles();
567
685
 
686
+ // === ADAPTIVE: Compute cost percentiles from available providers ===
687
+ // This makes tier detection dynamic based on what the user has configured
688
+ const allCosts = Object.values(profiles).map(p => (p.cost_per_1k_input + p.cost_per_1k_output) / 2).sort((a, b) => a - b);
689
+ _costPercentiles = {
690
+ p25: allCosts[Math.floor(allCosts.length * 0.25)] || 0,
691
+ p50: allCosts[Math.floor(allCosts.length * 0.50)] || 0.5,
692
+ p75: allCosts[Math.floor(allCosts.length * 0.75)] || 1.0,
693
+ };
694
+
568
695
  const features = extractQueryFeatures(prompt);
569
696
  const candidate_names = available_models || Object.keys(profiles);
570
697
 
@@ -595,11 +722,59 @@ export function routeQuery(prompt: string, available_models?: string[], budget_m
595
722
  };
596
723
  }
597
724
 
598
- // Sort by total score (quality vs cost tradeoff based on complexity)
599
- const complexity_bias = features.complexity > 0.6 ? 0.7 : features.complexity > 0.5 ? 0.45 : 0.3;
725
+ // === ADAPTIVE SCORING: Dynamic tier boundaries based on available providers ===
726
+ // Instead of fixed complexity thresholds, we analyze the actual provider landscape
727
+ // and adjust scoring to make the best use of what's available.
728
+
729
+ // Gather provider statistics for adaptive scoring
730
+ const allProfiles = Object.values(profiles);
731
+ const freeModels = allProfiles.filter(p => (p.cost_per_1k_input + p.cost_per_1k_output) === 0);
732
+ const paidModels = allProfiles.filter(p => (p.cost_per_1k_input + p.cost_per_1k_output) > 0);
733
+ const maxQuality = Math.max(...allProfiles.map(p => p.quality_score), 0.95);
734
+ const minQuality = Math.min(...allProfiles.map(p => p.quality_score), 0.72);
735
+ const qualityRange = maxQuality - minQuality;
736
+
737
+ // Calculate the "value gap" — how much better paid models are than free ones
738
+ const avgFreeQuality = freeModels.length > 0
739
+ ? freeModels.reduce((s, p) => s + p.quality_score, 0) / freeModels.length
740
+ : 0.72;
741
+ const avgPaidQuality = paidModels.length > 0
742
+ ? paidModels.reduce((s, p) => s + p.quality_score, 0) / paidModels.length
743
+ : 0.85;
744
+ const qualityGap = avgPaidQuality - avgFreeQuality;
745
+
746
+ // Adaptive complexity bias:
747
+ // - If quality gap is large (paid models are much better), weight quality more
748
+ // - If quality gap is small (free models are good enough), weight cost more
749
+ // - Scale by complexity: complex queries need quality, simple queries need cost savings
750
+ const baseComplexityBias = features.complexity < 0.3 ? 0.3
751
+ : features.complexity <= 0.5 ? 0.5
752
+ : features.complexity <= 0.65 ? 0.7
753
+ : 0.85;
754
+
755
+ // Adjust bias based on quality gap: bigger gap → more weight on quality
756
+ const gapAdjustment = Math.min(qualityGap * 0.5, 0.15); // max 0.15 adjustment
757
+ const complexity_bias = Math.min(baseComplexityBias + gapAdjustment, 0.9);
758
+ const cost_bias = 1 - complexity_bias;
759
+
760
+ // Adaptive quality floor: scale with complexity and available quality
761
+ // For complex queries (> 0.5) the floor is max(avgPaidQuality - 0.1, 0.75); avgPaidQuality falls back to 0.85 when no paid models exist
762
+ const adaptiveQualityFloor = features.complexity > 0.5
763
+ ? Math.max(avgPaidQuality - 0.1, 0.75)
764
+ : 0;
765
+
600
766
  const scoreFn = (c: typeof candidates[0]) => c.quality_score * complexity_bias + c.cost_score * (1 - complexity_bias);
601
767
 
602
- const topCandidates = quickselectTopK(candidates, 4, scoreFn);
768
+ let topCandidates = quickselectTopK(candidates, 4, scoreFn);
769
+
770
+ // Adaptive quality floor: for complex queries, prefer models above the floor
771
+ if (adaptiveQualityFloor > 0) {
772
+ const qualified = topCandidates.filter(c => c.quality_score >= adaptiveQualityFloor);
773
+ if (qualified.length > 0) {
774
+ topCandidates = qualified;
775
+ }
776
+ // If no model meets the floor, keep original topCandidates (graceful degradation)
777
+ }
603
778
 
604
779
  const primary = topCandidates[0];
605
780
  const secondary = topCandidates.slice(1, 3);
@@ -23,6 +23,7 @@
23
23
  "src/skills/__tests__",
24
24
  "src/cli/setupWizard.ts",
25
25
  "src/integrations/langchainAdapter.ts",
26
- "src/tui/index.ts"
26
+ "src/tui/index.ts",
27
+ "src/benchmark/comprehensive.ts"
27
28
  ]
28
29
  }