adaptive-memory-multi-model-router 2.14.44 → 2.14.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/data/adaptive-benchmark.json +92 -0
- package/data/benchmark-results.json +47 -0
- package/dist/benchmark/comprehensive.d.ts +56 -0
- package/dist/benchmark/comprehensive.js +390 -0
- package/dist/benchmark/comprehensive.js.map +1 -0
- package/dist/memory/memoryTree.d.ts +15 -2
- package/dist/memory/memoryTree.js +66 -7
- package/dist/memory/memoryTree.js.map +1 -1
- package/dist/providers/providerConfig.js +14 -2
- package/dist/providers/providerConfig.js.map +1 -1
- package/dist/routing/advancedRouter.js +190 -6
- package/dist/routing/advancedRouter.js.map +1 -1
- package/package.json +1 -1
- package/research-state.yaml +32 -0
- package/src/benchmark/comprehensive.ts +323 -0
- package/src/memory/memoryTree.ts +77 -7
- package/src/providers/providerConfig.ts +14 -2
- package/src/routing/advancedRouter.ts +181 -6
- package/tsconfig.build.json +2 -1
|
@@ -46,15 +46,35 @@ function buildModelProfiles(): Record<string, ModelProfile> {
|
|
|
46
46
|
for (const [providerId, provider] of Object.entries(available)) {
|
|
47
47
|
for (const model of provider.models) {
|
|
48
48
|
const modelKey = model.includes('/') ? model : providerId + '/' + model;
|
|
49
|
-
|
|
50
|
-
|
|
49
|
+
let costPerKInput = provider.costPerK ? provider.costPerK.input : 0;
|
|
50
|
+
let costPerKOutput = provider.costPerK ? provider.costPerK.output : 0;
|
|
51
|
+
|
|
52
|
+
// OpenRouter: per-model cost overrides for paid models
|
|
53
|
+
if (provider.name === 'OpenRouter') {
|
|
54
|
+
const orCosts: Record<string, [number, number]> = {
|
|
55
|
+
'openai/gpt-4o': [2.5, 10],
|
|
56
|
+
'anthropic/claude-3.5-sonnet': [3, 15],
|
|
57
|
+
'google/gemini-pro-1.5': [1.25, 5],
|
|
58
|
+
'meta-llama/llama-3.1-70b-instruct': [0.18, 0.18],
|
|
59
|
+
'mistralai/mistral-large': [2, 6],
|
|
60
|
+
};
|
|
61
|
+
const orKey = model.includes('/') ? model : 'openrouter/' + model;
|
|
62
|
+
// Try matching by full key or by model name
|
|
63
|
+
for (const [pattern, cost] of Object.entries(orCosts)) {
|
|
64
|
+
if (orKey.includes(pattern) || model.includes(pattern.split('/')[1] || pattern)) {
|
|
65
|
+
costPerKInput = cost[0];
|
|
66
|
+
costPerKOutput = cost[1];
|
|
67
|
+
break;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
51
71
|
|
|
52
72
|
// Assign strengths based on model characteristics
|
|
53
73
|
const strengths: string[] = [];
|
|
54
74
|
if (provider.type === 'cli') {
|
|
55
75
|
strengths.push('free', 'local');
|
|
56
76
|
}
|
|
57
|
-
if (costPerKInput < 0.3) {
|
|
77
|
+
if (costPerKInput < 0.3 && provider.name !== 'OpenRouter') {
|
|
58
78
|
strengths.push('budget', 'fast');
|
|
59
79
|
} else if (costPerKInput > 2) {
|
|
60
80
|
strengths.push('premium', 'reasoning');
|
|
@@ -62,6 +82,29 @@ function buildModelProfiles(): Record<string, ModelProfile> {
|
|
|
62
82
|
if (provider.name === 'Mistral' || provider.name === 'Groq' || provider.name === 'Cerebras') {
|
|
63
83
|
strengths.push('fast', 'coding');
|
|
64
84
|
}
|
|
85
|
+
// OpenRouter premium free models get quality boosts
|
|
86
|
+
if (provider.name === 'OpenRouter') {
|
|
87
|
+
const modelLower = modelKey.toLowerCase();
|
|
88
|
+
// Premium-tier free models (large, high-context)
|
|
89
|
+
if (modelLower.includes('kimi') || modelLower.includes('qwen3-coder') ||
|
|
90
|
+
modelLower.includes('nemotron-3-ultra') || modelLower.includes('nemotron-3-super') ||
|
|
91
|
+
modelLower.includes('hermes-3') || modelLower.includes('gemma-4')) {
|
|
92
|
+
strengths.push('reasoning', 'long-context', 'premium');
|
|
93
|
+
}
|
|
94
|
+
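// Mid-tier free models (good quality, smaller); note: 'gemma-4' is already matched by the premium branch above, so its check in this branch never fires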
|
|
95
|
+
else if (modelLower.includes('gpt-oss') || modelLower.includes('qwen3-next') ||
|
|
96
|
+
modelLower.includes('gemma-4') || modelLower.includes('llama-3.3')) {
|
|
97
|
+
strengths.push('fast', 'reasoning');
|
|
98
|
+
}
|
|
99
|
+
// Budget free models
|
|
100
|
+
else {
|
|
101
|
+
strengths.push('fast');
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
// MiniMax gets a quality boost (capable model, cheap pricing)
|
|
105
|
+
if (provider.name === 'MiniMax') {
|
|
106
|
+
strengths.push('fast', 'reasoning');
|
|
107
|
+
}
|
|
65
108
|
if (provider.name === 'CommandCode') {
|
|
66
109
|
strengths.push('code-aware', 'context-rich');
|
|
67
110
|
}
|
|
@@ -407,6 +450,30 @@ export function extractQueryFeatures(prompt: string): QueryFeatures {
|
|
|
407
450
|
const isDevops = /docker|kubernetes|terraform|ansible|ci.cd|pipeline|sql|nosql|database|sqlserver|mysql|postgres|deploy|container|orchestrat/i.test(lower);
|
|
408
451
|
const isMultimodal = /image|video|audio|generate.*picture|generate.*image|transcribe|voice/i.test(lower);
|
|
409
452
|
|
|
453
|
+
// === ARCHITECTURAL COMPLEXITY SIGNALS ===
|
|
454
|
+
// Boost complexity for system design / architecture queries
|
|
455
|
+
// These indicate the query needs a capable model
|
|
456
|
+
const archPatterns = [
|
|
457
|
+
/architect\s+(a\s+)?(distributed|real-time|high-availability|fault-tolerant|scalable|multi-region|global)/i,
|
|
458
|
+
/design\s+(a\s+)?(distributed|real-time|high-availability|fault-tolerant|scalable|multi-region|global)\s+(system|architecture|platform|infrastructure)/i,
|
|
459
|
+
/design\s+a\s+system\s+that\s+handles/i,
|
|
460
|
+
/data\s+warehouse\s+architecture/i,
|
|
461
|
+
/security\s+architecture\s+for/i,
|
|
462
|
+
/multi-cloud\s+hybrid/i,
|
|
463
|
+
/disaster\s+recovery\s+strategy/i,
|
|
464
|
+
/zero-downtime\s+deployment/i,
|
|
465
|
+
/petabyte-scale/i,
|
|
466
|
+
/billion\s+events?/i,
|
|
467
|
+
/million\s+transactions?/i,
|
|
468
|
+
/sensor\s+fusion/i,
|
|
469
|
+
/autonomous\s+vehicle/i,
|
|
470
|
+
/fraud\s+detection\s+system/i,
|
|
471
|
+
/privacy-preserving\s+analytics/i,
|
|
472
|
+
/real-time\s+(anomaly|fraud|video)\s+detection/i,
|
|
473
|
+
];
|
|
474
|
+
const archMatches = archPatterns.filter(p => p.test(prompt)).length;
|
|
475
|
+
if (archMatches > 0) complexity += 0.25 + (archMatches * 0.08);
|
|
476
|
+
|
|
410
477
|
// Cap at 1.0
|
|
411
478
|
complexity = Math.min(complexity, 1.0);
|
|
412
479
|
|
|
@@ -456,6 +523,50 @@ export function extractQueryFeatures(prompt: string): QueryFeatures {
|
|
|
456
523
|
function scoreModelFit(model: ModelProfile, features: QueryFeatures): number {
|
|
457
524
|
let score = model.quality_score * 0.6;
|
|
458
525
|
|
|
526
|
+
// === ADAPTIVE TIER DETECTION ===
|
|
527
|
+
// Uses cost percentiles computed from actually available providers.
|
|
528
|
+
// If user has only free models → everything is "free" tier.
|
|
529
|
+
// If user has free + groq + openai → quartiles split them naturally.
|
|
530
|
+
const modelCost = (model.cost_per_1k_input + model.cost_per_1k_output) / 2;
|
|
531
|
+
let tierFromModel: string;
|
|
532
|
+
if (model.strengths.includes('free') || modelCost === 0) {
|
|
533
|
+
tierFromModel = 'free';
|
|
534
|
+
} else if (modelCost <= _costPercentiles.p25) {
|
|
535
|
+
tierFromModel = 'cheap';
|
|
536
|
+
} else if (modelCost <= _costPercentiles.p75) {
|
|
537
|
+
tierFromModel = 'mid';
|
|
538
|
+
} else {
|
|
539
|
+
tierFromModel = 'premium';
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
// === ADAPTIVE TIER SCORING ===
|
|
543
|
+
// Boost/penalty scales with how well the model's tier matches the query complexity
|
|
544
|
+
// Simple queries (0-0.3): strongly prefer free/cheap
|
|
545
|
+
// Medium queries (0.3-0.5): prefer cheap, then free, then mid; premium is mildly penalized
|
|
546
|
+
// Complex queries (0.5-0.65): prefer mid
|
|
547
|
+
// Very complex (0.65+): prefer premium/mid
|
|
548
|
+
if (features.complexity < 0.3) {
|
|
549
|
+
if (tierFromModel === 'free') score += 0.20;
|
|
550
|
+
else if (tierFromModel === 'cheap') score += 0.10;
|
|
551
|
+
else if (tierFromModel === 'mid') score -= 0.05;
|
|
552
|
+
else if (tierFromModel === 'premium') score -= 0.15;
|
|
553
|
+
} else if (features.complexity < 0.5) {
|
|
554
|
+
if (tierFromModel === 'cheap') score += 0.20;
|
|
555
|
+
else if (tierFromModel === 'free') score += 0.10;
|
|
556
|
+
else if (tierFromModel === 'mid') score += 0.05;
|
|
557
|
+
else if (tierFromModel === 'premium') score -= 0.05;
|
|
558
|
+
} else if (features.complexity <= 0.65) {
|
|
559
|
+
if (tierFromModel === 'mid') score += 0.30;
|
|
560
|
+
else if (tierFromModel === 'cheap') score += 0.10;
|
|
561
|
+
else if (tierFromModel === 'premium') score += 0.10;
|
|
562
|
+
else if (tierFromModel === 'free') score -= 0.20;
|
|
563
|
+
} else {
|
|
564
|
+
if (tierFromModel === 'premium') score += 0.35;
|
|
565
|
+
else if (tierFromModel === 'mid') score += 0.15;
|
|
566
|
+
else if (tierFromModel === 'cheap') score -= 0.15;
|
|
567
|
+
else if (tierFromModel === 'free') score -= 0.30;
|
|
568
|
+
}
|
|
569
|
+
|
|
459
570
|
// Domain match (reduced for budget models)
|
|
460
571
|
// Premium models get +0.2 for domain match
|
|
461
572
|
// Budget/free models get only +0.05 (they lack capability for complex domains)
|
|
@@ -561,10 +672,26 @@ export interface RouteDecision {
|
|
|
561
672
|
provider_type?: string;
|
|
562
673
|
}
|
|
563
674
|
|
|
675
|
+
// Cost percentile cache for adaptive tier detection (updated on each routeQuery call)
|
|
676
|
+
let _costPercentiles: { p25: number; p50: number; p75: number } = { p25: 0, p50: 0.5, p75: 1.0 };
|
|
677
|
+
|
|
678
|
+
// ============================================================
|
|
679
|
+
// ADAPTIVE SCORING CORE
|
|
680
|
+
// ============================================================
|
|
681
|
+
|
|
564
682
|
export function routeQuery(prompt: string, available_models?: string[], budget_multiplier: number = 1.0): RouteDecision {
|
|
565
683
|
// Use cached profiles instead of rebuilding every time (5-10ms savings)
|
|
566
684
|
const profiles = getModelProfiles();
|
|
567
685
|
|
|
686
|
+
// === ADAPTIVE: Compute cost percentiles from available providers ===
|
|
687
|
+
// This makes tier detection dynamic based on what the user has configured
|
|
688
|
+
const allCosts = Object.values(profiles).map(p => (p.cost_per_1k_input + p.cost_per_1k_output) / 2).sort((a, b) => a - b);
|
|
689
|
+
_costPercentiles = {
|
|
690
|
+
p25: allCosts[Math.floor(allCosts.length * 0.25)] || 0,
|
|
691
|
+
p50: allCosts[Math.floor(allCosts.length * 0.50)] || 0.5,
|
|
692
|
+
p75: allCosts[Math.floor(allCosts.length * 0.75)] || 1.0,
|
|
693
|
+
};
|
|
694
|
+
|
|
568
695
|
const features = extractQueryFeatures(prompt);
|
|
569
696
|
const candidate_names = available_models || Object.keys(profiles);
|
|
570
697
|
|
|
@@ -595,11 +722,59 @@ export function routeQuery(prompt: string, available_models?: string[], budget_m
|
|
|
595
722
|
};
|
|
596
723
|
}
|
|
597
724
|
|
|
598
|
-
//
|
|
599
|
-
|
|
725
|
+
// === ADAPTIVE SCORING: Dynamic tier boundaries based on available providers ===
|
|
726
|
+
// The complexity thresholds stay fixed (0.3 / 0.5 / 0.65); on top of them we analyze the actual provider landscape
|
|
727
|
+
// and adjust scoring to make the best use of what's available.
|
|
728
|
+
|
|
729
|
+
// Gather provider statistics for adaptive scoring
|
|
730
|
+
const allProfiles = Object.values(profiles);
|
|
731
|
+
const freeModels = allProfiles.filter(p => (p.cost_per_1k_input + p.cost_per_1k_output) === 0);
|
|
732
|
+
const paidModels = allProfiles.filter(p => (p.cost_per_1k_input + p.cost_per_1k_output) > 0);
|
|
733
|
+
const maxQuality = Math.max(...allProfiles.map(p => p.quality_score), 0.95);
|
|
734
|
+
const minQuality = Math.min(...allProfiles.map(p => p.quality_score), 0.72);
|
|
735
|
+
const qualityRange = maxQuality - minQuality;
|
|
736
|
+
|
|
737
|
+
// Calculate the "value gap" — how much better paid models are than free ones
|
|
738
|
+
const avgFreeQuality = freeModels.length > 0
|
|
739
|
+
? freeModels.reduce((s, p) => s + p.quality_score, 0) / freeModels.length
|
|
740
|
+
: 0.72;
|
|
741
|
+
const avgPaidQuality = paidModels.length > 0
|
|
742
|
+
? paidModels.reduce((s, p) => s + p.quality_score, 0) / paidModels.length
|
|
743
|
+
: 0.85;
|
|
744
|
+
const qualityGap = avgPaidQuality - avgFreeQuality;
|
|
745
|
+
|
|
746
|
+
// Adaptive complexity bias:
|
|
747
|
+
// - If quality gap is large (paid models are much better), weight quality more
|
|
748
|
+
// - If quality gap is small (free models are good enough), weight cost more
|
|
749
|
+
// - Scale by complexity: complex queries need quality, simple queries need cost savings
|
|
750
|
+
const baseComplexityBias = features.complexity < 0.3 ? 0.3
|
|
751
|
+
: features.complexity <= 0.5 ? 0.5
|
|
752
|
+
: features.complexity <= 0.65 ? 0.7
|
|
753
|
+
: 0.85;
|
|
754
|
+
|
|
755
|
+
// Adjust bias based on quality gap: bigger gap → more weight on quality
|
|
756
|
+
const gapAdjustment = Math.min(qualityGap * 0.5, 0.15); // max 0.15 adjustment
|
|
757
|
+
const complexity_bias = Math.min(baseComplexityBias + gapAdjustment, 0.9);
|
|
758
|
+
const cost_bias = 1 - complexity_bias;
|
|
759
|
+
|
|
760
|
+
// Adaptive quality floor: scale with complexity and available quality
|
|
761
|
+
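// For complex queries (complexity > 0.5) the floor is avgPaidQuality - 0.1, never below 0.75 (avgPaidQuality defaults to 0.85 when no paid models exist); other queries get no floor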
|
|
762
|
+
const adaptiveQualityFloor = features.complexity > 0.5
|
|
763
|
+
? Math.max(avgPaidQuality - 0.1, 0.75)
|
|
764
|
+
: 0;
|
|
765
|
+
|
|
600
766
|
const scoreFn = (c: typeof candidates[0]) => c.quality_score * complexity_bias + c.cost_score * (1 - complexity_bias);
|
|
601
767
|
|
|
602
|
-
|
|
768
|
+
let topCandidates = quickselectTopK(candidates, 4, scoreFn);
|
|
769
|
+
|
|
770
|
+
// Adaptive quality floor: for complex queries, prefer models above the floor
|
|
771
|
+
if (adaptiveQualityFloor > 0) {
|
|
772
|
+
const qualified = topCandidates.filter(c => c.quality_score >= adaptiveQualityFloor);
|
|
773
|
+
if (qualified.length > 0) {
|
|
774
|
+
topCandidates = qualified;
|
|
775
|
+
}
|
|
776
|
+
// If no model meets the floor, keep original topCandidates (graceful degradation)
|
|
777
|
+
}
|
|
603
778
|
|
|
604
779
|
const primary = topCandidates[0];
|
|
605
780
|
const secondary = topCandidates.slice(1, 3);
|