adaptive-memory-multi-model-router 1.2.2 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +146 -66
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/integrations/airtable.js +20 -0
- package/dist/integrations/discord.js +18 -0
- package/dist/integrations/github.js +23 -0
- package/dist/integrations/gmail.js +19 -0
- package/dist/integrations/google-calendar.js +18 -0
- package/dist/integrations/index.js +61 -0
- package/dist/integrations/jira.js +21 -0
- package/dist/integrations/linear.js +19 -0
- package/dist/integrations/notion.js +19 -0
- package/dist/integrations/slack.js +18 -0
- package/dist/integrations/telegram.js +19 -0
- package/dist/providers/registry.js +7 -3
- package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +1391 -0
- package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +1051 -0
- package/docs/CONFIGURATION.md +476 -0
- package/docs/COUNCIL_DECISION.json +308 -0
- package/docs/COUNCIL_SUMMARY.md +265 -0
- package/docs/COUNCIL_V2.2_DECISION.md +416 -0
- package/docs/IMPROVEMENT_ROADMAP.md +515 -0
- package/docs/LLM_COUNCIL_DECISION.md +508 -0
- package/docs/QUICK_START_VISIBILITY.md +782 -0
- package/docs/REDDIT_GAP_ANALYSIS.md +299 -0
- package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +1180 -0
- package/docs/TMLPD_QNA.md +751 -0
- package/docs/TMLPD_V2.1_COMPLETE.md +763 -0
- package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +754 -0
- package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +446 -0
- package/docs/V2_IMPLEMENTATION_GUIDE.md +388 -0
- package/docs/VISIBILITY_ADOPTION_PLAN.md +1005 -0
- package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +421 -0
- package/docs/launch-content/README.md +457 -0
- package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
- package/docs/launch-content/assets/cumulative_savings.png +0 -0
- package/docs/launch-content/assets/parallel_speedup.png +0 -0
- package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
- package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
- package/docs/launch-content/generate_charts.py +313 -0
- package/docs/launch-content/hn_show_post.md +139 -0
- package/docs/launch-content/partner_outreach_templates.md +745 -0
- package/docs/launch-content/reddit_posts.md +467 -0
- package/docs/launch-content/twitter_thread.txt +460 -0
- package/examples/QUICKSTART.md +1 -1
- package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +313 -0
- package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +277 -0
- package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +1234 -0
- package/openclaw-alexa-bridge/test_fixes.js +77 -0
- package/package.json +120 -29
- package/package.json.tmp +0 -0
- package/qna/TMLPD_QNA.md +3 -3
- package/skill/SKILL.md +2 -2
- package/src/__tests__/integration/tmpld_integration.test.py +540 -0
- package/src/agents/skill_enhanced_agent.py +318 -0
- package/src/memory/__init__.py +15 -0
- package/src/memory/agentic_memory.py +353 -0
- package/src/memory/semantic_memory.py +444 -0
- package/src/memory/simple_memory.py +466 -0
- package/src/memory/working_memory.py +447 -0
- package/src/orchestration/__init__.py +52 -0
- package/src/orchestration/execution_engine.py +353 -0
- package/src/orchestration/halo_orchestrator.py +367 -0
- package/src/orchestration/mcts_workflow.py +498 -0
- package/src/orchestration/role_assigner.py +473 -0
- package/src/orchestration/task_planner.py +522 -0
- package/src/providers/__init__.py +67 -0
- package/src/providers/anthropic.py +304 -0
- package/src/providers/base.py +241 -0
- package/src/providers/cerebras.py +373 -0
- package/src/providers/registry.py +476 -0
- package/src/routing/__init__.py +30 -0
- package/src/routing/universal_router.py +621 -0
- package/src/skills/TMLPD-QUICKREF.md +210 -0
- package/src/skills/TMLPD-SETUP-SUMMARY.md +157 -0
- package/src/skills/TMLPD.md +540 -0
- package/src/skills/__tests__/skill_manager.test.ts +328 -0
- package/src/skills/skill_manager.py +385 -0
- package/src/skills/test-tmlpd.sh +108 -0
- package/src/skills/tmlpd-category.yaml +67 -0
- package/src/skills/tmlpd-monitoring.yaml +188 -0
- package/src/skills/tmlpd-phase.yaml +132 -0
- package/src/state/__init__.py +17 -0
- package/src/state/simple_checkpoint.py +508 -0
- package/src/tmlpd_agent.py +464 -0
- package/src/tmpld_v2.py +427 -0
- package/src/workflows/__init__.py +18 -0
- package/src/workflows/advanced_difficulty_classifier.py +377 -0
- package/src/workflows/chaining_executor.py +417 -0
- package/src/workflows/difficulty_integration.py +209 -0
- package/src/workflows/orchestrator.py +469 -0
- package/src/workflows/orchestrator_executor.py +456 -0
- package/src/workflows/parallelization_executor.py +382 -0
- package/src/workflows/router.py +311 -0
- package/test_integration_simple.py +86 -0
- package/test_mcts_workflow.py +150 -0
- package/test_templd_integration.py +262 -0
- package/test_universal_router.py +275 -0
- package/tmlpd-pi-extension/README.md +36 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +114 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.js +285 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +1 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +58 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.js +153 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.js.map +1 -0
- package/tmlpd-pi-extension/dist/cli.js +59 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +95 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.js +240 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.js.map +1 -0
- package/tmlpd-pi-extension/dist/index.d.ts +723 -0
- package/tmlpd-pi-extension/dist/index.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/index.js +239 -0
- package/tmlpd-pi-extension/dist/index.js.map +1 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +82 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.js +145 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +102 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +207 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +85 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +210 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +1 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +102 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.js +338 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.js.map +1 -0
- package/tmlpd-pi-extension/dist/providers/registry.d.ts +55 -0
- package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/providers/registry.js +138 -0
- package/tmlpd-pi-extension/dist/providers/registry.js.map +1 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +68 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.js +332 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +1 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +101 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +368 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +96 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.js +170 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/compression.d.ts +61 -0
- package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/compression.js +281 -0
- package/tmlpd-pi-extension/dist/utils/compression.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/reliability.d.ts +74 -0
- package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/reliability.js +177 -0
- package/tmlpd-pi-extension/dist/utils/reliability.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +117 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +246 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +50 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.js +124 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +1 -0
- package/tmlpd-pi-extension/examples/QUICKSTART.md +183 -0
- package/tmlpd-pi-extension/package-lock.json +75 -0
- package/tmlpd-pi-extension/package.json +172 -0
- package/tmlpd-pi-extension/python/examples.py +53 -0
- package/tmlpd-pi-extension/python/integrations.py +330 -0
- package/tmlpd-pi-extension/python/setup.py +28 -0
- package/tmlpd-pi-extension/python/tmlpd.py +369 -0
- package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +299 -0
- package/tmlpd-pi-extension/qna/TMLPD_QNA.md +751 -0
- package/tmlpd-pi-extension/skill/SKILL.md +238 -0
- package/{src → tmlpd-pi-extension/src}/index.ts +1 -1
- package/tmlpd-pi-extension/tsconfig.json +18 -0
- package/demo/research-demo.js +0 -266
- package/notebooks/quickstart.ipynb +0 -157
- package/rust/tmlpd.h +0 -268
- package/src/cache/prefixCache.ts +0 -365
- package/src/routing/advancedRouter.ts +0 -406
- package/src/utils/speculativeDecoding.ts +0 -344
- /package/{src → tmlpd-pi-extension/src}/cache/responseCache.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/cost/costTracker.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/memory/episodicMemory.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/orchestration/haloOrchestrator.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/orchestration/mctsWorkflow.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/providers/localProvider.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/providers/registry.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/tools/tmlpdTools.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/batchProcessor.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/compression.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/reliability.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/tokenUtils.ts +0 -0
|
@@ -1,406 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* TMLPD Advanced Routing - RouteLLM Style
|
|
3
|
-
*
|
|
4
|
-
* Learned routing based on arXiv:2404.06035 (RouteLLM)
|
|
5
|
-
* Balances cost-quality tradeoff with confidence-based model selection
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import { countTokens, estimateCost } from "../utils/tokenUtils";
|
|
9
|
-
|
|
10
|
-
/**
 * Heuristic signals derived from a prompt; consumed by the scoring
 * functions when ranking candidate models.
 */
export interface QueryFeatures {
  /** Estimated complexity, 0-1. */
  complexity: number;
  /** Prompt length as a token count. */
  length: number;
  /** The prompt contains code patterns. */
  has_code: boolean;
  /** The prompt contains math / equations. */
  has_math: boolean;
  /** The prompt contains non-English keywords or characters. */
  is_multilingual: boolean;
  /** Creative writing was detected. */
  is_creative: boolean;
  /** Chain-of-thought style reasoning is needed. */
  requires_reasoning: boolean;
}
|
|
19
|
-
|
|
20
|
-
/**
 * Static description of one routable model: pricing, speed, quality and
 * the task categories it is considered good at.
 */
export interface ModelProfile {
  /** Model identifier, e.g. "openai/gpt-4o". */
  name: string;
  /** Provider identifier, e.g. "openai". */
  provider: string;
  /**
   * Input-token price.
   * NOTE(review): the field name says "per 1k tokens"; confirm the unit
   * against the pricing source before using it for billing math.
   */
  cost_per_1k_input: number;
  /** Output-token price (same unit caveat as `cost_per_1k_input`). */
  cost_per_1k_output: number;
  /** Latency in milliseconds. */
  latency_ms: number;
  /** Relative quality, 0-1. */
  quality_score: number;
  /** Strength tags, e.g. ["coding", "reasoning", "creative"]. */
  strengths: string[];
  /** Context window size. */
  context_window: number;
}
|
|
30
|
-
|
|
31
|
-
/**
 * Outcome of routing a single prompt.
 */
export interface RouteDecision {
  /** Name of the model chosen to handle the prompt. */
  primary_model: string;
  /** Names of alternative models, in preference order. */
  fallback_models: string[];
  /** Confidence in the routing choice, 0-1. */
  confidence: number;
  /** Human-readable explanation of the choice. */
  reasoning: string;
  /** Estimated cost of serving the prompt with the primary model. */
  estimated_cost: number;
  /** Estimated latency of the primary model, in milliseconds. */
  estimated_latency_ms: number;
}
|
|
39
|
-
|
|
40
|
-
/**
 * Indexes a list of profiles by their `name`, preserving list order as the
 * record's key insertion order.
 */
function indexProfilesByName(profiles: ModelProfile[]): Record<string, ModelProfile> {
  const byName: Record<string, ModelProfile> = {};
  for (const profile of profiles) {
    byName[profile.name] = profile;
  }
  return byName;
}

/**
 * Pre-configured model profiles, keyed by model name.
 *
 * Key order matters to callers that take the "first match" or rely on a
 * stable sort, so entries must stay in this order.
 *
 * NOTE(review): the cost fields are named "per 1k" but the magnitudes look
 * like per-million-token list prices — confirm the unit before changing them.
 */
export const MODEL_PROFILES: Record<string, ModelProfile> = indexProfilesByName([
  {
    name: "openai/gpt-4o",
    provider: "openai",
    cost_per_1k_input: 2.5,
    cost_per_1k_output: 10,
    latency_ms: 2000,
    quality_score: 0.95,
    strengths: ["reasoning", "coding", "analysis"],
    context_window: 128000,
  },
  {
    name: "openai/gpt-4o-mini",
    provider: "openai",
    cost_per_1k_input: 0.15,
    cost_per_1k_output: 0.6,
    latency_ms: 500,
    quality_score: 0.85,
    strengths: ["fast", "coding"],
    context_window: 128000,
  },
  {
    name: "anthropic/claude-3.5-sonnet",
    provider: "anthropic",
    cost_per_1k_input: 3,
    cost_per_1k_output: 15,
    latency_ms: 2500,
    quality_score: 0.96,
    strengths: ["reasoning", "creative", "analysis"],
    context_window: 200000,
  },
  {
    name: "anthropic/claude-3-haiku",
    provider: "anthropic",
    cost_per_1k_input: 0.25,
    cost_per_1k_output: 1.25,
    latency_ms: 500,
    quality_score: 0.8,
    strengths: ["fast", "simple"],
    context_window: 200000,
  },
  {
    name: "google/gemini-2.0-flash",
    provider: "google",
    cost_per_1k_input: 0, // Free
    cost_per_1k_output: 0,
    latency_ms: 800,
    quality_score: 0.88,
    strengths: ["fast", "multilingual"],
    context_window: 1000000,
  },
  {
    name: "google/gemini-1.5-pro",
    provider: "google",
    cost_per_1k_input: 1.25,
    cost_per_1k_output: 5,
    latency_ms: 1500,
    quality_score: 0.92,
    strengths: ["reasoning", "long-context"],
    context_window: 2000000,
  },
  {
    name: "groq/llama-3.3-70b",
    provider: "groq",
    cost_per_1k_input: 0.59,
    cost_per_1k_output: 0.79,
    latency_ms: 400,
    quality_score: 0.82,
    strengths: ["fast", "coding"],
    context_window: 128000,
  },
  {
    name: "cerebras/llama-3.3-70b",
    provider: "cerebras",
    cost_per_1k_input: 0.6,
    cost_per_1k_output: 0.6,
    latency_ms: 350,
    quality_score: 0.82,
    strengths: ["fast", "budget"],
    context_window: 128000,
  },
  {
    name: "local/llama-3.3-70b",
    provider: "ollama",
    cost_per_1k_input: 0,
    cost_per_1k_output: 0,
    latency_ms: 100,
    quality_score: 0.75,
    strengths: ["privacy", "free"],
    context_window: 128000,
  },
]);
|
|
133
|
-
|
|
134
|
-
/**
 * Extract heuristic features from a prompt for the routing decision.
 *
 * All keyword and character-class detection runs against the lowercased
 * prompt, so matching is case-insensitive ("Calculate", "SQRT" and "Ñ" are
 * detected the same way as their lowercase forms).
 *
 * @param prompt - Raw user prompt.
 * @returns Feature flags, the token count reported by `countTokens`, and a
 *   complexity estimate that starts at 0.3, grows with each detected signal
 *   and is capped at 1.0.
 */
export function extractQueryFeatures(prompt: string): QueryFeatures {
  const lower = prompt.toLowerCase();
  const containsAny = (indicators: string[]): boolean =>
    indicators.some(indicator => lower.includes(indicator));

  // Code patterns
  const has_code = containsAny([
    "function", "class ", "def ", "import ", "const ", "let ",
    "python", "javascript", "typescript", "java", "cpp", "rust",
    "```", "=>", "->", "async", "await"
  ]);

  // Math patterns (expanded for unicode and common notation).
  // The indicators are lowercase, so they must be matched against `lower`;
  // matching the raw prompt would miss capitalised keywords.
  const has_math = containsAny([
    "equation", "formula", "calculate", "sqrt", "^", "log",
    "sin", "cos", "tan", "integral", "derivative", "$", "math",
    "∫", "∂", "∑", "∏", "√", "∞", "π", "θ", "β",
    "dx", "dy", "dz", "=", "solver", "compute"
  ]);

  // Multilingual: tested against `lower` so uppercase accented letters
  // (e.g. "Ñ", "Á") are recognised too; scripts without case are unaffected.
  const lang_patterns = [
    /[\u4e00-\u9fff]/,                    // Chinese
    /[\u3040-\u309f\u30a0-\u30ff]/,       // Japanese
    /[\uac00-\ud7af]/,                    // Korean
    /[а-яА-Я]/,                           // Russian
    /[áéíóúñ]/                            // Spanish accented
  ];
  const is_multilingual = lang_patterns.some(pattern => pattern.test(lower));

  // Creative writing
  const is_creative = containsAny([
    "write a", "story", "poem", "creative", "imagine",
    "describe", "explain in", "tell me", "narrative"
  ]);

  // Reasoning
  const requires_reasoning = containsAny([
    "explain", "why", "because", "therefore", "thus",
    "analyze", "think", "consider", "reason", "logic"
  ]);

  // Complexity estimation based on length and detected patterns
  const tokens = countTokens(prompt, "gpt-4o");
  let complexity = 0.3;

  if (tokens > 1000) complexity += 0.2;
  if (has_code) complexity += 0.15;
  if (has_math) complexity += 0.2;
  if (requires_reasoning) complexity += 0.15;
  if (is_creative) complexity += 0.1;

  complexity = Math.min(1.0, complexity);

  return {
    complexity,
    length: tokens,
    has_code,
    has_math,
    is_multilingual,
    is_creative,
    requires_reasoning
  };
}
|
|
203
|
-
|
|
204
|
-
/**
 * Score how well a model fits a query.
 *
 * The score is 40% of the model's quality score plus a fixed bonus for each
 * query trait the model lists as a strength, plus a small speed bonus when
 * the query is simple and the model is fast.
 *
 * @param model - Profile of the candidate model.
 * @param features - Features extracted from the prompt.
 * @returns The fit score (higher is better).
 */
function scoreModelFit(model: ModelProfile, features: QueryFeatures): number {
  // [query trait present, strength tag that serves it, bonus awarded]
  const strengthBonuses: Array<[boolean, string, number]> = [
    [features.has_code, "coding", 0.2],
    [features.requires_reasoning, "reasoning", 0.2],
    [features.is_creative, "creative", 0.15],
    [features.is_multilingual, "multilingual", 0.15],
    [features.has_math, "analysis", 0.15],
  ];

  let fit = model.quality_score * 0.4; // Base quality

  for (const [traitPresent, strength, bonus] of strengthBonuses) {
    if (traitPresent && model.strengths.includes(strength)) {
      fit += bonus;
    }
  }

  // Speed bonus for simple tasks
  const isSimpleQuery = features.complexity < 0.4;
  const isFastModel = model.latency_ms < 1000;
  if (isSimpleQuery && isFastModel) {
    fit += 0.1;
  }

  return fit;
}
|
|
234
|
-
|
|
235
|
-
/**
 * Cost efficiency score (inverse of normalized cost).
 *
 * The mean of the input and output price is normalized against 10 and
 * inverted, so a free model scores highest and anything at or above 10
 * scores zero. The result is weighted 0.6 for simple queries
 * (complexity below 0.5) and 0.2 otherwise.
 *
 * @param model - Profile of the candidate model.
 * @param features - Features extracted from the prompt.
 * @returns The weighted cost-efficiency score.
 */
function costEfficiency(model: ModelProfile, features: QueryFeatures): number {
  const meanCost = (model.cost_per_1k_input + model.cost_per_1k_output) / 2;
  const cheapness = 1 - Math.min(meanCost / 10, 1);

  // Simple tasks prioritize cost efficiency; complex tasks deprioritize it.
  const weight = features.complexity < 0.5 ? 0.6 : 0.2;

  return cheapness * weight;
}
|
|
249
|
-
|
|
250
|
-
/**
 * RouteLLM-style routing decision for a single prompt.
 *
 * Every known candidate model is scored for fit and cost efficiency; the
 * candidates are ranked by a blend of the two that favours quality for
 * high-complexity prompts (complexity above 0.6) and cost otherwise.
 *
 * @param prompt - Raw user prompt.
 * @param available_models - Optional allow-list of model names. Names that
 *   are not present in `MODEL_PROFILES` and repeated names are ignored.
 *   Defaults to every profile in `MODEL_PROFILES`.
 * @param budget_multiplier - Factor applied to the reported estimated cost.
 * @returns The chosen model, up to two fallbacks, a confidence in 0-0.95,
 *   a short explanation and cost/latency estimates.
 * @throws Error when none of the requested models has a known profile.
 */
export function routeQuery(
  prompt: string,
  available_models?: string[],
  budget_multiplier: number = 1.0
): RouteDecision {
  const features = extractQueryFeatures(prompt);
  const candidate_names = available_models ?? Object.keys(MODEL_PROFILES);

  interface ScoredModel {
    name: string;
    profile: ModelProfile;
    total_score: number;
    rank_score: number;
  }

  // High complexity = quality bias
  const quality_weight = features.complexity > 0.6 ? 0.7 : 0.3;

  // Score all candidates, skipping unknown names and duplicates so the
  // primary model can never reappear in its own fallback list.
  const seen = new Set<string>();
  const candidates: ScoredModel[] = [];
  for (const name of candidate_names) {
    const profile = MODEL_PROFILES[name];
    if (!profile || seen.has(name)) continue;
    seen.add(name);

    const quality = scoreModelFit(profile, features);
    const cost = costEfficiency(profile, features);
    candidates.push({
      name,
      profile,
      total_score: quality + cost,
      rank_score: quality * quality_weight + cost * (1 - quality_weight)
    });
  }

  // Sort by the complexity-weighted quality vs cost tradeoff, best first.
  candidates.sort((a, b) => b.rank_score - a.rank_score);

  const [primary, ...others] = candidates;
  if (!primary) {
    throw new Error(
      `routeQuery: no known model profiles among [${candidate_names.join(", ")}]`
    );
  }
  const secondary = others.slice(0, 2);

  // Confidence grows with the score gap to the runner-up. The gap is taken
  // on the unweighted total while ranking uses the weighted score, so it can
  // be negative; clamp to keep confidence inside its documented range.
  let confidence = 0.5;
  const runner_up = others[0];
  if (runner_up) {
    const gap = primary.total_score - runner_up.total_score;
    confidence = Math.max(0, Math.min(0.95, 0.5 + gap * 2));
  }

  // Build reasoning
  const reasons: string[] = [];
  if (features.has_code) reasons.push("code detected");
  if (features.requires_reasoning) reasons.push("reasoning needed");
  if (features.complexity > 0.6) reasons.push("high complexity");
  if (features.is_multilingual) reasons.push("multilingual");

  const estimated_tokens = features.length * 1.5; // rough completion estimate
  const estimated_cost = estimateCost(features.length, estimated_tokens, primary.name);

  return {
    primary_model: primary.name,
    fallback_models: secondary.map(c => c.name),
    confidence,
    reasoning: `Selected ${primary.profile.provider}/${primary.name.split("/")[1]} for ${reasons.join(", ") || "general query"}`,
    estimated_cost: estimated_cost * budget_multiplier,
    estimated_latency_ms: primary.profile.latency_ms
  };
}
|
|
322
|
-
|
|
323
|
-
/**
 * Batch routing for multiple prompts.
 *
 * Each prompt is first routed independently. Optional post-processing:
 *
 * - `same_model`: every decision is moved onto the first prompt's primary
 *   model (with its own copy of the first prompt's fallbacks), and the
 *   cost, latency and reasoning are recomputed for that model.
 * - `max_cost_per_prompt`: any decision whose estimated cost exceeds the cap
 *   is moved to the cheapest profile in `MODEL_PROFILES`, provided that
 *   actually lowers the estimate. A cap of 0 is honoured. This runs after
 *   `same_model`, so the cap takes precedence over batch consistency.
 * - `balance_cost`: accepted for API compatibility; it currently has no
 *   effect.
 *
 * @param prompts - Prompts to route.
 * @param options - Optional batch constraints described above.
 * @returns One decision per prompt, in input order.
 */
export function routeBatch(
  prompts: string[],
  options?: {
    same_model?: boolean;
    max_cost_per_prompt?: number;
    balance_cost?: boolean;
  }
): RouteDecision[] {
  const routed = prompts.map(prompt => ({ prompt, decision: routeQuery(prompt) }));

  // Forces `decision` onto `model`, refreshing the fields that depend on
  // which model serves the prompt so they do not describe the old choice.
  const pinTo = (decision: RouteDecision, pinned: RouteDecision, model: string): void => {
    decision.primary_model = model;
    decision.reasoning = pinned.reasoning;
    decision.estimated_cost = pinned.estimated_cost;
    decision.estimated_latency_ms = pinned.estimated_latency_ms;
  };

  const first = routed[0];
  if (options?.same_model && first) {
    // Use the first decision's model for all (for batch consistency)
    const shared_model = first.decision.primary_model;
    const shared_fallbacks = first.decision.fallback_models;
    for (const { prompt, decision } of routed) {
      if (decision.primary_model !== shared_model) {
        pinTo(decision, routeQuery(prompt, [shared_model]), shared_model);
      }
      // Copy so that mutating one decision's fallbacks cannot affect the rest.
      decision.fallback_models = shared_fallbacks.slice();
    }
  }

  const cap = options?.max_cost_per_prompt;
  if (typeof cap === "number") {
    // Find the cheapest profile once; it does not depend on the prompt.
    let cheapest: { name: string; provider: string; price: number } | undefined;
    for (const [name, profile] of Object.entries(MODEL_PROFILES)) {
      const price = profile.cost_per_1k_input + profile.cost_per_1k_output;
      if (!cheapest || price < cheapest.price) {
        cheapest = { name, provider: profile.provider, price };
      }
    }

    if (cheapest) {
      for (const { prompt, decision } of routed) {
        if (!(decision.estimated_cost > cap)) continue;
        if (decision.primary_model === cheapest.name) continue;

        // Route to the cheaper alternative only when it really is cheaper.
        const repriced = routeQuery(prompt, [cheapest.name]);
        if (repriced.estimated_cost >= decision.estimated_cost) continue;

        const budget_model = cheapest.name;
        pinTo(decision, repriced, budget_model);
        decision.reasoning = `Budget-limited routing to ${cheapest.provider}`;
        decision.fallback_models = decision.fallback_models.filter(
          fallback => fallback !== budget_model
        );
      }
    }
  }

  return routed.map(entry => entry.decision);
}
|
|
361
|
-
|
|
362
|
-
/**
 * Get model recommendations for a task description.
 *
 * @param task - Free-text description of the task.
 * @returns The primary model chosen by `routeQuery`, followed by its
 *   fallback models in order.
 */
export function recommendForTask(task: string): string[] {
  // routeQuery extracts the query features itself; doing it here as well
  // only repeated the work and discarded the result.
  const { primary_model, fallback_models } = routeQuery(task);

  // Return primary + fallbacks
  return [primary_model, ...fallback_models];
}
|
|
372
|
-
|
|
373
|
-
/**
 * Update a model profile from execution feedback (online learning).
 *
 * Latency and quality are blended into the stored profile with an
 * exponential moving average (learning rate 0.2). The profile in
 * `MODEL_PROFILES` is mutated in place and a summary line is logged.
 * Unknown model names are ignored.
 *
 * Observations that would corrupt the average are skipped: a latency that
 * is negative or not finite, and a quality rating that is not finite. A
 * finite quality rating is clamped into 0-1 before blending.
 *
 * @param model_name - Key of the profile in `MODEL_PROFILES`.
 * @param actual_latency_ms - Observed latency in milliseconds.
 * @param actual_cost - Observed cost. Accepted for API compatibility; the
 *   stored prices are kept as the reference and are not adjusted.
 * @param quality_rating - Observed quality, 0-1.
 */
export function updateModelProfile(
  model_name: string,
  actual_latency_ms: number,
  actual_cost: number,
  quality_rating: number     // 0-1
): void {
  const profile = MODEL_PROFILES[model_name];
  if (!profile) return;

  // Stored costs are kept as reference; the observed cost is intentionally unused.
  void actual_cost;

  // Exponential moving average update
  const alpha = 0.2; // Learning rate
  const blend = (current: number, observed: number): number =>
    current * (1 - alpha) + observed * alpha;

  // A single NaN/Infinity would stick in the average forever, so skip it.
  if (Number.isFinite(actual_latency_ms) && actual_latency_ms >= 0) {
    profile.latency_ms = blend(profile.latency_ms, actual_latency_ms);
  }
  if (Number.isFinite(quality_rating)) {
    const bounded_rating = Math.min(1, Math.max(0, quality_rating));
    profile.quality_score = blend(profile.quality_score, bounded_rating);
  }

  console.log(`[RouteLLM] Updated ${model_name}: latency=${profile.latency_ms.toFixed(0)}ms, quality=${profile.quality_score.toFixed(2)}`);
}
|
|
398
|
-
|
|
399
|
-
/**
 * Default export: the module's public routing functions together with the
 * shared model profile table, bundled into a single object.
 */
const advancedRouter = {
  extractQueryFeatures,
  routeQuery,
  routeBatch,
  recommendForTask,
  updateModelProfile,
  MODEL_PROFILES
};

export default advancedRouter;
|