adaptive-memory-multi-model-router 1.2.2 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +146 -66
  3. package/dist/index.d.ts +1 -1
  4. package/dist/index.js +1 -1
  5. package/dist/integrations/airtable.js +20 -0
  6. package/dist/integrations/discord.js +18 -0
  7. package/dist/integrations/github.js +23 -0
  8. package/dist/integrations/gmail.js +19 -0
  9. package/dist/integrations/google-calendar.js +18 -0
  10. package/dist/integrations/index.js +61 -0
  11. package/dist/integrations/jira.js +21 -0
  12. package/dist/integrations/linear.js +19 -0
  13. package/dist/integrations/notion.js +19 -0
  14. package/dist/integrations/slack.js +18 -0
  15. package/dist/integrations/telegram.js +19 -0
  16. package/dist/providers/registry.js +7 -3
  17. package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +1391 -0
  18. package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +1051 -0
  19. package/docs/CONFIGURATION.md +476 -0
  20. package/docs/COUNCIL_DECISION.json +308 -0
  21. package/docs/COUNCIL_SUMMARY.md +265 -0
  22. package/docs/COUNCIL_V2.2_DECISION.md +416 -0
  23. package/docs/IMPROVEMENT_ROADMAP.md +515 -0
  24. package/docs/LLM_COUNCIL_DECISION.md +508 -0
  25. package/docs/QUICK_START_VISIBILITY.md +782 -0
  26. package/docs/REDDIT_GAP_ANALYSIS.md +299 -0
  27. package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +1180 -0
  28. package/docs/TMLPD_QNA.md +751 -0
  29. package/docs/TMLPD_V2.1_COMPLETE.md +763 -0
  30. package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +754 -0
  31. package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +446 -0
  32. package/docs/V2_IMPLEMENTATION_GUIDE.md +388 -0
  33. package/docs/VISIBILITY_ADOPTION_PLAN.md +1005 -0
  34. package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +421 -0
  35. package/docs/launch-content/README.md +457 -0
  36. package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
  37. package/docs/launch-content/assets/cumulative_savings.png +0 -0
  38. package/docs/launch-content/assets/parallel_speedup.png +0 -0
  39. package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
  40. package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
  41. package/docs/launch-content/generate_charts.py +313 -0
  42. package/docs/launch-content/hn_show_post.md +139 -0
  43. package/docs/launch-content/partner_outreach_templates.md +745 -0
  44. package/docs/launch-content/reddit_posts.md +467 -0
  45. package/docs/launch-content/twitter_thread.txt +460 -0
  46. package/examples/QUICKSTART.md +1 -1
  47. package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +313 -0
  48. package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +277 -0
  49. package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +1234 -0
  50. package/openclaw-alexa-bridge/test_fixes.js +77 -0
  51. package/package.json +120 -29
  52. package/package.json.tmp +0 -0
  53. package/qna/TMLPD_QNA.md +3 -3
  54. package/skill/SKILL.md +2 -2
  55. package/src/__tests__/integration/tmpld_integration.test.py +540 -0
  56. package/src/agents/skill_enhanced_agent.py +318 -0
  57. package/src/memory/__init__.py +15 -0
  58. package/src/memory/agentic_memory.py +353 -0
  59. package/src/memory/semantic_memory.py +444 -0
  60. package/src/memory/simple_memory.py +466 -0
  61. package/src/memory/working_memory.py +447 -0
  62. package/src/orchestration/__init__.py +52 -0
  63. package/src/orchestration/execution_engine.py +353 -0
  64. package/src/orchestration/halo_orchestrator.py +367 -0
  65. package/src/orchestration/mcts_workflow.py +498 -0
  66. package/src/orchestration/role_assigner.py +473 -0
  67. package/src/orchestration/task_planner.py +522 -0
  68. package/src/providers/__init__.py +67 -0
  69. package/src/providers/anthropic.py +304 -0
  70. package/src/providers/base.py +241 -0
  71. package/src/providers/cerebras.py +373 -0
  72. package/src/providers/registry.py +476 -0
  73. package/src/routing/__init__.py +30 -0
  74. package/src/routing/universal_router.py +621 -0
  75. package/src/skills/TMLPD-QUICKREF.md +210 -0
  76. package/src/skills/TMLPD-SETUP-SUMMARY.md +157 -0
  77. package/src/skills/TMLPD.md +540 -0
  78. package/src/skills/__tests__/skill_manager.test.ts +328 -0
  79. package/src/skills/skill_manager.py +385 -0
  80. package/src/skills/test-tmlpd.sh +108 -0
  81. package/src/skills/tmlpd-category.yaml +67 -0
  82. package/src/skills/tmlpd-monitoring.yaml +188 -0
  83. package/src/skills/tmlpd-phase.yaml +132 -0
  84. package/src/state/__init__.py +17 -0
  85. package/src/state/simple_checkpoint.py +508 -0
  86. package/src/tmlpd_agent.py +464 -0
  87. package/src/tmpld_v2.py +427 -0
  88. package/src/workflows/__init__.py +18 -0
  89. package/src/workflows/advanced_difficulty_classifier.py +377 -0
  90. package/src/workflows/chaining_executor.py +417 -0
  91. package/src/workflows/difficulty_integration.py +209 -0
  92. package/src/workflows/orchestrator.py +469 -0
  93. package/src/workflows/orchestrator_executor.py +456 -0
  94. package/src/workflows/parallelization_executor.py +382 -0
  95. package/src/workflows/router.py +311 -0
  96. package/test_integration_simple.py +86 -0
  97. package/test_mcts_workflow.py +150 -0
  98. package/test_templd_integration.py +262 -0
  99. package/test_universal_router.py +275 -0
  100. package/tmlpd-pi-extension/README.md +36 -0
  101. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +114 -0
  102. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +1 -0
  103. package/tmlpd-pi-extension/dist/cache/prefixCache.js +285 -0
  104. package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +1 -0
  105. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +58 -0
  106. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +1 -0
  107. package/tmlpd-pi-extension/dist/cache/responseCache.js +153 -0
  108. package/tmlpd-pi-extension/dist/cache/responseCache.js.map +1 -0
  109. package/tmlpd-pi-extension/dist/cli.js +59 -0
  110. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +95 -0
  111. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +1 -0
  112. package/tmlpd-pi-extension/dist/cost/costTracker.js +240 -0
  113. package/tmlpd-pi-extension/dist/cost/costTracker.js.map +1 -0
  114. package/tmlpd-pi-extension/dist/index.d.ts +723 -0
  115. package/tmlpd-pi-extension/dist/index.d.ts.map +1 -0
  116. package/tmlpd-pi-extension/dist/index.js +239 -0
  117. package/tmlpd-pi-extension/dist/index.js.map +1 -0
  118. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +82 -0
  119. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +1 -0
  120. package/tmlpd-pi-extension/dist/memory/episodicMemory.js +145 -0
  121. package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +1 -0
  122. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +102 -0
  123. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
  124. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +207 -0
  125. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +1 -0
  126. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +85 -0
  127. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
  128. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +210 -0
  129. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +1 -0
  130. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +102 -0
  131. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +1 -0
  132. package/tmlpd-pi-extension/dist/providers/localProvider.js +338 -0
  133. package/tmlpd-pi-extension/dist/providers/localProvider.js.map +1 -0
  134. package/tmlpd-pi-extension/dist/providers/registry.d.ts +55 -0
  135. package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +1 -0
  136. package/tmlpd-pi-extension/dist/providers/registry.js +138 -0
  137. package/tmlpd-pi-extension/dist/providers/registry.js.map +1 -0
  138. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +68 -0
  139. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +1 -0
  140. package/tmlpd-pi-extension/dist/routing/advancedRouter.js +332 -0
  141. package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +1 -0
  142. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +101 -0
  143. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +1 -0
  144. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +368 -0
  145. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +1 -0
  146. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +96 -0
  147. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +1 -0
  148. package/tmlpd-pi-extension/dist/utils/batchProcessor.js +170 -0
  149. package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +1 -0
  150. package/tmlpd-pi-extension/dist/utils/compression.d.ts +61 -0
  151. package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +1 -0
  152. package/tmlpd-pi-extension/dist/utils/compression.js +281 -0
  153. package/tmlpd-pi-extension/dist/utils/compression.js.map +1 -0
  154. package/tmlpd-pi-extension/dist/utils/reliability.d.ts +74 -0
  155. package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +1 -0
  156. package/tmlpd-pi-extension/dist/utils/reliability.js +177 -0
  157. package/tmlpd-pi-extension/dist/utils/reliability.js.map +1 -0
  158. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +117 -0
  159. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +1 -0
  160. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +246 -0
  161. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +1 -0
  162. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +50 -0
  163. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +1 -0
  164. package/tmlpd-pi-extension/dist/utils/tokenUtils.js +124 -0
  165. package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +1 -0
  166. package/tmlpd-pi-extension/examples/QUICKSTART.md +183 -0
  167. package/tmlpd-pi-extension/package-lock.json +75 -0
  168. package/tmlpd-pi-extension/package.json +172 -0
  169. package/tmlpd-pi-extension/python/examples.py +53 -0
  170. package/tmlpd-pi-extension/python/integrations.py +330 -0
  171. package/tmlpd-pi-extension/python/setup.py +28 -0
  172. package/tmlpd-pi-extension/python/tmlpd.py +369 -0
  173. package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +299 -0
  174. package/tmlpd-pi-extension/qna/TMLPD_QNA.md +751 -0
  175. package/tmlpd-pi-extension/skill/SKILL.md +238 -0
  176. package/{src → tmlpd-pi-extension/src}/index.ts +1 -1
  177. package/tmlpd-pi-extension/tsconfig.json +18 -0
  178. package/demo/research-demo.js +0 -266
  179. package/notebooks/quickstart.ipynb +0 -157
  180. package/rust/tmlpd.h +0 -268
  181. package/src/cache/prefixCache.ts +0 -365
  182. package/src/routing/advancedRouter.ts +0 -406
  183. package/src/utils/speculativeDecoding.ts +0 -344
  184. /package/{src → tmlpd-pi-extension/src}/cache/responseCache.ts +0 -0
  185. /package/{src → tmlpd-pi-extension/src}/cost/costTracker.ts +0 -0
  186. /package/{src → tmlpd-pi-extension/src}/memory/episodicMemory.ts +0 -0
  187. /package/{src → tmlpd-pi-extension/src}/orchestration/haloOrchestrator.ts +0 -0
  188. /package/{src → tmlpd-pi-extension/src}/orchestration/mctsWorkflow.ts +0 -0
  189. /package/{src → tmlpd-pi-extension/src}/providers/localProvider.ts +0 -0
  190. /package/{src → tmlpd-pi-extension/src}/providers/registry.ts +0 -0
  191. /package/{src → tmlpd-pi-extension/src}/tools/tmlpdTools.ts +0 -0
  192. /package/{src → tmlpd-pi-extension/src}/utils/batchProcessor.ts +0 -0
  193. /package/{src → tmlpd-pi-extension/src}/utils/compression.ts +0 -0
  194. /package/{src → tmlpd-pi-extension/src}/utils/reliability.ts +0 -0
  195. /package/{src → tmlpd-pi-extension/src}/utils/tokenUtils.ts +0 -0
@@ -1,406 +0,0 @@
1
- /**
2
- * TMLPD Advanced Routing - RouteLLM Style
3
- *
4
- * Learned routing based on arXiv:2406.18665 (RouteLLM)
5
- * Balances cost-quality tradeoff with confidence-based model selection
6
- */
7
-
8
- import { countTokens, estimateCost } from "../utils/tokenUtils";
9
-
10
/**
 * Heuristic description of a prompt, produced by `extractQueryFeatures`
 * and consumed by the scoring functions when ranking models.
 */
export interface QueryFeatures {
  /** Estimated complexity on a 0-1 scale. */
  complexity: number;
  /** Prompt size as a token count. */
  length: number;
  /** True when the prompt contains code-like patterns. */
  has_code: boolean;
  /** True when the prompt contains math or equation patterns. */
  has_math: boolean;
  /** True when non-English script or accented characters were found. */
  is_multilingual: boolean;
  /** True when creative-writing cues were detected. */
  is_creative: boolean;
  /** True when the prompt appears to need step-by-step reasoning. */
  requires_reasoning: boolean;
}
19
-
20
/**
 * Static (and feedback-adjusted) characteristics of one routable model.
 */
export interface ModelProfile {
  /** Fully qualified model id, e.g. "openai/gpt-4o". */
  name: string;
  /** Backend that serves the model. */
  provider: string;
  /**
   * Input price used for cost scoring.
   * NOTE(review): the catalogue values look like USD per million tokens
   * rather than per thousand - confirm the unit before relying on it.
   */
  cost_per_1k_input: number;
  /** Output price, same unit caveat as `cost_per_1k_input`. */
  cost_per_1k_output: number;
  /** Typical response latency in milliseconds. */
  latency_ms: number;
  /** Relative quality on a 0-1 scale. */
  quality_score: number;
  /** Capability tags such as "coding", "reasoning" or "creative". */
  strengths: string[];
  /** Maximum context size in tokens. */
  context_window: number;
}
30
-
31
/**
 * Outcome of routing a single prompt.
 */
export interface RouteDecision {
  /** Model id chosen to handle the prompt. */
  primary_model: string;
  /** Next-best model ids, in ranking order, to try if the primary fails. */
  fallback_models: string[];
  /** Confidence in the routing choice on a 0-1 scale. */
  confidence: number;
  /** Short human-readable explanation of the choice. */
  reasoning: string;
  /** Estimated cost of serving the prompt with the primary model. */
  estimated_cost: number;
  /** Expected latency of the primary model in milliseconds. */
  estimated_latency_ms: number;
}
39
-
40
/**
 * Built-in model catalogue, in ranking tie-break order.
 *
 * Each entry is declared once and indexed by its own `name`, so the lookup
 * key and the profile name cannot drift apart.
 */
const BUILT_IN_PROFILES: ModelProfile[] = [
  {
    name: "openai/gpt-4o",
    provider: "openai",
    cost_per_1k_input: 2.50,
    cost_per_1k_output: 10.00,
    latency_ms: 2000,
    quality_score: 0.95,
    strengths: ["reasoning", "coding", "analysis"],
    context_window: 128000
  },
  {
    name: "openai/gpt-4o-mini",
    provider: "openai",
    cost_per_1k_input: 0.15,
    cost_per_1k_output: 0.60,
    latency_ms: 500,
    quality_score: 0.85,
    strengths: ["fast", "coding"],
    context_window: 128000
  },
  {
    name: "anthropic/claude-3.5-sonnet",
    provider: "anthropic",
    cost_per_1k_input: 3.00,
    cost_per_1k_output: 15.00,
    latency_ms: 2500,
    quality_score: 0.96,
    strengths: ["reasoning", "creative", "analysis"],
    context_window: 200000
  },
  {
    name: "anthropic/claude-3-haiku",
    provider: "anthropic",
    cost_per_1k_input: 0.25,
    cost_per_1k_output: 1.25,
    latency_ms: 500,
    quality_score: 0.80,
    strengths: ["fast", "simple"],
    context_window: 200000
  },
  {
    name: "google/gemini-2.0-flash",
    provider: "google",
    cost_per_1k_input: 0.00, // Free
    cost_per_1k_output: 0.00,
    latency_ms: 800,
    quality_score: 0.88,
    strengths: ["fast", "multilingual"],
    context_window: 1000000
  },
  {
    name: "google/gemini-1.5-pro",
    provider: "google",
    cost_per_1k_input: 1.25,
    cost_per_1k_output: 5.00,
    latency_ms: 1500,
    quality_score: 0.92,
    strengths: ["reasoning", "long-context"],
    context_window: 2000000
  },
  {
    name: "groq/llama-3.3-70b",
    provider: "groq",
    cost_per_1k_input: 0.59,
    cost_per_1k_output: 0.79,
    latency_ms: 400,
    quality_score: 0.82,
    strengths: ["fast", "coding"],
    context_window: 128000
  },
  {
    name: "cerebras/llama-3.3-70b",
    provider: "cerebras",
    cost_per_1k_input: 0.60,
    cost_per_1k_output: 0.60,
    latency_ms: 350,
    quality_score: 0.82,
    strengths: ["fast", "budget"],
    context_window: 128000
  },
  {
    name: "local/llama-3.3-70b",
    provider: "ollama",
    cost_per_1k_input: 0.00,
    cost_per_1k_output: 0.00,
    latency_ms: 100,
    quality_score: 0.75,
    strengths: ["privacy", "free"],
    context_window: 128000
  }
];

// Pre-configured model profiles, keyed by model id
export const MODEL_PROFILES: Record<string, ModelProfile> = Object.fromEntries(
  BUILT_IN_PROFILES.map((profile): [string, ModelProfile] => [profile.name, profile])
);
133
-
134
// Substring cues, all lower-case: they are matched against the lower-cased prompt.
const CODE_INDICATORS: readonly string[] = [
  "function", "class ", "def ", "import ", "const ", "let ",
  "python", "javascript", "typescript", "java", "cpp", "rust",
  "```", "=>", "->", "async", "await"
];

// Math cues (words plus unicode and common notation).
const MATH_INDICATORS: readonly string[] = [
  "equation", "formula", "calculate", "sqrt", "^", "log",
  "sin", "cos", "tan", "integral", "derivative", "$", "math",
  "∫", "∂", "∑", "∏", "√", "∞", "π", "θ", "β",
  "dx", "dy", "dz", "=", "solver", "compute"
];

const CREATIVE_INDICATORS: readonly string[] = [
  "write a", "story", "poem", "creative", "imagine",
  "describe", "explain in", "tell me", "narrative"
];

const REASONING_INDICATORS: readonly string[] = [
  "explain", "why", "because", "therefore", "thus",
  "analyze", "think", "consider", "reason", "logic"
];

// Script / accent ranges that mark a prompt as not purely English.
const NON_ENGLISH_PATTERNS: readonly RegExp[] = [
  /[\u4e00-\u9fff]/,              // Chinese
  /[\u3040-\u309f\u30a0-\u30ff]/, // Japanese
  /[\uac00-\ud7af]/,              // Korean
  /[а-яА-Я]/,                     // Russian
  /[áéíóúñ]/                      // Spanish accented
];

/**
 * Extract features from prompt for routing decision.
 *
 * All keyword detectors are plain substring checks against the lower-cased
 * prompt, so they are case-insensitive. Math detection previously searched
 * the raw prompt and therefore missed capitalised cues such as "Calculate".
 *
 * @param prompt Raw user prompt.
 * @returns Boolean feature flags, the token count and a complexity estimate
 *          that starts at 0.3, grows with each detected feature and is
 *          capped at 1.0.
 */
export function extractQueryFeatures(prompt: string): QueryFeatures {
  const lower = prompt.toLowerCase();
  const mentionsAny = (cues: readonly string[]): boolean =>
    cues.some(cue => lower.includes(cue));

  const has_code = mentionsAny(CODE_INDICATORS);
  const has_math = mentionsAny(MATH_INDICATORS);
  const is_creative = mentionsAny(CREATIVE_INDICATORS);
  const requires_reasoning = mentionsAny(REASONING_INDICATORS);

  // Script detection runs on the original text; the character classes
  // themselves decide which cases they accept.
  const is_multilingual = NON_ENGLISH_PATTERNS.some(pattern => pattern.test(prompt));

  // Complexity estimation based on length and patterns
  const tokens = countTokens(prompt, "gpt-4o");
  let complexity = 0.3;

  if (tokens > 1000) complexity += 0.2;
  if (has_code) complexity += 0.15;
  if (has_math) complexity += 0.2;
  if (requires_reasoning) complexity += 0.15;
  if (is_creative) complexity += 0.1;

  complexity = Math.min(1.0, complexity);

  return {
    complexity,
    length: tokens,
    has_code,
    has_math,
    is_multilingual,
    is_creative,
    requires_reasoning
  };
}
203
-
204
/**
 * Rate how well a model suits a query.
 *
 * Starts from 40% of the model's quality score, adds a fixed bonus for every
 * query feature that matches one of the model's strength tags, and adds a
 * small speed bonus when the query is simple and the model is fast.
 */
function scoreModelFit(model: ModelProfile, features: QueryFeatures): number {
  // [feature present, strength tag that answers it, bonus]
  const strengthBonuses: Array<[boolean, string, number]> = [
    [features.has_code, "coding", 0.2],
    [features.requires_reasoning, "reasoning", 0.2],
    [features.is_creative, "creative", 0.15],
    [features.is_multilingual, "multilingual", 0.15],
    [features.has_math, "analysis", 0.15]
  ];

  let fit = model.quality_score * 0.4;

  for (const [present, tag, bonus] of strengthBonuses) {
    if (present && model.strengths.includes(tag)) {
      fit += bonus;
    }
  }

  // Simple query + sub-second model earns the speed bonus.
  const isSimple = features.complexity < 0.4;
  const isFast = model.latency_ms < 1000;
  if (isSimple && isFast) {
    fit += 0.1;
  }

  return fit;
}
234
-
235
/**
 * Cost efficiency score: the inverse of the model's average price,
 * normalised against a ceiling of 10 and weighted by query complexity.
 *
 * Cheap models earn up to 0.6 on simple queries (complexity below 0.5) and
 * up to 0.2 on everything else, where cost matters less.
 */
function costEfficiency(model: ModelProfile, features: QueryFeatures): number {
  const meanPrice = (model.cost_per_1k_input + model.cost_per_1k_output) / 2;
  const weight = features.complexity < 0.5 ? 0.6 : 0.2;
  return (1 - Math.min(meanPrice / 10, 1)) * weight;
}
249
-
250
/**
 * RouteLLM-style routing decision for a single prompt.
 *
 * Every candidate model is scored for fit and cost efficiency; the two
 * scores are blended with a complexity-dependent bias (quality-heavy for
 * complex prompts, cost-heavy otherwise) and the best blend wins. The next
 * two models in the ranking become fallbacks.
 *
 * @param prompt Raw user prompt.
 * @param available_models Model ids to choose from; defaults to the whole
 *        catalogue. Unknown ids and duplicates are ignored.
 * @param budget_multiplier Factor applied to the reported cost estimate.
 * @returns The routing decision. `confidence` is 0.5 when there is only one
 *          candidate and otherwise grows with the winner's margin over the
 *          runner-up, within [0.5, 0.95].
 * @throws Error when none of the requested models has a profile.
 */
export function routeQuery(
  prompt: string,
  available_models?: string[],
  budget_multiplier: number = 1.0
): RouteDecision {
  const features = extractQueryFeatures(prompt);
  const candidate_names = available_models ?? Object.keys(MODEL_PROFILES);

  // High complexity = quality bias
  const complexity_bias = features.complexity > 0.6 ? 0.7 : 0.3;

  interface ScoredModel {
    name: string;
    profile: ModelProfile;
    quality_score: number;
    cost_score: number;
    /** Blend actually used for ranking. */
    rank_score: number;
  }

  const candidates: ScoredModel[] = [];
  // De-duplicate so a model can never be listed as its own fallback.
  for (const name of Array.from(new Set(candidate_names))) {
    // Own-property check: a plain truthiness test would accept inherited
    // keys such as "constructor".
    if (!Object.prototype.hasOwnProperty.call(MODEL_PROFILES, name)) continue;
    const profile = MODEL_PROFILES[name];
    if (!profile) continue;

    const quality_score = scoreModelFit(profile, features);
    const cost_score = costEfficiency(profile, features);
    candidates.push({
      name,
      profile,
      quality_score,
      cost_score,
      rank_score: quality_score * complexity_bias + cost_score * (1 - complexity_bias)
    });
  }

  // Array.prototype.sort is stable, so ties keep the caller's / catalogue order.
  candidates.sort((a, b) => b.rank_score - a.rank_score);

  const primary = candidates[0];
  if (primary === undefined) {
    throw new Error(
      `routeQuery: none of the requested models has a profile (requested: ${candidate_names.join(", ") || "none"})`
    );
  }
  const secondary = candidates.slice(1, 3);

  // Confidence comes from the margin in the score that decided the ranking,
  // so it can never be negative or contradict the chosen order.
  let confidence = 0.5;
  const runner_up = candidates[1];
  if (runner_up !== undefined) {
    const gap = primary.rank_score - runner_up.rank_score;
    confidence = Math.min(0.95, 0.5 + gap * 2);
  }

  // Build reasoning
  const reasons: string[] = [];
  if (features.has_code) reasons.push("code detected");
  if (features.requires_reasoning) reasons.push("reasoning needed");
  if (features.complexity > 0.6) reasons.push("high complexity");
  if (features.is_multilingual) reasons.push("multilingual");

  const estimated_tokens = features.length * 1.5; // rough completion estimate
  const estimated_cost = estimateCost(features.length, estimated_tokens, primary.name);

  return {
    primary_model: primary.name,
    fallback_models: secondary.map(c => c.name),
    confidence,
    reasoning: `Selected ${primary.profile.provider}/${primary.name.split("/")[1]} for ${reasons.join(", ") || "general query"}`,
    estimated_cost: estimated_cost * budget_multiplier,
    estimated_latency_ms: primary.profile.latency_ms
  };
}
322
-
323
/**
 * Re-route one prompt until its estimated cost fits the cap.
 *
 * Each round restricts the candidates to models that are strictly cheaper
 * (by average input/output price) than the current choice, so the loop
 * always terminates. If nothing fits, the cheapest reachable decision is
 * returned as a best effort.
 */
function rerouteWithinBudget(prompt: string, decision: RouteDecision, cap: number): RouteDecision {
  let current = decision;

  while (current.estimated_cost > cap) {
    const active = MODEL_PROFILES[current.primary_model];
    if (!active) break;

    const ceiling = (active.cost_per_1k_input + active.cost_per_1k_output) / 2;
    const cheaper = Object.entries(MODEL_PROFILES)
      .filter(([, p]) => (p.cost_per_1k_input + p.cost_per_1k_output) / 2 < ceiling)
      .map(([name]) => name);
    if (cheaper.length === 0) break;

    // Re-running the router keeps cost, latency and fallbacks consistent
    // with the newly selected model.
    const alternative = routeQuery(prompt, cheaper);
    const provider = MODEL_PROFILES[alternative.primary_model]?.provider ?? alternative.primary_model;
    current = { ...alternative, reasoning: `Budget-limited routing to ${provider}` };
  }

  return current;
}

/**
 * Batch routing for multiple prompts.
 *
 * @param prompts Prompts to route; one decision is returned per prompt, in order.
 * @param options.same_model Route every prompt to the model chosen for the
 *        first prompt. Each decision is recomputed for that model so its
 *        cost, latency and reasoning describe the model actually used.
 * @param options.max_cost_per_prompt Cap on `estimated_cost`; prompts above
 *        it are re-routed to cheaper models. A cap of 0 is honoured. The cap
 *        is applied after `same_model` and takes precedence over it.
 * @param options.balance_cost Accepted for API compatibility; currently unused.
 * @returns One routing decision per prompt.
 */
export function routeBatch(
  prompts: string[],
  options?: {
    same_model?: boolean;
    max_cost_per_prompt?: number;
    balance_cost?: boolean;
  }
): RouteDecision[] {
  let decisions = prompts.map(p => routeQuery(p));

  const lead = decisions[0];
  if (options?.same_model && lead !== undefined) {
    // Use first decision's model for all (for batch consistency)
    decisions = prompts.map((p, i) =>
      i === 0
        ? lead
        : {
            ...routeQuery(p, [lead.primary_model]),
            // Copy so decisions never share one mutable array.
            fallback_models: [...lead.fallback_models]
          }
    );
  }

  const cap = options?.max_cost_per_prompt;
  if (cap !== undefined) {
    decisions = prompts.map((p, i) => {
      const decision = decisions[i];
      return decision === undefined ? routeQuery(p) : rerouteWithinBudget(p, decision, cap);
    });
  }

  return decisions;
}
361
-
362
/**
 * Get model recommendation for task type.
 *
 * @param task Task description, routed like any other prompt.
 * @returns The primary model id followed by its fallbacks, best first.
 */
export function recommendForTask(task: string): string[] {
  // routeQuery extracts the features itself; extracting them here as well
  // only repeated the work and discarded the result.
  const { primary_model, fallback_models } = routeQuery(task);
  return [primary_model, ...fallback_models];
}
372
-
373
/**
 * Update model profile from execution feedback (online learning).
 *
 * Latency and quality are blended into the shared catalogue entry with an
 * exponential moving average (learning rate 0.2). Unknown models are
 * ignored. Observations that are not finite numbers are skipped, and the
 * quality rating is clamped to [0, 1], so one bad sample cannot poison the
 * profile for every later routing decision.
 *
 * @param model_name Model id to update.
 * @param actual_latency_ms Observed latency in milliseconds.
 * @param actual_cost Observed cost. Accepted for API compatibility; stored
 *        prices are kept as the reference and are not adjusted.
 * @param quality_rating Observed quality, 0-1.
 */
export function updateModelProfile(
  model_name: string,
  actual_latency_ms: number,
  actual_cost: number,
  quality_rating: number // 0-1
): void {
  // Own-property check so inherited keys such as "constructor" are not
  // mistaken for a profile.
  if (!Object.prototype.hasOwnProperty.call(MODEL_PROFILES, model_name)) return;
  const profile = MODEL_PROFILES[model_name];
  if (!profile) return;

  // Exponential moving average update
  const alpha = 0.2; // Learning rate
  const blend = (previous: number, observed: number): number =>
    previous * (1 - alpha) + observed * alpha;

  if (Number.isFinite(actual_latency_ms) && actual_latency_ms >= 0) {
    profile.latency_ms = blend(profile.latency_ms, actual_latency_ms);
  }
  if (Number.isFinite(quality_rating)) {
    const bounded_rating = Math.min(1, Math.max(0, quality_rating));
    profile.quality_score = blend(profile.quality_score, bounded_rating);
  }

  console.log(`[RouteLLM] Updated ${model_name}: latency=${profile.latency_ms.toFixed(0)}ms, quality=${profile.quality_score.toFixed(2)}`);
}
398
-
399
- export default {
400
- extractQueryFeatures,
401
- routeQuery,
402
- routeBatch,
403
- recommendForTask,
404
- updateModelProfile,
405
- MODEL_PROFILES
406
- };