adaptive-memory-multi-model-router 1.2.2 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +146 -66
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/integrations/airtable.js +20 -0
- package/dist/integrations/discord.js +18 -0
- package/dist/integrations/github.js +23 -0
- package/dist/integrations/gmail.js +19 -0
- package/dist/integrations/google-calendar.js +18 -0
- package/dist/integrations/index.js +61 -0
- package/dist/integrations/jira.js +21 -0
- package/dist/integrations/linear.js +19 -0
- package/dist/integrations/notion.js +19 -0
- package/dist/integrations/slack.js +18 -0
- package/dist/integrations/telegram.js +19 -0
- package/dist/providers/registry.js +7 -3
- package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +1391 -0
- package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +1051 -0
- package/docs/CONFIGURATION.md +476 -0
- package/docs/COUNCIL_DECISION.json +308 -0
- package/docs/COUNCIL_SUMMARY.md +265 -0
- package/docs/COUNCIL_V2.2_DECISION.md +416 -0
- package/docs/IMPROVEMENT_ROADMAP.md +515 -0
- package/docs/LLM_COUNCIL_DECISION.md +508 -0
- package/docs/QUICK_START_VISIBILITY.md +782 -0
- package/docs/REDDIT_GAP_ANALYSIS.md +299 -0
- package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +1180 -0
- package/docs/TMLPD_QNA.md +751 -0
- package/docs/TMLPD_V2.1_COMPLETE.md +763 -0
- package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +754 -0
- package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +446 -0
- package/docs/V2_IMPLEMENTATION_GUIDE.md +388 -0
- package/docs/VISIBILITY_ADOPTION_PLAN.md +1005 -0
- package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +421 -0
- package/docs/launch-content/README.md +457 -0
- package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
- package/docs/launch-content/assets/cumulative_savings.png +0 -0
- package/docs/launch-content/assets/parallel_speedup.png +0 -0
- package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
- package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
- package/docs/launch-content/generate_charts.py +313 -0
- package/docs/launch-content/hn_show_post.md +139 -0
- package/docs/launch-content/partner_outreach_templates.md +745 -0
- package/docs/launch-content/reddit_posts.md +467 -0
- package/docs/launch-content/twitter_thread.txt +460 -0
- package/examples/QUICKSTART.md +1 -1
- package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +313 -0
- package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +277 -0
- package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +1234 -0
- package/openclaw-alexa-bridge/test_fixes.js +77 -0
- package/package.json +120 -29
- package/package.json.tmp +0 -0
- package/qna/TMLPD_QNA.md +3 -3
- package/skill/SKILL.md +2 -2
- package/src/__tests__/integration/tmpld_integration.test.py +540 -0
- package/src/agents/skill_enhanced_agent.py +318 -0
- package/src/memory/__init__.py +15 -0
- package/src/memory/agentic_memory.py +353 -0
- package/src/memory/semantic_memory.py +444 -0
- package/src/memory/simple_memory.py +466 -0
- package/src/memory/working_memory.py +447 -0
- package/src/orchestration/__init__.py +52 -0
- package/src/orchestration/execution_engine.py +353 -0
- package/src/orchestration/halo_orchestrator.py +367 -0
- package/src/orchestration/mcts_workflow.py +498 -0
- package/src/orchestration/role_assigner.py +473 -0
- package/src/orchestration/task_planner.py +522 -0
- package/src/providers/__init__.py +67 -0
- package/src/providers/anthropic.py +304 -0
- package/src/providers/base.py +241 -0
- package/src/providers/cerebras.py +373 -0
- package/src/providers/registry.py +476 -0
- package/src/routing/__init__.py +30 -0
- package/src/routing/universal_router.py +621 -0
- package/src/skills/TMLPD-QUICKREF.md +210 -0
- package/src/skills/TMLPD-SETUP-SUMMARY.md +157 -0
- package/src/skills/TMLPD.md +540 -0
- package/src/skills/__tests__/skill_manager.test.ts +328 -0
- package/src/skills/skill_manager.py +385 -0
- package/src/skills/test-tmlpd.sh +108 -0
- package/src/skills/tmlpd-category.yaml +67 -0
- package/src/skills/tmlpd-monitoring.yaml +188 -0
- package/src/skills/tmlpd-phase.yaml +132 -0
- package/src/state/__init__.py +17 -0
- package/src/state/simple_checkpoint.py +508 -0
- package/src/tmlpd_agent.py +464 -0
- package/src/tmpld_v2.py +427 -0
- package/src/workflows/__init__.py +18 -0
- package/src/workflows/advanced_difficulty_classifier.py +377 -0
- package/src/workflows/chaining_executor.py +417 -0
- package/src/workflows/difficulty_integration.py +209 -0
- package/src/workflows/orchestrator.py +469 -0
- package/src/workflows/orchestrator_executor.py +456 -0
- package/src/workflows/parallelization_executor.py +382 -0
- package/src/workflows/router.py +311 -0
- package/test_integration_simple.py +86 -0
- package/test_mcts_workflow.py +150 -0
- package/test_templd_integration.py +262 -0
- package/test_universal_router.py +275 -0
- package/tmlpd-pi-extension/README.md +36 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +114 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.js +285 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +1 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +58 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.js +153 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.js.map +1 -0
- package/tmlpd-pi-extension/dist/cli.js +59 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +95 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.js +240 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.js.map +1 -0
- package/tmlpd-pi-extension/dist/index.d.ts +723 -0
- package/tmlpd-pi-extension/dist/index.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/index.js +239 -0
- package/tmlpd-pi-extension/dist/index.js.map +1 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +82 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.js +145 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +102 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +207 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +85 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +210 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +1 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +102 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.js +338 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.js.map +1 -0
- package/tmlpd-pi-extension/dist/providers/registry.d.ts +55 -0
- package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/providers/registry.js +138 -0
- package/tmlpd-pi-extension/dist/providers/registry.js.map +1 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +68 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.js +332 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +1 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +101 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +368 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +96 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.js +170 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/compression.d.ts +61 -0
- package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/compression.js +281 -0
- package/tmlpd-pi-extension/dist/utils/compression.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/reliability.d.ts +74 -0
- package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/reliability.js +177 -0
- package/tmlpd-pi-extension/dist/utils/reliability.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +117 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +246 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +50 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.js +124 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +1 -0
- package/tmlpd-pi-extension/examples/QUICKSTART.md +183 -0
- package/tmlpd-pi-extension/package-lock.json +75 -0
- package/tmlpd-pi-extension/package.json +172 -0
- package/tmlpd-pi-extension/python/examples.py +53 -0
- package/tmlpd-pi-extension/python/integrations.py +330 -0
- package/tmlpd-pi-extension/python/setup.py +28 -0
- package/tmlpd-pi-extension/python/tmlpd.py +369 -0
- package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +299 -0
- package/tmlpd-pi-extension/qna/TMLPD_QNA.md +751 -0
- package/tmlpd-pi-extension/skill/SKILL.md +238 -0
- package/{src → tmlpd-pi-extension/src}/index.ts +1 -1
- package/tmlpd-pi-extension/tsconfig.json +18 -0
- package/demo/research-demo.js +0 -266
- package/notebooks/quickstart.ipynb +0 -157
- package/rust/tmlpd.h +0 -268
- package/src/cache/prefixCache.ts +0 -365
- package/src/routing/advancedRouter.ts +0 -406
- package/src/utils/speculativeDecoding.ts +0 -344
- /package/{src → tmlpd-pi-extension/src}/cache/responseCache.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/cost/costTracker.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/memory/episodicMemory.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/orchestration/haloOrchestrator.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/orchestration/mctsWorkflow.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/providers/localProvider.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/providers/registry.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/tools/tmlpdTools.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/batchProcessor.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/compression.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/reliability.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/tokenUtils.ts +0 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Test Universal Learned Router
|
|
3
|
+
|
|
4
|
+
Verify routing logic, quality prediction, and online learning.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
import sys
from pathlib import Path

# Make the directory that holds this script (and the sibling ``src`` package)
# importable no matter where the script is launched from.  The previous
# version appended an absolute path that exists only on the original
# author's machine, so the ``src.routing`` import failed elsewhere unless the
# script happened to be started from the package root.
_PACKAGE_ROOT = str(Path(__file__).resolve().parent)
if _PACKAGE_ROOT not in sys.path:
    sys.path.append(_PACKAGE_ROOT)

from src.routing import UniversalModelRouter
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
async def test_basic_routing():
    """Test 1: Basic routing selects appropriate models.

    Builds a router with ``quality_target=0.90`` and ``cost_weight=0.5``,
    routes one trivial task and one complex task, prints each decision, and
    verifies that the selected model is one of the candidates that were
    offered to ``router.route``.

    The inline expectations ("should route to cheap model" and so on) are
    informational only; which candidate wins depends on the router's
    profiles and is not checked here.

    Raises:
        AssertionError: If the router returns a model that was not in the
            candidate list for the task being routed.
    """
    print("=" * 70)
    print("TEST 1: Basic Routing - Model Selection")
    print("=" * 70)

    router = UniversalModelRouter(quality_target=0.90, cost_weight=0.5)

    # Test simple task (should route to cheap model)
    simple_task = {
        "description": "What is 2+2?",
        "context": {},
    }
    simple_candidates = ["anthropic/claude-3-5-sonnet", "cerebras/llama-3.3-70b"]

    decision = await router.route(simple_task, simple_candidates)

    print(f"\nSimple Task: {simple_task['description']}")
    print(f" Selected Model: {decision.selected_model}")
    print(f" Reasoning: {decision.reasoning}")
    print(f" Predicted Quality: {decision.predicted_quality:.2f}")
    print(f" Estimated Cost: ${decision.estimated_cost:.6f}")

    # Explicit raise rather than ``assert`` so the check survives ``python -O``.
    if decision.selected_model not in simple_candidates:
        raise AssertionError(
            f"Router selected {decision.selected_model!r}, which is not one of "
            f"the offered candidates {simple_candidates}"
        )

    # Test complex task (should route to high-quality model)
    complex_task = {
        "description": "Design a distributed system architecture for a real-time trading platform with microservices, event sourcing, and eventual consistency",
        "context": {"domain": "system-design", "constraints": ["low-latency", "high-throughput"]},
    }
    complex_candidates = ["anthropic/claude-3-5-sonnet-20241022", "cerebras/llama-3.3-70b"]

    decision2 = await router.route(complex_task, complex_candidates)

    print(f"\nComplex Task: {complex_task['description'][:60]}...")
    print(f" Selected Model: {decision2.selected_model}")
    print(f" Reasoning: {decision2.reasoning}")
    print(f" Predicted Quality: {decision2.predicted_quality:.2f}")
    print(f" Estimated Cost: ${decision2.estimated_cost:.6f}")

    if decision2.selected_model not in complex_candidates:
        raise AssertionError(
            f"Router selected {decision2.selected_model!r}, which is not one of "
            f"the offered candidates {complex_candidates}"
        )

    print("\n✅ Test 1 PASSED: Router selects appropriate models\n")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
async def test_feature_extraction():
    """Test 2: Feature extraction captures task characteristics.

    Runs ``router._extract_task_features`` on a coding task and on a
    creative-writing task and prints the ``length``, ``technical``,
    ``complexity``, ``constraints`` and ``domain`` entries of each result.
    Nothing is asserted; a missing key surfaces as ``KeyError``.
    """
    # Scalar features are printed with two decimals, in this order.
    scalar_fields = (
        ("Length", "length"),
        ("Technical", "technical"),
        ("Complexity", "complexity"),
        ("Constraints", "constraints"),
    )

    def report(heading, extracted):
        # Print one feature dictionary under the given heading.
        print(heading)
        for label, key in scalar_fields:
            print(f" {label}: {extracted[key]:.2f}")
        print(f" Domain Scores: {extracted['domain']}")

    separator = "=" * 70
    print(separator)
    print("TEST 2: Feature Extraction")
    print(separator)

    router = UniversalModelRouter()

    # Technical coding task
    coding_task = dict(
        description="Implement a REST API with authentication and database integration",
        context={"requirements": ["JWT", "PostgreSQL"]},
    )
    report("\nCoding Task Features:", router._extract_task_features(coding_task))

    # Creative writing task
    writing_task = dict(
        description="Write a short story about a robot learning to paint",
        context={"genre": "science-fiction", "style": "creative"},
    )
    report("\nWriting Task Features:", router._extract_task_features(writing_task))

    print("\n✅ Test 2 PASSED: Features capture task characteristics\n")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
async def test_online_learning():
    """Test 3: Online learning updates model profiles.

    Prints the profile stored under one model id, feeds five successful
    execution outcomes for that model to ``router.learn_from_feedback``,
    then prints the profile again together with its per-difficulty quality
    values.  Nothing is asserted.
    """
    rule = "=" * 70
    print(rule)
    print("TEST 3: Online Learning - Profile Updates")
    print(rule)

    router = UniversalModelRouter()

    # Get initial profile
    model_id = "anthropic/claude-3-5-sonnet-20241022"
    before = router.model_profiles[model_id]

    print(f"\nInitial Profile for {model_id}:")
    print(f" Average Quality: {before.avg_quality_score:.3f}")
    print(f" Total Executions: {before.total_executions}")

    # Simulate feedback loop
    print("\nSimulating 5 execution outcomes...")

    # (description, context, observed quality, cost in USD) per simulated run.
    simulated_runs = (
        ("Implement REST API with authentication", {"requirements": ["JWT"]}, 0.96, 0.003),
        ("What is the capital of France?", {}, 0.98, 0.001),
        (
            "Design distributed system for trading platform",
            {"requirements": ["low-latency", "high-throughput"]},
            0.94,
            0.005,
        ),
        ("Analyze this dataset and provide insights", {"domain": "data-analysis"}, 0.97, 0.004),
        ("Write a creative story about a robot", {"genre": "science-fiction"}, 0.99, 0.002),
    )
    outcomes = [
        {
            "model": model_id,
            "task": {"description": description, "context": context},
            "actual_quality": quality,
            "success": True,
            "cost_usd": cost,
        }
        for description, context, quality, cost in simulated_runs
    ]

    await router.learn_from_feedback(outcomes)

    # Get updated profile
    after = router.model_profiles[model_id]

    print(f"\nUpdated Profile for {model_id}:")
    print(f" Average Quality: {after.avg_quality_score:.3f}")
    print(f" Quality Variance: {after.quality_variance:.4f}")
    print(f" Total Executions: {after.total_executions}")
    print(f" Recent Outcomes: {len(after.recent_outcomes)} samples")

    print("\nQuality by Difficulty Level:")
    for level, score in after.quality_by_difficulty.items():
        print(f" {level}: {score:.3f}")

    print("\n✅ Test 3 PASSED: Online learning updates profiles correctly\n")
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
async def test_cost_optimization():
    """Test 4: Cost optimization routes to cheaper models when appropriate.

    Routes the same simple task through two routers — one configured with
    ``quality_target=0.75, cost_weight=0.8`` and one with
    ``quality_target=0.95, cost_weight=0.2`` — over the same three candidate
    models, and prints both decisions side by side.  Nothing is asserted.
    """
    bar = "=" * 70
    print(bar)
    print("TEST 4: Cost Optimization")
    print(bar)

    # High cost weight (prioritize cheap models)
    router_cheap = UniversalModelRouter(quality_target=0.75, cost_weight=0.8)

    # High quality target (prioritize quality)
    router_quality = UniversalModelRouter(quality_target=0.95, cost_weight=0.2)

    simple_task = {"description": "What is the capital of France?", "context": {}}
    available_models = [
        "anthropic/claude-3-5-sonnet",
        "cerebras/llama-3.3-70b",
        "groq/llama-3.3-70b",
    ]

    decision_cheap = await router_cheap.route(simple_task, available_models)
    decision_quality = await router_quality.route(simple_task, available_models)

    print(f"\nSimple Task: {simple_task['description']}")

    # Same three-line report for each router, cost-optimized first.
    reports = (
        ("\nCost-Optimized Router:", decision_cheap),
        ("\nQuality-Optimized Router:", decision_quality),
    )
    for heading, outcome in reports:
        print(heading)
        print(f" Selected: {outcome.selected_model}")
        print(f" Estimated Cost: ${outcome.estimated_cost:.6f}")
        print(f" Predicted Quality: {outcome.predicted_quality:.2f}")

    print("\n✅ Test 4 PASSED: Cost optimization routing works correctly\n")
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
async def test_unseen_model():
    """Test 5: Unseen model profile inference.

    Routes a task with a candidate list that includes ``openai/gpt-4o-mini``,
    then checks that ``router.model_profiles`` contains an entry for that
    model and prints the entry's provider, cost and quality.

    Previously the "PASSED" line was printed even when no profile had been
    created, so the test could never fail on the behavior it names.

    Raises:
        AssertionError: If no profile for the unseen model is present in
            ``router.model_profiles`` after routing.
    """
    print("=" * 70)
    print("TEST 5: Unseen Model Profile Inference")
    print("=" * 70)

    router = UniversalModelRouter()

    # Try to route with an unseen model
    unseen_model = "openai/gpt-4o-mini"

    task = {
        "description": "Summarize this text",
        "context": {},
    }

    decision = await router.route(task, ["anthropic/claude-3-5-sonnet", unseen_model])

    profile_inferred = unseen_model in router.model_profiles

    print(f"\nUnseen Model: {unseen_model}")
    print(f" Profile Inferred: {profile_inferred}")
    print(f" Selected Model: {decision.selected_model}")

    # Explicit raise rather than ``assert`` so the check survives ``python -O``.
    if not profile_inferred:
        raise AssertionError(
            f"No profile was inferred for unseen model {unseen_model!r}"
        )

    profile = router.model_profiles[unseen_model]
    print(f"\nInferred Profile:")
    print(f" Provider: {profile.provider}")
    print(f" Cost: ${profile.cost_per_1k_tokens:.6f}/1K tokens")
    print(f" Quality: {profile.avg_quality_score:.2f}")

    print("\n✅ Test 5 PASSED: Unseen models get inferred profiles\n")
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
async def main():
    """Run all tests.

    Awaits the five test coroutines in order, then prints summary
    statistics read from a freshly constructed router.  Any exception is
    reported with its traceback and the process exits with status 1.
    """
    banner = "=" * 70
    print("\n" + banner)
    print("UNIVERSAL LEARNED ROUTER - TEST SUITE")
    print(banner + "\n")

    suite = (
        test_basic_routing,
        test_feature_extraction,
        test_online_learning,
        test_cost_optimization,
        test_unseen_model,
    )

    try:
        for case in suite:
            await case()

        print(banner)
        print("ALL TESTS PASSED ✅")
        print(banner)

        # Show router stats
        print("\nRouter Statistics:")
        router = UniversalModelRouter()
        providers = {profile.provider for profile in router.model_profiles.values()}
        print(f" Total Models: {len(router.model_profiles)}")
        print(f" Model Providers: {providers}")
        print(f" Learning Rate: {router.learning_rate}")
        print(f" Quality Target: {router.quality_target}")
        print(f" Cost Weight: {router.cost_weight}")

    except Exception as e:
        print(f"\n❌ TEST FAILED: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    asyncio.run(main())
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# A3M Router - Adaptive Memory Multi-Model Router
|
|
2
|
+
|
|
3
|
+
> **Smart Routing for AI Agents & LLM Developers**
|
|
4
|
+
> npm: https://npmjs.com/package/adaptive-memory-multi-model-router
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## A3M = **A**daptive **M**emory **M**ulti-**M**odel Router
|
|
9
|
+
|
|
10
|
+
A research-backed, AI-native router with learned routing, prefix caching, and speculative decoding.
|
|
11
|
+
|
|
12
|
+
### Quick Install
|
|
13
|
+
```bash
|
|
14
|
+
npm install adaptive-memory-multi-model-router
|
|
15
|
+
npx a3m-router --help
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
### Key Features
|
|
19
|
+
- **Learned Routing** (RouteLLM) - 40% cost reduction
|
|
20
|
+
- **Prefix Caching** (RadixAttention) - 5-10x speedup
|
|
21
|
+
- **Speculative Decoding** (Medusa) - 2-3x faster
|
|
22
|
+
- **Python Bindings** - LangChain, LlamaIndex, AutoGen, CrewAI
|
|
23
|
+
|
|
24
|
+
### CLI
|
|
25
|
+
```bash
|
|
26
|
+
a3m-router route "your prompt"
|
|
27
|
+
a3m-router parallel "task1" "task2"
|
|
28
|
+
a3m-router cost
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### For ML Developers
|
|
32
|
+
Python bindings for seamless integration with existing ML pipelines.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
**Install:** `npm install adaptive-memory-multi-model-router`
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TMLPD Prefix Cache - RadixAttention Style
|
|
3
|
+
*
|
|
4
|
+
* Inspired by SGLang's RadixAttention (arXiv:2312.07104)
|
|
5
|
+
* Caches KV states for common prefixes (system prompts, etc.)
|
|
6
|
+
* 5-10x speedup for repeated prompt patterns
|
|
7
|
+
*/
|
|
8
|
+
export interface CacheEntry {
|
|
9
|
+
key: string;
|
|
10
|
+
prefix: string;
|
|
11
|
+
kv_state?: Buffer;
|
|
12
|
+
response_hash?: string;
|
|
13
|
+
hit_count: number;
|
|
14
|
+
last_used: number;
|
|
15
|
+
token_count: number;
|
|
16
|
+
children: Map<string, string>;
|
|
17
|
+
}
|
|
18
|
+
export interface PrefixCacheStats {
|
|
19
|
+
total_entries: number;
|
|
20
|
+
total_hits: number;
|
|
21
|
+
total_misses: number;
|
|
22
|
+
hit_rate: number;
|
|
23
|
+
memory_estimate_mb: number;
|
|
24
|
+
oldest_entry_age_ms: number;
|
|
25
|
+
}
|
|
26
|
+
export declare class PrefixCache {
|
|
27
|
+
private entries;
|
|
28
|
+
private access_order;
|
|
29
|
+
private max_entries;
|
|
30
|
+
private max_memory_mb;
|
|
31
|
+
constructor(options?: {
|
|
32
|
+
max_entries?: number;
|
|
33
|
+
max_memory_mb?: number;
|
|
34
|
+
});
|
|
35
|
+
/**
|
|
36
|
+
* Generate cache key from text prefix
|
|
37
|
+
*/
|
|
38
|
+
private generateKey;
|
|
39
|
+
/**
|
|
40
|
+
* Check if prefix is cached
|
|
41
|
+
*/
|
|
42
|
+
has(prefix: string, model?: string): boolean;
|
|
43
|
+
/**
|
|
44
|
+
* Get cached entry
|
|
45
|
+
*/
|
|
46
|
+
get(prefix: string, model?: string): CacheEntry | undefined;
|
|
47
|
+
/**
|
|
48
|
+
* Store a new prefix with its KV state
|
|
49
|
+
*/
|
|
50
|
+
store(prefix: string, options?: {
|
|
51
|
+
kv_state?: Buffer;
|
|
52
|
+
response_hash?: string;
|
|
53
|
+
model?: string;
|
|
54
|
+
children?: Map<string, string>;
|
|
55
|
+
}): string;
|
|
56
|
+
/**
|
|
57
|
+
* Extend cached prefix with completion
|
|
58
|
+
*/
|
|
59
|
+
extend(prefix: string, completion: string, options?: {
|
|
60
|
+
model?: string;
|
|
61
|
+
}): string;
|
|
62
|
+
/**
|
|
63
|
+
* Find common prefix between two texts
|
|
64
|
+
*/
|
|
65
|
+
findCommonPrefix(text1: string, text2: string): string;
|
|
66
|
+
/**
|
|
67
|
+
* Lookup with prefix matching
|
|
68
|
+
* Returns cached entry if any prefix is found
|
|
69
|
+
*/
|
|
70
|
+
lookup(text: string, model?: string): {
|
|
71
|
+
cached: boolean;
|
|
72
|
+
prefix?: string;
|
|
73
|
+
remaining?: string;
|
|
74
|
+
};
|
|
75
|
+
/**
|
|
76
|
+
* Batch lookup for multiple texts
|
|
77
|
+
*/
|
|
78
|
+
lookupBatch(texts: string[], model?: string): Array<{
|
|
79
|
+
cached: boolean;
|
|
80
|
+
prefix?: string;
|
|
81
|
+
remaining?: string;
|
|
82
|
+
}>;
|
|
83
|
+
/**
|
|
84
|
+
* Get cache statistics
|
|
85
|
+
*/
|
|
86
|
+
getStats(): PrefixCacheStats;
|
|
87
|
+
/**
|
|
88
|
+
* Get estimated memory usage
|
|
89
|
+
*/
|
|
90
|
+
private getMemoryUsage;
|
|
91
|
+
/**
|
|
92
|
+
* Update LRU order
|
|
93
|
+
*/
|
|
94
|
+
private updateLRU;
|
|
95
|
+
/**
|
|
96
|
+
* Evict least recently used entry
|
|
97
|
+
*/
|
|
98
|
+
private evictLRU;
|
|
99
|
+
/**
|
|
100
|
+
* Clear all cache
|
|
101
|
+
*/
|
|
102
|
+
clear(): void;
|
|
103
|
+
/**
|
|
104
|
+
* Invalidate entries matching pattern
|
|
105
|
+
*/
|
|
106
|
+
invalidate(pattern?: string): number;
|
|
107
|
+
/**
|
|
108
|
+
* Warm up cache with common system prompts
|
|
109
|
+
*/
|
|
110
|
+
warmup(common_prefixes: string[], model?: string): void;
|
|
111
|
+
}
|
|
112
|
+
export default PrefixCache;
|
|
113
|
+
export declare function createWarmedCache(): PrefixCache;
|
|
114
|
+
//# sourceMappingURL=prefixCache.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prefixCache.d.ts","sourceRoot":"","sources":["../../src/cache/prefixCache.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC/B;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED,qBAAa,WAAW;IACtB,OAAO,CAAC,OAAO,CAAsC;IACrD,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,aAAa,CAAS;gBAElB,OAAO,CAAC,EAAE;QACpB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,aAAa,CAAC,EAAE,MAAM,CAAC;KACxB;IAKD;;OAEG;IACH,OAAO,CAAC,WAAW;IAenB;;OAEG;IACH,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,OAAO;IAK5C;;OAEG;IACH,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS;IAc3D;;OAEG;IACH,KAAK,CACH,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE;QACR,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAChC,GACA,MAAM;IAsCT;;OAEG;IACH,MAAM,CACJ,MAAM,EAAE,MAAM,EACd,UAAU,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,GAC3B,MAAM;IAoBT;;OAEG;IACH,gBAAgB,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM;IAgBtD;;;OAGG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG;QAAE,MAAM,EAAE,OAAO,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAE;IAsB9F;;OAEG;IACH,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,KAAK,CAAC;QAAE,MAAM,EAAE,OAAO,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAI7G;;OAEG;IACH,QAAQ,IAAI,gBAAgB;IAwB5B;;OAEG;IACH,OAAO,CAAC,cAAc;IAsBtB;;OAEG;IACH,OAAO,CAAC,SAAS;IAQjB;;OAEG;IACH,OAAO,CAAC,QAAQ;IAuBhB;;OAEG;IACH,KAAK,IAAI,IAAI;IAKb;;OAEG;IACH,UAAU,CAAC,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM;IAqB
pC;;OAEG;IACH,MAAM,CAAC,eAAe,EAAE,MAAM,EAAE,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI;CAMxD;AAcD,eAAe,WAAW,CAAC;AAG3B,wBAAgB,iBAAiB,IAAI,WAAW,CAI/C"}
|