adaptive-memory-multi-model-router 1.2.2 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +146 -66
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/integrations/airtable.js +20 -0
- package/dist/integrations/discord.js +18 -0
- package/dist/integrations/github.js +23 -0
- package/dist/integrations/gmail.js +19 -0
- package/dist/integrations/google-calendar.js +18 -0
- package/dist/integrations/index.js +61 -0
- package/dist/integrations/jira.js +21 -0
- package/dist/integrations/linear.js +19 -0
- package/dist/integrations/notion.js +19 -0
- package/dist/integrations/slack.js +18 -0
- package/dist/integrations/telegram.js +19 -0
- package/dist/providers/registry.js +7 -3
- package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +1391 -0
- package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +1051 -0
- package/docs/CONFIGURATION.md +476 -0
- package/docs/COUNCIL_DECISION.json +308 -0
- package/docs/COUNCIL_SUMMARY.md +265 -0
- package/docs/COUNCIL_V2.2_DECISION.md +416 -0
- package/docs/IMPROVEMENT_ROADMAP.md +515 -0
- package/docs/LLM_COUNCIL_DECISION.md +508 -0
- package/docs/QUICK_START_VISIBILITY.md +782 -0
- package/docs/REDDIT_GAP_ANALYSIS.md +299 -0
- package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +1180 -0
- package/docs/TMLPD_QNA.md +751 -0
- package/docs/TMLPD_V2.1_COMPLETE.md +763 -0
- package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +754 -0
- package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +446 -0
- package/docs/V2_IMPLEMENTATION_GUIDE.md +388 -0
- package/docs/VISIBILITY_ADOPTION_PLAN.md +1005 -0
- package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +421 -0
- package/docs/launch-content/README.md +457 -0
- package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
- package/docs/launch-content/assets/cumulative_savings.png +0 -0
- package/docs/launch-content/assets/parallel_speedup.png +0 -0
- package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
- package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
- package/docs/launch-content/generate_charts.py +313 -0
- package/docs/launch-content/hn_show_post.md +139 -0
- package/docs/launch-content/partner_outreach_templates.md +745 -0
- package/docs/launch-content/reddit_posts.md +467 -0
- package/docs/launch-content/twitter_thread.txt +460 -0
- package/examples/QUICKSTART.md +1 -1
- package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +313 -0
- package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +277 -0
- package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +1234 -0
- package/openclaw-alexa-bridge/test_fixes.js +77 -0
- package/package.json +120 -29
- package/package.json.tmp +0 -0
- package/qna/TMLPD_QNA.md +3 -3
- package/skill/SKILL.md +2 -2
- package/src/__tests__/integration/tmpld_integration.test.py +540 -0
- package/src/agents/skill_enhanced_agent.py +318 -0
- package/src/memory/__init__.py +15 -0
- package/src/memory/agentic_memory.py +353 -0
- package/src/memory/semantic_memory.py +444 -0
- package/src/memory/simple_memory.py +466 -0
- package/src/memory/working_memory.py +447 -0
- package/src/orchestration/__init__.py +52 -0
- package/src/orchestration/execution_engine.py +353 -0
- package/src/orchestration/halo_orchestrator.py +367 -0
- package/src/orchestration/mcts_workflow.py +498 -0
- package/src/orchestration/role_assigner.py +473 -0
- package/src/orchestration/task_planner.py +522 -0
- package/src/providers/__init__.py +67 -0
- package/src/providers/anthropic.py +304 -0
- package/src/providers/base.py +241 -0
- package/src/providers/cerebras.py +373 -0
- package/src/providers/registry.py +476 -0
- package/src/routing/__init__.py +30 -0
- package/src/routing/universal_router.py +621 -0
- package/src/skills/TMLPD-QUICKREF.md +210 -0
- package/src/skills/TMLPD-SETUP-SUMMARY.md +157 -0
- package/src/skills/TMLPD.md +540 -0
- package/src/skills/__tests__/skill_manager.test.ts +328 -0
- package/src/skills/skill_manager.py +385 -0
- package/src/skills/test-tmlpd.sh +108 -0
- package/src/skills/tmlpd-category.yaml +67 -0
- package/src/skills/tmlpd-monitoring.yaml +188 -0
- package/src/skills/tmlpd-phase.yaml +132 -0
- package/src/state/__init__.py +17 -0
- package/src/state/simple_checkpoint.py +508 -0
- package/src/tmlpd_agent.py +464 -0
- package/src/tmpld_v2.py +427 -0
- package/src/workflows/__init__.py +18 -0
- package/src/workflows/advanced_difficulty_classifier.py +377 -0
- package/src/workflows/chaining_executor.py +417 -0
- package/src/workflows/difficulty_integration.py +209 -0
- package/src/workflows/orchestrator.py +469 -0
- package/src/workflows/orchestrator_executor.py +456 -0
- package/src/workflows/parallelization_executor.py +382 -0
- package/src/workflows/router.py +311 -0
- package/test_integration_simple.py +86 -0
- package/test_mcts_workflow.py +150 -0
- package/test_templd_integration.py +262 -0
- package/test_universal_router.py +275 -0
- package/tmlpd-pi-extension/README.md +36 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +114 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.js +285 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +1 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +58 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.js +153 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.js.map +1 -0
- package/tmlpd-pi-extension/dist/cli.js +59 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +95 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.js +240 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.js.map +1 -0
- package/tmlpd-pi-extension/dist/index.d.ts +723 -0
- package/tmlpd-pi-extension/dist/index.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/index.js +239 -0
- package/tmlpd-pi-extension/dist/index.js.map +1 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +82 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.js +145 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +102 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +207 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +85 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +210 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +1 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +102 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.js +338 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.js.map +1 -0
- package/tmlpd-pi-extension/dist/providers/registry.d.ts +55 -0
- package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/providers/registry.js +138 -0
- package/tmlpd-pi-extension/dist/providers/registry.js.map +1 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +68 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.js +332 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +1 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +101 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +368 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +96 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.js +170 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/compression.d.ts +61 -0
- package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/compression.js +281 -0
- package/tmlpd-pi-extension/dist/utils/compression.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/reliability.d.ts +74 -0
- package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/reliability.js +177 -0
- package/tmlpd-pi-extension/dist/utils/reliability.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +117 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +246 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +50 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.js +124 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +1 -0
- package/tmlpd-pi-extension/examples/QUICKSTART.md +183 -0
- package/tmlpd-pi-extension/package-lock.json +75 -0
- package/tmlpd-pi-extension/package.json +172 -0
- package/tmlpd-pi-extension/python/examples.py +53 -0
- package/tmlpd-pi-extension/python/integrations.py +330 -0
- package/tmlpd-pi-extension/python/setup.py +28 -0
- package/tmlpd-pi-extension/python/tmlpd.py +369 -0
- package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +299 -0
- package/tmlpd-pi-extension/qna/TMLPD_QNA.md +751 -0
- package/tmlpd-pi-extension/skill/SKILL.md +238 -0
- package/{src → tmlpd-pi-extension/src}/index.ts +1 -1
- package/tmlpd-pi-extension/tsconfig.json +18 -0
- package/demo/research-demo.js +0 -266
- package/notebooks/quickstart.ipynb +0 -157
- package/rust/tmlpd.h +0 -268
- package/src/cache/prefixCache.ts +0 -365
- package/src/routing/advancedRouter.ts +0 -406
- package/src/utils/speculativeDecoding.ts +0 -344
- /package/{src → tmlpd-pi-extension/src}/cache/responseCache.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/cost/costTracker.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/memory/episodicMemory.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/orchestration/haloOrchestrator.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/orchestration/mctsWorkflow.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/providers/localProvider.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/providers/registry.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/tools/tmlpdTools.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/batchProcessor.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/compression.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/reliability.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/tokenUtils.ts +0 -0
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cost-Optimized Providers - Agent 3 Output
|
|
3
|
+
|
|
4
|
+
Implements Cerebras and Groq providers for cost-effective inference.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import time
|
|
9
|
+
from typing import Dict, Any
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
import asyncio
|
|
12
|
+
|
|
13
|
+
from .base import BaseProvider, ProviderConfig, ProviderResponse, ProviderHealth
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CerebrasProvider(BaseProvider):
    """
    Cerebras provider - Extremely fast LLaMA inference.

    Key advantage: 10x faster than standard inference, very low cost.
    Ideal for: Simple tasks, prototyping, high-volume requests.
    """

    # Pricing (per 1M tokens - significantly cheaper)
    PRICING = {
        "llama-3.3-70b": {"input": 0.10, "output": 0.10},
        "llama-3.1-8b": {"input": 0.05, "output": 0.05},
    }

    def __init__(self, config: ProviderConfig):
        """Initialize the provider and resolve its API key.

        Args:
            config: Provider configuration; ``config.api_key_env`` names the
                environment variable that holds the API key.

        Raises:
            ValueError: If that environment variable is unset or empty.
        """
        super().__init__(config)
        self.api_key = os.getenv(config.api_key_env)

        if not self.api_key:
            raise ValueError(f"API key not found: {config.api_key_env}")

    async def execute(
        self,
        prompt: str,
        max_tokens: int = 2000,
        temperature: float = 0.7,
        **kwargs
    ) -> ProviderResponse:
        """
        Execute prompt with Cerebras (LLaMA on Cerebras hardware).

        Note: Placeholder implementation.
        In production, use actual Cerebras SDK or API.

        Returns:
            ProviderResponse: on success, the mock completion with estimated
            token usage and cost; on any exception, a zero-token, zero-cost
            failure response carrying the exception text.
        """
        start_time = time.time()

        try:
            # Simulate Cerebras fast inference (0.1s - much faster!)
            response_content = await self._mock_api_call(prompt, max_tokens)

            latency_ms = (time.time() - start_time) * 1000
            # Crude heuristic: ~4 characters per token, prompt + completion.
            estimated_tokens = len(prompt) // 4 + len(response_content) // 4

            return ProviderResponse(
                success=True,
                content=response_content,
                tokens_used=estimated_tokens,
                cost=self.calculate_cost(estimated_tokens),
                latency_ms=latency_ms,
                model=self.config.model,
                provider=self.config.name,
                timestamp=datetime.now().isoformat(),
                metadata={
                    "temperature": temperature,
                    "max_tokens": max_tokens,
                    "inference_speed": "ultra_fast"
                }
            )

        except Exception as e:
            # Any failure (mock call or response assembly) maps to a uniform
            # unsuccessful response rather than propagating to the caller.
            return self._failure_response(e, (time.time() - start_time) * 1000)

    def _failure_response(self, error: Exception, latency_ms: float) -> ProviderResponse:
        """Build the standard zero-token, zero-cost failure response."""
        return ProviderResponse(
            success=False,
            content="",
            tokens_used=0,
            cost=0.0,
            latency_ms=latency_ms,
            model=self.config.model,
            provider=self.config.name,
            timestamp=datetime.now().isoformat(),
            metadata={},
            error=str(error)
        )

    async def _mock_api_call(self, prompt: str, max_tokens: int) -> str:
        """Mock Cerebras API call (ultra fast!)"""
        await asyncio.sleep(0.1)  # 10x faster than others
        return f"[Cerebras {self.config.model}] Fast response to: {prompt[:100]}..."

    def calculate_cost(self, tokens: int) -> float:
        """Calculate cost - VERY LOW compared to others.

        Splits ``tokens`` evenly between input and output; unknown models
        fall back to the llama-3.3-70b rate.
        """
        pricing = self.PRICING.get(self.config.model, {"input": 0.10, "output": 0.10})

        input_cost = (tokens / 2) * pricing["input"] / 1_000_000
        output_cost = (tokens / 2) * pricing["output"] / 1_000_000

        return input_cost + output_cost

    async def health_check(self) -> ProviderHealth:
        """Check Cerebras API health (simulated; no real network call)."""
        try:
            start = time.time()
            await asyncio.sleep(0.05)  # Very fast health check
            latency_ms = (time.time() - start) * 1000

            self.health = ProviderHealth(
                status="healthy",
                last_check=datetime.now().isoformat(),
                consecutive_failures=0,
                latency_ms=latency_ms,
                uptime_percentage=100.0
            )

            return self.health

        except Exception as e:
            # NOTE(review): assumes self.health is initialized by BaseProvider
            # before the first failure — confirm against base.py.
            self.health = ProviderHealth(
                status="unhealthy",
                last_check=datetime.now().isoformat(),
                consecutive_failures=self.health.consecutive_failures + 1,
                last_error=str(e)
            )

            return self.health
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class GroqProvider(BaseProvider):
    """
    Groq provider - Fast inference on LPU (Language Processing Units).

    Key advantage: Very fast, very low cost, open models.
    Ideal for: Trivial/simple tasks, prototyping, high-volume.
    """

    # Per-1M-token rates; Groq's open models are extremely cheap.
    PRICING = {
        "llama-3.3-70b-8192": {"input": 0.59, "output": 0.79},
        "llama-3.1-70b-8192": {"input": 0.59, "output": 0.79},
        "mixtral-8x7b-32768": {"input": 0.27, "output": 0.27},
    }

    def __init__(self, config: ProviderConfig):
        """Resolve the key named by ``config.api_key_env``; fail fast if missing."""
        super().__init__(config)
        self.api_key = os.getenv(config.api_key_env)
        if not self.api_key:
            raise ValueError(f"API key not found: {config.api_key_env}")

    async def execute(
        self,
        prompt: str,
        max_tokens: int = 2000,
        temperature: float = 0.7,
        **kwargs
    ) -> ProviderResponse:
        """Run *prompt* against the (mocked) Groq backend.

        Placeholder implementation — a production build would call the Groq
        SDK's ``chat.completions.create`` with the configured model, the
        user message, ``max_tokens`` and ``temperature``.
        """
        began = time.time()
        try:
            reply = await self._mock_api_call(prompt, max_tokens)
            elapsed_ms = (time.time() - began) * 1000
            # ~4 characters per token, counting both prompt and completion.
            approx_tokens = len(prompt) // 4 + len(reply) // 4
            return ProviderResponse(
                success=True,
                content=reply,
                tokens_used=approx_tokens,
                cost=self.calculate_cost(approx_tokens),
                latency_ms=elapsed_ms,
                model=self.config.model,
                provider=self.config.name,
                timestamp=datetime.now().isoformat(),
                metadata={
                    "temperature": temperature,
                    "max_tokens": max_tokens,
                    "inference_speed": "very_fast"
                }
            )
        except Exception as exc:
            # Uniform zero-cost failure payload; never propagates to caller.
            return ProviderResponse(
                success=False,
                content="",
                tokens_used=0,
                cost=0.0,
                latency_ms=(time.time() - began) * 1000,
                model=self.config.model,
                provider=self.config.name,
                timestamp=datetime.now().isoformat(),
                metadata={},
                error=str(exc)
            )

    async def _mock_api_call(self, prompt: str, max_tokens: int) -> str:
        """Simulated Groq completion; returns a canned fast reply."""
        await asyncio.sleep(0.15)
        return f"[Groq {self.config.model}] Fast response to: {prompt[:100]}..."

    def calculate_cost(self, tokens: int) -> float:
        """Estimate dollar cost, splitting tokens 50/50 between input and output."""
        rates = self.PRICING.get(self.config.model, {"input": 0.59, "output": 0.79})
        inbound = (tokens / 2) * rates["input"] / 1_000_000
        outbound = (tokens / 2) * rates["output"] / 1_000_000
        return inbound + outbound

    async def health_check(self) -> ProviderHealth:
        """Probe (simulated) Groq availability and record the result."""
        try:
            probe_started = time.time()
            await asyncio.sleep(0.1)
            probe_ms = (time.time() - probe_started) * 1000
            self.health = ProviderHealth(
                status="healthy",
                last_check=datetime.now().isoformat(),
                consecutive_failures=0,
                latency_ms=probe_ms,
                uptime_percentage=100.0
            )
        except Exception as exc:
            self.health = ProviderHealth(
                status="unhealthy",
                last_check=datetime.now().isoformat(),
                consecutive_failures=self.health.consecutive_failures + 1,
                last_error=str(exc)
            )
        return self.health
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
class TogetherProvider(BaseProvider):
    """
    Together AI provider - Open models at competitive pricing.

    Key advantage: Wide variety of open-source models, good price/performance.
    Ideal for: Specific model requirements, cost optimization.
    """

    # Per-1M-token rates for the supported open models.
    PRICING = {
        "mistralai/Mixtral-8x7B-Instruct-v0.1": {"input": 0.50, "output": 0.50},
        "meta-llama/Llama-3-70b-chat-hf": {"input": 0.70, "output": 0.70},
    }

    def __init__(self, config: ProviderConfig):
        """Read the API key from the env var named in *config*; raise if absent."""
        super().__init__(config)
        self.api_key = os.getenv(config.api_key_env)
        if not self.api_key:
            raise ValueError(f"API key not found: {config.api_key_env}")

    async def execute(
        self,
        prompt: str,
        max_tokens: int = 2000,
        temperature: float = 0.7,
        **kwargs
    ) -> ProviderResponse:
        """Execute *prompt* via the (mocked) Together AI backend."""
        t0 = time.time()
        try:
            completion = await self._mock_api_call(prompt, max_tokens)
            duration_ms = (time.time() - t0) * 1000
            # Token count approximated as total characters / 4.
            token_estimate = len(prompt) // 4 + len(completion) // 4
            return ProviderResponse(
                success=True,
                content=completion,
                tokens_used=token_estimate,
                cost=self.calculate_cost(token_estimate),
                latency_ms=duration_ms,
                model=self.config.model,
                provider=self.config.name,
                timestamp=datetime.now().isoformat(),
                metadata={
                    "temperature": temperature,
                    "max_tokens": max_tokens
                }
            )
        except Exception as exc:
            # Failures become an unsuccessful response instead of raising.
            return ProviderResponse(
                success=False,
                content="",
                tokens_used=0,
                cost=0.0,
                latency_ms=(time.time() - t0) * 1000,
                model=self.config.model,
                provider=self.config.name,
                timestamp=datetime.now().isoformat(),
                metadata={},
                error=str(exc)
            )

    async def _mock_api_call(self, prompt: str, max_tokens: int) -> str:
        """Simulated Together AI completion."""
        await asyncio.sleep(0.3)
        return f"[Together {self.config.model}] Response to: {prompt[:100]}..."

    def calculate_cost(self, tokens: int) -> float:
        """Estimate cost assuming an even input/output token split."""
        rates = self.PRICING.get(self.config.model, {"input": 0.50, "output": 0.50})
        cost_in = (tokens / 2) * rates["input"] / 1_000_000
        cost_out = (tokens / 2) * rates["output"] / 1_000_000
        return cost_in + cost_out

    async def health_check(self) -> ProviderHealth:
        """Run a simulated Together AI health probe and store the outcome."""
        try:
            checked_at = time.time()
            await asyncio.sleep(0.2)
            check_ms = (time.time() - checked_at) * 1000
            self.health = ProviderHealth(
                status="healthy",
                last_check=datetime.now().isoformat(),
                consecutive_failures=0,
                latency_ms=check_ms,
                uptime_percentage=100.0
            )
        except Exception as exc:
            self.health = ProviderHealth(
                status="unhealthy",
                last_check=datetime.now().isoformat(),
                consecutive_failures=self.health.consecutive_failures + 1,
                last_error=str(exc)
            )
        return self.health
|