adaptive-memory-multi-model-router 1.2.2 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. package/LICENSE +21 -0
  2. package/README.md +146 -66
  3. package/dist/index.d.ts +1 -1
  4. package/dist/index.js +1 -1
  5. package/dist/integrations/airtable.js +20 -0
  6. package/dist/integrations/discord.js +18 -0
  7. package/dist/integrations/github.js +23 -0
  8. package/dist/integrations/gmail.js +19 -0
  9. package/dist/integrations/google-calendar.js +18 -0
  10. package/dist/integrations/index.js +61 -0
  11. package/dist/integrations/jira.js +21 -0
  12. package/dist/integrations/linear.js +19 -0
  13. package/dist/integrations/notion.js +19 -0
  14. package/dist/integrations/slack.js +18 -0
  15. package/dist/integrations/telegram.js +19 -0
  16. package/dist/providers/registry.js +7 -3
  17. package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +1391 -0
  18. package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +1051 -0
  19. package/docs/CONFIGURATION.md +476 -0
  20. package/docs/COUNCIL_DECISION.json +308 -0
  21. package/docs/COUNCIL_SUMMARY.md +265 -0
  22. package/docs/COUNCIL_V2.2_DECISION.md +416 -0
  23. package/docs/IMPROVEMENT_ROADMAP.md +515 -0
  24. package/docs/LLM_COUNCIL_DECISION.md +508 -0
  25. package/docs/QUICK_START_VISIBILITY.md +782 -0
  26. package/docs/REDDIT_GAP_ANALYSIS.md +299 -0
  27. package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +1180 -0
  28. package/docs/TMLPD_QNA.md +751 -0
  29. package/docs/TMLPD_V2.1_COMPLETE.md +763 -0
  30. package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +754 -0
  31. package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +446 -0
  32. package/docs/V2_IMPLEMENTATION_GUIDE.md +388 -0
  33. package/docs/VISIBILITY_ADOPTION_PLAN.md +1005 -0
  34. package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +421 -0
  35. package/docs/launch-content/README.md +457 -0
  36. package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
  37. package/docs/launch-content/assets/cumulative_savings.png +0 -0
  38. package/docs/launch-content/assets/parallel_speedup.png +0 -0
  39. package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
  40. package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
  41. package/docs/launch-content/generate_charts.py +313 -0
  42. package/docs/launch-content/hn_show_post.md +139 -0
  43. package/docs/launch-content/partner_outreach_templates.md +745 -0
  44. package/docs/launch-content/reddit_posts.md +467 -0
  45. package/docs/launch-content/twitter_thread.txt +460 -0
  46. package/examples/QUICKSTART.md +1 -1
  47. package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +313 -0
  48. package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +277 -0
  49. package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +1234 -0
  50. package/openclaw-alexa-bridge/test_fixes.js +77 -0
  51. package/package.json +120 -29
  52. package/package.json.tmp +0 -0
  53. package/qna/TMLPD_QNA.md +3 -3
  54. package/skill/SKILL.md +2 -2
  55. package/src/__tests__/integration/tmpld_integration.test.py +540 -0
  56. package/src/agents/skill_enhanced_agent.py +318 -0
  57. package/src/memory/__init__.py +15 -0
  58. package/src/memory/agentic_memory.py +353 -0
  59. package/src/memory/semantic_memory.py +444 -0
  60. package/src/memory/simple_memory.py +466 -0
  61. package/src/memory/working_memory.py +447 -0
  62. package/src/orchestration/__init__.py +52 -0
  63. package/src/orchestration/execution_engine.py +353 -0
  64. package/src/orchestration/halo_orchestrator.py +367 -0
  65. package/src/orchestration/mcts_workflow.py +498 -0
  66. package/src/orchestration/role_assigner.py +473 -0
  67. package/src/orchestration/task_planner.py +522 -0
  68. package/src/providers/__init__.py +67 -0
  69. package/src/providers/anthropic.py +304 -0
  70. package/src/providers/base.py +241 -0
  71. package/src/providers/cerebras.py +373 -0
  72. package/src/providers/registry.py +476 -0
  73. package/src/routing/__init__.py +30 -0
  74. package/src/routing/universal_router.py +621 -0
  75. package/src/skills/TMLPD-QUICKREF.md +210 -0
  76. package/src/skills/TMLPD-SETUP-SUMMARY.md +157 -0
  77. package/src/skills/TMLPD.md +540 -0
  78. package/src/skills/__tests__/skill_manager.test.ts +328 -0
  79. package/src/skills/skill_manager.py +385 -0
  80. package/src/skills/test-tmlpd.sh +108 -0
  81. package/src/skills/tmlpd-category.yaml +67 -0
  82. package/src/skills/tmlpd-monitoring.yaml +188 -0
  83. package/src/skills/tmlpd-phase.yaml +132 -0
  84. package/src/state/__init__.py +17 -0
  85. package/src/state/simple_checkpoint.py +508 -0
  86. package/src/tmlpd_agent.py +464 -0
  87. package/src/tmpld_v2.py +427 -0
  88. package/src/workflows/__init__.py +18 -0
  89. package/src/workflows/advanced_difficulty_classifier.py +377 -0
  90. package/src/workflows/chaining_executor.py +417 -0
  91. package/src/workflows/difficulty_integration.py +209 -0
  92. package/src/workflows/orchestrator.py +469 -0
  93. package/src/workflows/orchestrator_executor.py +456 -0
  94. package/src/workflows/parallelization_executor.py +382 -0
  95. package/src/workflows/router.py +311 -0
  96. package/test_integration_simple.py +86 -0
  97. package/test_mcts_workflow.py +150 -0
  98. package/test_templd_integration.py +262 -0
  99. package/test_universal_router.py +275 -0
  100. package/tmlpd-pi-extension/README.md +36 -0
  101. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +114 -0
  102. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +1 -0
  103. package/tmlpd-pi-extension/dist/cache/prefixCache.js +285 -0
  104. package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +1 -0
  105. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +58 -0
  106. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +1 -0
  107. package/tmlpd-pi-extension/dist/cache/responseCache.js +153 -0
  108. package/tmlpd-pi-extension/dist/cache/responseCache.js.map +1 -0
  109. package/tmlpd-pi-extension/dist/cli.js +59 -0
  110. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +95 -0
  111. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +1 -0
  112. package/tmlpd-pi-extension/dist/cost/costTracker.js +240 -0
  113. package/tmlpd-pi-extension/dist/cost/costTracker.js.map +1 -0
  114. package/tmlpd-pi-extension/dist/index.d.ts +723 -0
  115. package/tmlpd-pi-extension/dist/index.d.ts.map +1 -0
  116. package/tmlpd-pi-extension/dist/index.js +239 -0
  117. package/tmlpd-pi-extension/dist/index.js.map +1 -0
  118. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +82 -0
  119. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +1 -0
  120. package/tmlpd-pi-extension/dist/memory/episodicMemory.js +145 -0
  121. package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +1 -0
  122. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +102 -0
  123. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
  124. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +207 -0
  125. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +1 -0
  126. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +85 -0
  127. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
  128. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +210 -0
  129. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +1 -0
  130. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +102 -0
  131. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +1 -0
  132. package/tmlpd-pi-extension/dist/providers/localProvider.js +338 -0
  133. package/tmlpd-pi-extension/dist/providers/localProvider.js.map +1 -0
  134. package/tmlpd-pi-extension/dist/providers/registry.d.ts +55 -0
  135. package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +1 -0
  136. package/tmlpd-pi-extension/dist/providers/registry.js +138 -0
  137. package/tmlpd-pi-extension/dist/providers/registry.js.map +1 -0
  138. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +68 -0
  139. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +1 -0
  140. package/tmlpd-pi-extension/dist/routing/advancedRouter.js +332 -0
  141. package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +1 -0
  142. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +101 -0
  143. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +1 -0
  144. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +368 -0
  145. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +1 -0
  146. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +96 -0
  147. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +1 -0
  148. package/tmlpd-pi-extension/dist/utils/batchProcessor.js +170 -0
  149. package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +1 -0
  150. package/tmlpd-pi-extension/dist/utils/compression.d.ts +61 -0
  151. package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +1 -0
  152. package/tmlpd-pi-extension/dist/utils/compression.js +281 -0
  153. package/tmlpd-pi-extension/dist/utils/compression.js.map +1 -0
  154. package/tmlpd-pi-extension/dist/utils/reliability.d.ts +74 -0
  155. package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +1 -0
  156. package/tmlpd-pi-extension/dist/utils/reliability.js +177 -0
  157. package/tmlpd-pi-extension/dist/utils/reliability.js.map +1 -0
  158. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +117 -0
  159. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +1 -0
  160. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +246 -0
  161. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +1 -0
  162. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +50 -0
  163. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +1 -0
  164. package/tmlpd-pi-extension/dist/utils/tokenUtils.js +124 -0
  165. package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +1 -0
  166. package/tmlpd-pi-extension/examples/QUICKSTART.md +183 -0
  167. package/tmlpd-pi-extension/package-lock.json +75 -0
  168. package/tmlpd-pi-extension/package.json +172 -0
  169. package/tmlpd-pi-extension/python/examples.py +53 -0
  170. package/tmlpd-pi-extension/python/integrations.py +330 -0
  171. package/tmlpd-pi-extension/python/setup.py +28 -0
  172. package/tmlpd-pi-extension/python/tmlpd.py +369 -0
  173. package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +299 -0
  174. package/tmlpd-pi-extension/qna/TMLPD_QNA.md +751 -0
  175. package/tmlpd-pi-extension/skill/SKILL.md +238 -0
  176. package/{src → tmlpd-pi-extension/src}/index.ts +1 -1
  177. package/tmlpd-pi-extension/tsconfig.json +18 -0
  178. package/demo/research-demo.js +0 -266
  179. package/notebooks/quickstart.ipynb +0 -157
  180. package/rust/tmlpd.h +0 -268
  181. package/src/cache/prefixCache.ts +0 -365
  182. package/src/routing/advancedRouter.ts +0 -406
  183. package/src/utils/speculativeDecoding.ts +0 -344
  184. /package/{src → tmlpd-pi-extension/src}/cache/responseCache.ts +0 -0
  185. /package/{src → tmlpd-pi-extension/src}/cost/costTracker.ts +0 -0
  186. /package/{src → tmlpd-pi-extension/src}/memory/episodicMemory.ts +0 -0
  187. /package/{src → tmlpd-pi-extension/src}/orchestration/haloOrchestrator.ts +0 -0
  188. /package/{src → tmlpd-pi-extension/src}/orchestration/mctsWorkflow.ts +0 -0
  189. /package/{src → tmlpd-pi-extension/src}/providers/localProvider.ts +0 -0
  190. /package/{src → tmlpd-pi-extension/src}/providers/registry.ts +0 -0
  191. /package/{src → tmlpd-pi-extension/src}/tools/tmlpdTools.ts +0 -0
  192. /package/{src → tmlpd-pi-extension/src}/utils/batchProcessor.ts +0 -0
  193. /package/{src → tmlpd-pi-extension/src}/utils/compression.ts +0 -0
  194. /package/{src → tmlpd-pi-extension/src}/utils/reliability.ts +0 -0
  195. /package/{src → tmlpd-pi-extension/src}/utils/tokenUtils.ts +0 -0
package/src/providers/cerebras.py
@@ -0,0 +1,373 @@
+"""
+Cost-Optimized Providers - Agent 3 Output
+
+Implements Cerebras and Groq providers for cost-effective inference.
+"""
+
+import os
+import time
+from typing import Dict, Any
+from datetime import datetime
+import asyncio
+
+from .base import BaseProvider, ProviderConfig, ProviderResponse, ProviderHealth
+
+
+class CerebrasProvider(BaseProvider):
+    """
+    Cerebras provider - Extremely fast LLaMA inference.
+
+    Key advantage: 10x faster than standard inference, very low cost.
+    Ideal for: Simple tasks, prototyping, high-volume requests.
+    """
+
+    # Pricing (per 1M tokens - significantly cheaper)
+    PRICING = {
+        "llama-3.3-70b": {"input": 0.10, "output": 0.10},
+        "llama-3.1-8b": {"input": 0.05, "output": 0.05},
+    }
+
+    def __init__(self, config: ProviderConfig):
+        super().__init__(config)
+        self.api_key = os.getenv(config.api_key_env)
+
+        if not self.api_key:
+            raise ValueError(f"API key not found: {config.api_key_env}")
+
+    async def execute(
+        self,
+        prompt: str,
+        max_tokens: int = 2000,
+        temperature: float = 0.7,
+        **kwargs
+    ) -> ProviderResponse:
+        """
+        Execute prompt with Cerebras (LLaMA on Cerebras hardware).
+
+        Note: Placeholder implementation.
+        In production, use actual Cerebras SDK or API.
+        """
+        start_time = time.time()
+
+        try:
+            # Simulate Cerebras fast inference (0.1s - much faster!)
+            response_content = await self._mock_api_call(prompt, max_tokens)
+
+            latency_ms = (time.time() - start_time) * 1000
+            estimated_tokens = len(prompt) // 4 + len(response_content) // 4
+
+            response = ProviderResponse(
+                success=True,
+                content=response_content,
+                tokens_used=estimated_tokens,
+                cost=self.calculate_cost(estimated_tokens),
+                latency_ms=latency_ms,
+                model=self.config.model,
+                provider=self.config.name,
+                timestamp=datetime.now().isoformat(),
+                metadata={
+                    "temperature": temperature,
+                    "max_tokens": max_tokens,
+                    "inference_speed": "ultra_fast"
+                }
+            )
+
+            return response
+
+        except Exception as e:
+            latency_ms = (time.time() - start_time) * 1000
+
+            return ProviderResponse(
+                success=False,
+                content="",
+                tokens_used=0,
+                cost=0.0,
+                latency_ms=latency_ms,
+                model=self.config.model,
+                provider=self.config.name,
+                timestamp=datetime.now().isoformat(),
+                metadata={},
+                error=str(e)
+            )
+
+    async def _mock_api_call(self, prompt: str, max_tokens: int) -> str:
+        """Mock Cerebras API call (ultra fast!)"""
+        await asyncio.sleep(0.1)  # 10x faster than others
+        return f"[Cerebras {self.config.model}] Fast response to: {prompt[:100]}..."
+
+    def calculate_cost(self, tokens: int) -> float:
+        """Calculate cost - VERY LOW compared to others"""
+        pricing = self.PRICING.get(self.config.model, {"input": 0.10, "output": 0.10})
+
+        input_cost = (tokens / 2) * pricing["input"] / 1_000_000
+        output_cost = (tokens / 2) * pricing["output"] / 1_000_000
+
+        return input_cost + output_cost
+
+    async def health_check(self) -> ProviderHealth:
+        """Check Cerebras API health"""
+        try:
+            start = time.time()
+            await asyncio.sleep(0.05)  # Very fast health check
+            latency_ms = (time.time() - start) * 1000
+
+            self.health = ProviderHealth(
+                status="healthy",
+                last_check=datetime.now().isoformat(),
+                consecutive_failures=0,
+                latency_ms=latency_ms,
+                uptime_percentage=100.0
+            )
+
+            return self.health
+
+        except Exception as e:
+            self.health = ProviderHealth(
+                status="unhealthy",
+                last_check=datetime.now().isoformat(),
+                consecutive_failures=self.health.consecutive_failures + 1,
+                last_error=str(e)
+            )
+
+            return self.health
+
+
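The execute docstring above flags the Cerebras call as a placeholder. For reference, a minimal sketch of what the real request could look like, assuming the cerebras-cloud-sdk package and its OpenAI-style chat.completions.create interface; the AsyncCerebras client and the response shape are assumptions, not code from this release:

```python
# Sketch only: assumes the cerebras-cloud-sdk package (AsyncCerebras client with
# an OpenAI-style chat.completions.create). Not part of this package version.
from cerebras.cloud.sdk import AsyncCerebras


async def cerebras_chat(api_key: str, model: str, prompt: str,
                        max_tokens: int = 2000, temperature: float = 0.7) -> str:
    """Send one chat completion to Cerebras and return the response text."""
    client = AsyncCerebras(api_key=api_key)
    response = await client.chat.completions.create(
        model=model,  # e.g. "llama-3.3-70b", matching the PRICING keys above
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    return response.choices[0].message.content
```

Dropping something like this into _mock_api_call would keep the rest of execute (timing, cost estimation, ProviderResponse construction) unchanged.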
+class GroqProvider(BaseProvider):
+    """
+    Groq provider - Fast inference on LPU (Language Processing Units).
+
+    Key advantage: Very fast, very low cost, open models.
+    Ideal for: Trivial/simple tasks, prototyping, high-volume.
+    """
+
+    # Pricing (per 1M tokens - extremely low)
+    PRICING = {
+        "llama-3.3-70b-8192": {"input": 0.59, "output": 0.79},
+        "llama-3.1-70b-8192": {"input": 0.59, "output": 0.79},
+        "mixtral-8x7b-32768": {"input": 0.27, "output": 0.27},
+    }
+
+    def __init__(self, config: ProviderConfig):
+        super().__init__(config)
+        self.api_key = os.getenv(config.api_key_env)
+
+        if not self.api_key:
+            raise ValueError(f"API key not found: {config.api_key_env}")
+
+    async def execute(
+        self,
+        prompt: str,
+        max_tokens: int = 2000,
+        temperature: float = 0.7,
+        **kwargs
+    ) -> ProviderResponse:
+        """
+        Execute prompt with Groq.
+
+        Note: Placeholder implementation.
+        In production, use Groq SDK:
+        ```python
+        from groq import Groq
+        client = Groq(api_key=self.api_key)
+        response = client.chat.completions.create(
+            model=self.config.model,
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=max_tokens,
+            temperature=temperature
+        )
+        ```
+        """
+        start_time = time.time()
+
+        try:
+            # Simulate Groq fast inference
+            response_content = await self._mock_api_call(prompt, max_tokens)
+
+            latency_ms = (time.time() - start_time) * 1000
+            estimated_tokens = len(prompt) // 4 + len(response_content) // 4
+
+            response = ProviderResponse(
+                success=True,
+                content=response_content,
+                tokens_used=estimated_tokens,
+                cost=self.calculate_cost(estimated_tokens),
+                latency_ms=latency_ms,
+                model=self.config.model,
+                provider=self.config.name,
+                timestamp=datetime.now().isoformat(),
+                metadata={
+                    "temperature": temperature,
+                    "max_tokens": max_tokens,
+                    "inference_speed": "very_fast"
+                }
+            )
+
+            return response
+
+        except Exception as e:
+            latency_ms = (time.time() - start_time) * 1000
+
+            return ProviderResponse(
+                success=False,
+                content="",
+                tokens_used=0,
+                cost=0.0,
+                latency_ms=latency_ms,
+                model=self.config.model,
+                provider=self.config.name,
+                timestamp=datetime.now().isoformat(),
+                metadata={},
+                error=str(e)
+            )
+
+    async def _mock_api_call(self, prompt: str, max_tokens: int) -> str:
+        """Mock Groq API call (fast!)"""
+        await asyncio.sleep(0.15)
+        return f"[Groq {self.config.model}] Fast response to: {prompt[:100]}..."
+
+    def calculate_cost(self, tokens: int) -> float:
+        """Calculate cost - LOW compared to premium providers"""
+        pricing = self.PRICING.get(self.config.model, {"input": 0.59, "output": 0.79})
+
+        input_cost = (tokens / 2) * pricing["input"] / 1_000_000
+        output_cost = (tokens / 2) * pricing["output"] / 1_000_000
+
+        return input_cost + output_cost
+
+    async def health_check(self) -> ProviderHealth:
+        """Check Groq API health"""
+        try:
+            start = time.time()
+            await asyncio.sleep(0.1)
+            latency_ms = (time.time() - start) * 1000
+
+            self.health = ProviderHealth(
+                status="healthy",
+                last_check=datetime.now().isoformat(),
+                consecutive_failures=0,
+                latency_ms=latency_ms,
+                uptime_percentage=100.0
+            )
+
+            return self.health
+
+        except Exception as e:
+            self.health = ProviderHealth(
+                status="unhealthy",
+                last_check=datetime.now().isoformat(),
+                consecutive_failures=self.health.consecutive_failures + 1,
+                last_error=str(e)
+            )
+
+            return self.health
+
+
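All three providers price the same way: usage is estimated at roughly one token per four characters, the total is split evenly between input and output, and the per-1M-token rates from the PRICING tables are applied. A short worked example of that heuristic, using the Groq llama-3.3-70b-8192 rates listed above:

```python
# Worked example of the calculate_cost heuristic shared by these providers.
text_in = "x" * 4000                               # ~1000 tokens at len(text) // 4
text_out = "y" * 4000                              # ~1000 tokens
tokens = len(text_in) // 4 + len(text_out) // 4    # 2000 estimated tokens

pricing = {"input": 0.59, "output": 0.79}          # Groq llama-3.3-70b-8192, per 1M tokens
input_cost = (tokens / 2) * pricing["input"] / 1_000_000    # 1000 * 0.59 / 1e6 = 0.00059
output_cost = (tokens / 2) * pricing["output"] / 1_000_000  # 1000 * 0.79 / 1e6 = 0.00079

print(f"{input_cost + output_cost:.5f}")           # 0.00138 -> about $0.0014 per call
```

The even input/output split is a rough approximation; a production implementation would use the token counts reported by the provider's API instead.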
+class TogetherProvider(BaseProvider):
+    """
+    Together AI provider - Open models at competitive pricing.
+
+    Key advantage: Wide variety of open-source models, good price/performance.
+    Ideal for: Specific model requirements, cost optimization.
+    """
+
+    # Pricing (per 1M tokens)
+    PRICING = {
+        "mistralai/Mixtral-8x7B-Instruct-v0.1": {"input": 0.50, "output": 0.50},
+        "meta-llama/Llama-3-70b-chat-hf": {"input": 0.70, "output": 0.70},
+    }
+
+    def __init__(self, config: ProviderConfig):
+        super().__init__(config)
+        self.api_key = os.getenv(config.api_key_env)
+
+        if not self.api_key:
+            raise ValueError(f"API key not found: {config.api_key_env}")
+
+    async def execute(
+        self,
+        prompt: str,
+        max_tokens: int = 2000,
+        temperature: float = 0.7,
+        **kwargs
+    ) -> ProviderResponse:
+        """Execute prompt with Together AI"""
+        start_time = time.time()
+
+        try:
+            response_content = await self._mock_api_call(prompt, max_tokens)
+            latency_ms = (time.time() - start_time) * 1000
+            estimated_tokens = len(prompt) // 4 + len(response_content) // 4
+
+            response = ProviderResponse(
+                success=True,
+                content=response_content,
+                tokens_used=estimated_tokens,
+                cost=self.calculate_cost(estimated_tokens),
+                latency_ms=latency_ms,
+                model=self.config.model,
+                provider=self.config.name,
+                timestamp=datetime.now().isoformat(),
+                metadata={
+                    "temperature": temperature,
+                    "max_tokens": max_tokens
+                }
+            )
+
+            return response
+
+        except Exception as e:
+            latency_ms = (time.time() - start_time) * 1000
+
+            return ProviderResponse(
+                success=False,
+                content="",
+                tokens_used=0,
+                cost=0.0,
+                latency_ms=latency_ms,
+                model=self.config.model,
+                provider=self.config.name,
+                timestamp=datetime.now().isoformat(),
+                metadata={},
+                error=str(e)
+            )
+
+    async def _mock_api_call(self, prompt: str, max_tokens: int) -> str:
+        """Mock Together API call"""
+        await asyncio.sleep(0.3)
+        return f"[Together {self.config.model}] Response to: {prompt[:100]}..."
+
+    def calculate_cost(self, tokens: int) -> float:
+        """Calculate cost"""
+        pricing = self.PRICING.get(self.config.model, {"input": 0.50, "output": 0.50})
+
+        input_cost = (tokens / 2) * pricing["input"] / 1_000_000
+        output_cost = (tokens / 2) * pricing["output"] / 1_000_000
+
+        return input_cost + output_cost
+
+    async def health_check(self) -> ProviderHealth:
+        """Check Together API health"""
+        try:
+            start = time.time()
+            await asyncio.sleep(0.2)
+            latency_ms = (time.time() - start) * 1000
+
+            self.health = ProviderHealth(
+                status="healthy",
+                last_check=datetime.now().isoformat(),
+                consecutive_failures=0,
+                latency_ms=latency_ms,
+                uptime_percentage=100.0
+            )
+
+            return self.health
+
+        except Exception as e:
+            self.health = ProviderHealth(
+                status="unhealthy",
+                last_check=datetime.now().isoformat(),
+                consecutive_failures=self.health.consecutive_failures + 1,
+                last_error=str(e)
+            )
+
+            return self.health
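A minimal usage sketch for these provider classes. The ProviderConfig constructor arguments are inferred from the config.name, config.model, and config.api_key_env accesses in this file; the actual dataclass lives in src/providers/base.py, which is not shown in this diff, so both the import paths and the constructor signature are assumptions:

```python
# Hypothetical usage sketch. Import paths and the ProviderConfig signature are
# assumptions based on this diff (src/providers/base.py is not shown here).
import asyncio
import os

from src.providers.base import ProviderConfig
from src.providers.cerebras import GroqProvider

# The mock implementation only checks that the env var exists.
os.environ.setdefault("GROQ_API_KEY", "dummy-key-for-the-mock-path")

config = ProviderConfig(
    name="groq",
    model="mixtral-8x7b-32768",
    api_key_env="GROQ_API_KEY",
)
provider = GroqProvider(config)


async def main() -> None:
    health = await provider.health_check()
    result = await provider.execute("Summarize this release in one sentence.")
    print(health.status, result.success, f"${result.cost:.6f}", f"{result.latency_ms:.0f}ms")


asyncio.run(main())
```

With the mocked _mock_api_call in place this only exercises the plumbing (timing, cost estimation, health bookkeeping); swapping in a real SDK call is what turns it into an actual provider.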