adaptive-memory-multi-model-router 1.2.2 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. package/LICENSE +21 -0
  2. package/README.md +146 -66
  3. package/dist/index.d.ts +1 -1
  4. package/dist/index.js +1 -1
  5. package/dist/integrations/airtable.js +20 -0
  6. package/dist/integrations/discord.js +18 -0
  7. package/dist/integrations/github.js +23 -0
  8. package/dist/integrations/gmail.js +19 -0
  9. package/dist/integrations/google-calendar.js +18 -0
  10. package/dist/integrations/index.js +61 -0
  11. package/dist/integrations/jira.js +21 -0
  12. package/dist/integrations/linear.js +19 -0
  13. package/dist/integrations/notion.js +19 -0
  14. package/dist/integrations/slack.js +18 -0
  15. package/dist/integrations/telegram.js +19 -0
  16. package/dist/providers/registry.js +7 -3
  17. package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +1391 -0
  18. package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +1051 -0
  19. package/docs/CONFIGURATION.md +476 -0
  20. package/docs/COUNCIL_DECISION.json +308 -0
  21. package/docs/COUNCIL_SUMMARY.md +265 -0
  22. package/docs/COUNCIL_V2.2_DECISION.md +416 -0
  23. package/docs/IMPROVEMENT_ROADMAP.md +515 -0
  24. package/docs/LLM_COUNCIL_DECISION.md +508 -0
  25. package/docs/QUICK_START_VISIBILITY.md +782 -0
  26. package/docs/REDDIT_GAP_ANALYSIS.md +299 -0
  27. package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +1180 -0
  28. package/docs/TMLPD_QNA.md +751 -0
  29. package/docs/TMLPD_V2.1_COMPLETE.md +763 -0
  30. package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +754 -0
  31. package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +446 -0
  32. package/docs/V2_IMPLEMENTATION_GUIDE.md +388 -0
  33. package/docs/VISIBILITY_ADOPTION_PLAN.md +1005 -0
  34. package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +421 -0
  35. package/docs/launch-content/README.md +457 -0
  36. package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
  37. package/docs/launch-content/assets/cumulative_savings.png +0 -0
  38. package/docs/launch-content/assets/parallel_speedup.png +0 -0
  39. package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
  40. package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
  41. package/docs/launch-content/generate_charts.py +313 -0
  42. package/docs/launch-content/hn_show_post.md +139 -0
  43. package/docs/launch-content/partner_outreach_templates.md +745 -0
  44. package/docs/launch-content/reddit_posts.md +467 -0
  45. package/docs/launch-content/twitter_thread.txt +460 -0
  46. package/examples/QUICKSTART.md +1 -1
  47. package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +313 -0
  48. package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +277 -0
  49. package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +1234 -0
  50. package/openclaw-alexa-bridge/test_fixes.js +77 -0
  51. package/package.json +120 -29
  52. package/package.json.tmp +0 -0
  53. package/qna/TMLPD_QNA.md +3 -3
  54. package/skill/SKILL.md +2 -2
  55. package/src/__tests__/integration/tmpld_integration.test.py +540 -0
  56. package/src/agents/skill_enhanced_agent.py +318 -0
  57. package/src/memory/__init__.py +15 -0
  58. package/src/memory/agentic_memory.py +353 -0
  59. package/src/memory/semantic_memory.py +444 -0
  60. package/src/memory/simple_memory.py +466 -0
  61. package/src/memory/working_memory.py +447 -0
  62. package/src/orchestration/__init__.py +52 -0
  63. package/src/orchestration/execution_engine.py +353 -0
  64. package/src/orchestration/halo_orchestrator.py +367 -0
  65. package/src/orchestration/mcts_workflow.py +498 -0
  66. package/src/orchestration/role_assigner.py +473 -0
  67. package/src/orchestration/task_planner.py +522 -0
  68. package/src/providers/__init__.py +67 -0
  69. package/src/providers/anthropic.py +304 -0
  70. package/src/providers/base.py +241 -0
  71. package/src/providers/cerebras.py +373 -0
  72. package/src/providers/registry.py +476 -0
  73. package/src/routing/__init__.py +30 -0
  74. package/src/routing/universal_router.py +621 -0
  75. package/src/skills/TMLPD-QUICKREF.md +210 -0
  76. package/src/skills/TMLPD-SETUP-SUMMARY.md +157 -0
  77. package/src/skills/TMLPD.md +540 -0
  78. package/src/skills/__tests__/skill_manager.test.ts +328 -0
  79. package/src/skills/skill_manager.py +385 -0
  80. package/src/skills/test-tmlpd.sh +108 -0
  81. package/src/skills/tmlpd-category.yaml +67 -0
  82. package/src/skills/tmlpd-monitoring.yaml +188 -0
  83. package/src/skills/tmlpd-phase.yaml +132 -0
  84. package/src/state/__init__.py +17 -0
  85. package/src/state/simple_checkpoint.py +508 -0
  86. package/src/tmlpd_agent.py +464 -0
  87. package/src/tmpld_v2.py +427 -0
  88. package/src/workflows/__init__.py +18 -0
  89. package/src/workflows/advanced_difficulty_classifier.py +377 -0
  90. package/src/workflows/chaining_executor.py +417 -0
  91. package/src/workflows/difficulty_integration.py +209 -0
  92. package/src/workflows/orchestrator.py +469 -0
  93. package/src/workflows/orchestrator_executor.py +456 -0
  94. package/src/workflows/parallelization_executor.py +382 -0
  95. package/src/workflows/router.py +311 -0
  96. package/test_integration_simple.py +86 -0
  97. package/test_mcts_workflow.py +150 -0
  98. package/test_templd_integration.py +262 -0
  99. package/test_universal_router.py +275 -0
  100. package/tmlpd-pi-extension/README.md +36 -0
  101. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +114 -0
  102. package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +1 -0
  103. package/tmlpd-pi-extension/dist/cache/prefixCache.js +285 -0
  104. package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +1 -0
  105. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +58 -0
  106. package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +1 -0
  107. package/tmlpd-pi-extension/dist/cache/responseCache.js +153 -0
  108. package/tmlpd-pi-extension/dist/cache/responseCache.js.map +1 -0
  109. package/tmlpd-pi-extension/dist/cli.js +59 -0
  110. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +95 -0
  111. package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +1 -0
  112. package/tmlpd-pi-extension/dist/cost/costTracker.js +240 -0
  113. package/tmlpd-pi-extension/dist/cost/costTracker.js.map +1 -0
  114. package/tmlpd-pi-extension/dist/index.d.ts +723 -0
  115. package/tmlpd-pi-extension/dist/index.d.ts.map +1 -0
  116. package/tmlpd-pi-extension/dist/index.js +239 -0
  117. package/tmlpd-pi-extension/dist/index.js.map +1 -0
  118. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +82 -0
  119. package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +1 -0
  120. package/tmlpd-pi-extension/dist/memory/episodicMemory.js +145 -0
  121. package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +1 -0
  122. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +102 -0
  123. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
  124. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +207 -0
  125. package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +1 -0
  126. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +85 -0
  127. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
  128. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +210 -0
  129. package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +1 -0
  130. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +102 -0
  131. package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +1 -0
  132. package/tmlpd-pi-extension/dist/providers/localProvider.js +338 -0
  133. package/tmlpd-pi-extension/dist/providers/localProvider.js.map +1 -0
  134. package/tmlpd-pi-extension/dist/providers/registry.d.ts +55 -0
  135. package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +1 -0
  136. package/tmlpd-pi-extension/dist/providers/registry.js +138 -0
  137. package/tmlpd-pi-extension/dist/providers/registry.js.map +1 -0
  138. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +68 -0
  139. package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +1 -0
  140. package/tmlpd-pi-extension/dist/routing/advancedRouter.js +332 -0
  141. package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +1 -0
  142. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +101 -0
  143. package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +1 -0
  144. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +368 -0
  145. package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +1 -0
  146. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +96 -0
  147. package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +1 -0
  148. package/tmlpd-pi-extension/dist/utils/batchProcessor.js +170 -0
  149. package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +1 -0
  150. package/tmlpd-pi-extension/dist/utils/compression.d.ts +61 -0
  151. package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +1 -0
  152. package/tmlpd-pi-extension/dist/utils/compression.js +281 -0
  153. package/tmlpd-pi-extension/dist/utils/compression.js.map +1 -0
  154. package/tmlpd-pi-extension/dist/utils/reliability.d.ts +74 -0
  155. package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +1 -0
  156. package/tmlpd-pi-extension/dist/utils/reliability.js +177 -0
  157. package/tmlpd-pi-extension/dist/utils/reliability.js.map +1 -0
  158. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +117 -0
  159. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +1 -0
  160. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +246 -0
  161. package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +1 -0
  162. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +50 -0
  163. package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +1 -0
  164. package/tmlpd-pi-extension/dist/utils/tokenUtils.js +124 -0
  165. package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +1 -0
  166. package/tmlpd-pi-extension/examples/QUICKSTART.md +183 -0
  167. package/tmlpd-pi-extension/package-lock.json +75 -0
  168. package/tmlpd-pi-extension/package.json +172 -0
  169. package/tmlpd-pi-extension/python/examples.py +53 -0
  170. package/tmlpd-pi-extension/python/integrations.py +330 -0
  171. package/tmlpd-pi-extension/python/setup.py +28 -0
  172. package/tmlpd-pi-extension/python/tmlpd.py +369 -0
  173. package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +299 -0
  174. package/tmlpd-pi-extension/qna/TMLPD_QNA.md +751 -0
  175. package/tmlpd-pi-extension/skill/SKILL.md +238 -0
  176. package/{src → tmlpd-pi-extension/src}/index.ts +1 -1
  177. package/tmlpd-pi-extension/tsconfig.json +18 -0
  178. package/demo/research-demo.js +0 -266
  179. package/notebooks/quickstart.ipynb +0 -157
  180. package/rust/tmlpd.h +0 -268
  181. package/src/cache/prefixCache.ts +0 -365
  182. package/src/routing/advancedRouter.ts +0 -406
  183. package/src/utils/speculativeDecoding.ts +0 -344
  184. /package/{src → tmlpd-pi-extension/src}/cache/responseCache.ts +0 -0
  185. /package/{src → tmlpd-pi-extension/src}/cost/costTracker.ts +0 -0
  186. /package/{src → tmlpd-pi-extension/src}/memory/episodicMemory.ts +0 -0
  187. /package/{src → tmlpd-pi-extension/src}/orchestration/haloOrchestrator.ts +0 -0
  188. /package/{src → tmlpd-pi-extension/src}/orchestration/mctsWorkflow.ts +0 -0
  189. /package/{src → tmlpd-pi-extension/src}/providers/localProvider.ts +0 -0
  190. /package/{src → tmlpd-pi-extension/src}/providers/registry.ts +0 -0
  191. /package/{src → tmlpd-pi-extension/src}/tools/tmlpdTools.ts +0 -0
  192. /package/{src → tmlpd-pi-extension/src}/utils/batchProcessor.ts +0 -0
  193. /package/{src → tmlpd-pi-extension/src}/utils/compression.ts +0 -0
  194. /package/{src → tmlpd-pi-extension/src}/utils/reliability.ts +0 -0
  195. /package/{src → tmlpd-pi-extension/src}/utils/tokenUtils.ts +0 -0
@@ -0,0 +1,304 @@
1
+ """
2
+ Anthropic Provider Implementation - Agent 2 Output (Part 1)
3
+
4
+ Implements Anthropic Claude API provider.
5
+ """
6
+
7
+ import os
8
+ import time
9
+ from typing import Dict, Any
10
+ from datetime import datetime
11
+
12
+ from .base import BaseProvider, ProviderConfig, ProviderResponse, ProviderHealth
13
+
14
+
15
class AnthropicProvider(BaseProvider):
    """
    Provider backed by the Anthropic Claude API.

    Supports models: claude-sonnet-4, claude-opus-4, claude-haiku-4.
    This version ships a mock transport; replace `_mock_api_call` with the
    real `anthropic` SDK for production use.
    """

    # Per-million-token prices in USD (as of 2025).
    PRICING = {
        "claude-sonnet-4": {"input": 3.0, "output": 15.0},
        "claude-opus-4": {"input": 15.0, "output": 75.0},
        "claude-haiku-4": {"input": 0.25, "output": 1.25},
    }

    def __init__(self, config: ProviderConfig):
        super().__init__(config)
        self.api_key = os.getenv(config.api_key_env)
        if not self.api_key:
            raise ValueError(f"API key not found: {config.api_key_env}")

    async def execute(
        self,
        prompt: str,
        max_tokens: int = 2000,
        temperature: float = 0.7,
        **kwargs
    ) -> ProviderResponse:
        """
        Run `prompt` through Claude and return a standardized response.

        Placeholder implementation. In production, call the real API:
        ```python
        import anthropic
        client = anthropic.Anthropic(api_key=self.api_key)
        message = client.messages.create(
            model=self.config.model,
            max_tokens=max_tokens,
            temperature=temperature,
            messages=[{"role": "user", "content": prompt}]
        )
        ```
        """
        t0 = time.time()
        try:
            # Mock transport stands in for the Anthropic SDK here.
            reply = await self._mock_api_call(prompt, max_tokens)
            elapsed_ms = (time.time() - t0) * 1000
            # Crude estimate: roughly 4 characters per token.
            approx_tokens = len(prompt) // 4 + len(reply) // 4
            return ProviderResponse(
                success=True,
                content=reply,
                tokens_used=approx_tokens,
                cost=self.calculate_cost(approx_tokens),
                latency_ms=elapsed_ms,
                model=self.config.model,
                provider=self.config.name,
                timestamp=datetime.now().isoformat(),
                metadata={"temperature": temperature, "max_tokens": max_tokens},
            )
        except Exception as exc:
            elapsed_ms = (time.time() - t0) * 1000
            return ProviderResponse(
                success=False,
                content="",
                tokens_used=0,
                cost=0.0,
                latency_ms=elapsed_ms,
                model=self.config.model,
                provider=self.config.name,
                timestamp=datetime.now().isoformat(),
                metadata={},
                error=str(exc),
            )

    async def _mock_api_call(self, prompt: str, max_tokens: int) -> str:
        """Stand-in for the real API call; simulates network latency."""
        import asyncio
        await asyncio.sleep(0.5)
        return f"[Anthropic {self.config.model}] Response to: {prompt[:100]}..."

    def calculate_cost(self, tokens: int) -> float:
        """
        Estimate cost in USD.

        Anthropic bills input and output tokens separately; a 50/50
        split is assumed for simplicity.
        """
        rates = self.PRICING.get(self.config.model, {"input": 3.0, "output": 15.0})
        spent_in = (tokens / 2) * rates["input"] / 1_000_000
        spent_out = (tokens / 2) * rates["output"] / 1_000_000
        return spent_in + spent_out

    async def health_check(self) -> ProviderHealth:
        """
        Probe Anthropic API health and update `self.health`.

        Mock probe; a production version would issue a minimal real
        request and time it, e.g.:
        ```python
        client = anthropic.Anthropic(api_key=self.api_key)
        start = time.time()
        # make minimal request ...
        latency = (time.time() - start) * 1000
        ```
        """
        import asyncio
        try:
            t0 = time.time()
            await asyncio.sleep(0.2)  # simulated ping
            self.health = ProviderHealth(
                status="healthy",
                last_check=datetime.now().isoformat(),
                consecutive_failures=0,
                latency_ms=(time.time() - t0) * 1000,
                uptime_percentage=100.0,
            )
        except Exception as exc:
            self.health = ProviderHealth(
                status="unhealthy",
                last_check=datetime.now().isoformat(),
                consecutive_failures=self.health.consecutive_failures + 1,
                last_error=str(exc),
            )
        return self.health
173
+
174
+
175
class OpenAIProvider(BaseProvider):
    """
    Provider backed by the OpenAI GPT API.

    Supports models: gpt-4o, gpt-4-turbo, gpt-3.5-turbo.
    This version ships a mock transport; replace `_mock_api_call` with the
    real `openai` SDK for production use.
    """

    # Per-million-token prices in USD (as of 2025).
    PRICING = {
        "gpt-4o": {"input": 2.50, "output": 10.0},
        "gpt-4-turbo": {"input": 10.0, "output": 30.0},
        "gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
    }

    def __init__(self, config: ProviderConfig):
        super().__init__(config)
        self.api_key = os.getenv(config.api_key_env)
        if not self.api_key:
            raise ValueError(f"API key not found: {config.api_key_env}")

    async def execute(
        self,
        prompt: str,
        max_tokens: int = 2000,
        temperature: float = 0.7,
        **kwargs
    ) -> ProviderResponse:
        """
        Run `prompt` through GPT and return a standardized response.

        Placeholder implementation. In production, call the real API:
        ```python
        from openai import AsyncOpenAI
        client = AsyncOpenAI(api_key=self.api_key)
        response = await client.chat.completions.create(
            model=self.config.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature
        )
        ```
        """
        t0 = time.time()
        try:
            # Mock transport stands in for the OpenAI SDK here.
            reply = await self._mock_api_call(prompt, max_tokens)
            elapsed_ms = (time.time() - t0) * 1000
            # Crude estimate: roughly 4 characters per token.
            approx_tokens = len(prompt) // 4 + len(reply) // 4
            return ProviderResponse(
                success=True,
                content=reply,
                tokens_used=approx_tokens,
                cost=self.calculate_cost(approx_tokens),
                latency_ms=elapsed_ms,
                model=self.config.model,
                provider=self.config.name,
                timestamp=datetime.now().isoformat(),
                metadata={"temperature": temperature, "max_tokens": max_tokens},
            )
        except Exception as exc:
            elapsed_ms = (time.time() - t0) * 1000
            return ProviderResponse(
                success=False,
                content="",
                tokens_used=0,
                cost=0.0,
                latency_ms=elapsed_ms,
                model=self.config.model,
                provider=self.config.name,
                timestamp=datetime.now().isoformat(),
                metadata={},
                error=str(exc),
            )

    async def _mock_api_call(self, prompt: str, max_tokens: int) -> str:
        """Stand-in for the real API call; simulates network latency."""
        import asyncio
        await asyncio.sleep(0.4)
        return f"[OpenAI {self.config.model}] Response to: {prompt[:100]}..."

    def calculate_cost(self, tokens: int) -> float:
        """Estimate cost in USD assuming a 50/50 input/output token split."""
        rates = self.PRICING.get(self.config.model, {"input": 2.50, "output": 10.0})
        spent_in = (tokens / 2) * rates["input"] / 1_000_000
        spent_out = (tokens / 2) * rates["output"] / 1_000_000
        return spent_in + spent_out

    async def health_check(self) -> ProviderHealth:
        """Probe OpenAI API health (mock) and update `self.health`."""
        import asyncio
        try:
            t0 = time.time()
            await asyncio.sleep(0.2)  # simulated ping
            self.health = ProviderHealth(
                status="healthy",
                last_check=datetime.now().isoformat(),
                consecutive_failures=0,
                latency_ms=(time.time() - t0) * 1000,
                uptime_percentage=100.0,
            )
        except Exception as exc:
            self.health = ProviderHealth(
                status="unhealthy",
                last_check=datetime.now().isoformat(),
                consecutive_failures=self.health.consecutive_failures + 1,
                last_error=str(exc),
            )
        return self.health
@@ -0,0 +1,241 @@
1
+ """
2
+ Base Provider Interface - Agent 1 Output
3
+
4
+ Abstract base class for all LLM providers.
5
+ Provides unified interface for multi-provider system.
6
+ """
7
+
8
+ from abc import ABC, abstractmethod
9
+ from typing import Dict, Any, Optional
10
+ from datetime import datetime
11
+ from dataclasses import dataclass
12
+ import asyncio
13
+
14
+
15
@dataclass
class ProviderConfig:
    """Static configuration describing one LLM provider/model pairing."""
    name: str  # provider name (e.g. "anthropic"); copied into every response
    model: str  # model identifier (e.g. "claude-sonnet-4"); selects a PRICING entry
    api_key_env: str  # name of the environment variable holding the API key
    priority: int = 1  # routing priority; semantics defined by the router — TODO confirm
    max_retries: int = 3  # default retry budget for BaseProvider.execute_with_retry
    timeout: int = 30  # intended request timeout in seconds (not enforced in this chunk — TODO confirm)
    enabled: bool = True  # presumably gates provider selection by the router — TODO confirm
25
+
26
+
27
@dataclass
class ProviderHealth:
    """Mutable health snapshot for a provider, maintained by BaseProvider."""
    status: str  # "healthy", "degraded", or "unhealthy"
    last_check: str  # ISO-8601 timestamp of the most recent check/update
    consecutive_failures: int  # failures since the last success; drives the circuit breaker
    last_error: Optional[str] = None  # message from the most recent failure, if any
    latency_ms: Optional[float] = None  # last measured probe latency, when available
    uptime_percentage: float = 100.0  # reported uptime; only ever set to 100.0 in this chunk
36
+
37
+
38
@dataclass
class ProviderResponse:
    """Standardized result returned by every provider's execute()."""
    success: bool  # True when the call completed; False on error (see `error`)
    content: str  # model output text; "" on failure
    tokens_used: int  # estimated total tokens (input + output)
    cost: float  # estimated cost in USD for this call
    latency_ms: float  # wall-clock duration of the call in milliseconds
    model: str  # model identifier that served the request
    provider: str  # provider name from ProviderConfig
    timestamp: str  # ISO-8601 completion timestamp
    metadata: Dict[str, Any]  # provider-specific extras (e.g. temperature, max_tokens)
    error: Optional[str] = None  # error message when success is False
51
+
52
+
53
class BaseProvider(ABC):
    """
    Abstract base class for all LLM providers.

    Subclasses implement `execute`, `calculate_cost`, and `health_check`;
    this base supplies retry with exponential backoff, a simple circuit
    breaker, health bookkeeping, and usage statistics, so every provider
    presents a consistent interface to the multi-provider system.
    """

    def __init__(self, config: ProviderConfig):
        self.config = config
        self.health = ProviderHealth(
            status="healthy",
            last_check=datetime.now().isoformat(),
            consecutive_failures=0
        )
        # Usage counters surfaced via get_stats().
        self._request_count = 0
        self._success_count = 0
        self._total_tokens = 0
        self._total_cost = 0.0

    @abstractmethod
    async def execute(
        self,
        prompt: str,
        max_tokens: int = 2000,
        temperature: float = 0.7,
        **kwargs
    ) -> ProviderResponse:
        """
        Execute a prompt with this provider.

        Args:
            prompt: The prompt to execute
            max_tokens: Maximum tokens in response
            temperature: Sampling temperature
            **kwargs: Provider-specific parameters

        Returns:
            ProviderResponse with standardized format
        """

    @abstractmethod
    def calculate_cost(self, tokens: int) -> float:
        """
        Calculate cost for token usage.

        Args:
            tokens: Number of tokens used

        Returns:
            Cost in USD
        """

    @abstractmethod
    async def health_check(self) -> ProviderHealth:
        """
        Check if provider is healthy and accessible.

        Returns:
            ProviderHealth status
        """

    async def execute_with_retry(
        self,
        prompt: str,
        max_retries: Optional[int] = None,
        **kwargs
    ) -> ProviderResponse:
        """
        Execute with automatic retry on failure.

        Implements exponential backoff (2**attempt seconds between tries)
        and a circuit breaker. Never raises: after exhausting all attempts
        a failed ProviderResponse is returned instead.
        """
        # `is None` (not `or`) so an explicit max_retries=0 is honored
        # rather than silently falling back to the config value.
        if max_retries is None:
            max_retries = self.config.max_retries
        last_error: Optional[str] = None

        for attempt in range(max_retries + 1):
            try:
                if self._is_circuit_open():
                    # Too many recent failures: back off instead of
                    # hammering a failing API. This consumes the attempt.
                    await self._wait_for_circuit_close()
                    last_error = last_error or "circuit breaker open"
                    continue

                response = await self.execute(prompt, **kwargs)

                if not response.success:
                    # Raise so the handler below records the failure
                    # exactly once (recording here as well would
                    # double-count the request).
                    raise RuntimeError(response.error or "provider reported failure")

                self._record_success()
                # A success closes the circuit and restores health;
                # without resetting status an "unhealthy" provider
                # could never recover.
                self.health.consecutive_failures = 0
                self.health.status = "healthy"
                # Accumulate usage so get_stats() reflects real traffic.
                self._total_tokens += response.tokens_used
                self._total_cost += response.cost
                return response

            except Exception as e:
                last_error = str(e)
                self._record_failure(last_error)
                if attempt < max_retries:
                    await asyncio.sleep(2 ** attempt)  # exponential backoff

        # All attempts failed (or the circuit stayed open throughout).
        return ProviderResponse(
            success=False,
            content="",
            tokens_used=0,
            cost=0.0,
            latency_ms=0.0,
            model=self.config.model,
            provider=self.config.name,
            timestamp=datetime.now().isoformat(),
            metadata={},
            error=f"Failed after {max_retries} retries: {last_error}"
        )

    def get_health(self) -> ProviderHealth:
        """Get current health status."""
        return self.health

    def get_stats(self) -> Dict[str, Any]:
        """Get provider usage statistics (counts, tokens, cost, health)."""
        success_rate = (
            (self._success_count / self._request_count * 100)
            if self._request_count > 0
            else 0
        )

        return {
            "provider": self.config.name,
            "model": self.config.model,
            "requests": self._request_count,
            "successes": self._success_count,
            "success_rate": success_rate,
            "total_tokens": self._total_tokens,
            "total_cost": self._total_cost,
            "avg_cost_per_1k_tokens": (
                (self._total_cost / self._total_tokens * 1000)
                if self._total_tokens > 0
                else 0
            ),
            "health_status": self.health.status,
            "uptime": self.health.uptime_percentage
        }

    def _record_success(self):
        """Record one successful request."""
        self._request_count += 1
        self._success_count += 1
        self.health.last_check = datetime.now().isoformat()

    def _record_failure(self, error: str):
        """Record one failed request and degrade health accordingly."""
        self._request_count += 1
        self.health.consecutive_failures += 1
        self.health.last_error = error
        self.health.last_check = datetime.now().isoformat()

        # 5+ consecutive failures -> unhealthy; 2+ -> degraded.
        if self.health.consecutive_failures >= 5:
            self.health.status = "unhealthy"
        elif self.health.consecutive_failures >= 2:
            self.health.status = "degraded"

    def _is_circuit_open(self) -> bool:
        """Circuit is open after 3+ consecutive failures while unhealthy."""
        return (
            self.health.consecutive_failures >= 3 and
            self.health.status == "unhealthy"
        )

    async def _wait_for_circuit_close(self):
        """Cool-down delay applied while the circuit is open."""
        await asyncio.sleep(5)  # Wait 5 seconds

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}("
            f"name={self.config.name}, "
            f"model={self.config.model}, "
            f"status={self.health.status})"
        )