adaptive-memory-multi-model-router 1.2.2 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +146 -66
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/integrations/airtable.js +20 -0
- package/dist/integrations/discord.js +18 -0
- package/dist/integrations/github.js +23 -0
- package/dist/integrations/gmail.js +19 -0
- package/dist/integrations/google-calendar.js +18 -0
- package/dist/integrations/index.js +61 -0
- package/dist/integrations/jira.js +21 -0
- package/dist/integrations/linear.js +19 -0
- package/dist/integrations/notion.js +19 -0
- package/dist/integrations/slack.js +18 -0
- package/dist/integrations/telegram.js +19 -0
- package/dist/providers/registry.js +7 -3
- package/docs/ARCHITECTURAL-IMPROVEMENTS-2025.md +1391 -0
- package/docs/ARCHITECTURAL-IMPROVEMENTS-REVISED-2025.md +1051 -0
- package/docs/CONFIGURATION.md +476 -0
- package/docs/COUNCIL_DECISION.json +308 -0
- package/docs/COUNCIL_SUMMARY.md +265 -0
- package/docs/COUNCIL_V2.2_DECISION.md +416 -0
- package/docs/IMPROVEMENT_ROADMAP.md +515 -0
- package/docs/LLM_COUNCIL_DECISION.md +508 -0
- package/docs/QUICK_START_VISIBILITY.md +782 -0
- package/docs/REDDIT_GAP_ANALYSIS.md +299 -0
- package/docs/RESEARCH_BACKED_IMPROVEMENTS.md +1180 -0
- package/docs/TMLPD_QNA.md +751 -0
- package/docs/TMLPD_V2.1_COMPLETE.md +763 -0
- package/docs/TMLPD_V2.2_RESEARCH_ROADMAP.md +754 -0
- package/docs/V2.2_IMPLEMENTATION_COMPLETE.md +446 -0
- package/docs/V2_IMPLEMENTATION_GUIDE.md +388 -0
- package/docs/VISIBILITY_ADOPTION_PLAN.md +1005 -0
- package/docs/launch-content/LAUNCH_EXECUTION_CHECKLIST.md +421 -0
- package/docs/launch-content/README.md +457 -0
- package/docs/launch-content/assets/cost_comparison_100_tasks.png +0 -0
- package/docs/launch-content/assets/cumulative_savings.png +0 -0
- package/docs/launch-content/assets/parallel_speedup.png +0 -0
- package/docs/launch-content/assets/provider_pricing_comparison.png +0 -0
- package/docs/launch-content/assets/task_breakdown_comparison.png +0 -0
- package/docs/launch-content/generate_charts.py +313 -0
- package/docs/launch-content/hn_show_post.md +139 -0
- package/docs/launch-content/partner_outreach_templates.md +745 -0
- package/docs/launch-content/reddit_posts.md +467 -0
- package/docs/launch-content/twitter_thread.txt +460 -0
- package/examples/QUICKSTART.md +1 -1
- package/openclaw-alexa-bridge/ALL_REMAINING_FIXES_PLAN.md +313 -0
- package/openclaw-alexa-bridge/REMAINING_FIXES_SUMMARY.md +277 -0
- package/openclaw-alexa-bridge/src/alexa_handler_no_tmlpd.js +1234 -0
- package/openclaw-alexa-bridge/test_fixes.js +77 -0
- package/package.json +120 -29
- package/package.json.tmp +0 -0
- package/qna/TMLPD_QNA.md +3 -3
- package/skill/SKILL.md +2 -2
- package/src/__tests__/integration/tmpld_integration.test.py +540 -0
- package/src/agents/skill_enhanced_agent.py +318 -0
- package/src/memory/__init__.py +15 -0
- package/src/memory/agentic_memory.py +353 -0
- package/src/memory/semantic_memory.py +444 -0
- package/src/memory/simple_memory.py +466 -0
- package/src/memory/working_memory.py +447 -0
- package/src/orchestration/__init__.py +52 -0
- package/src/orchestration/execution_engine.py +353 -0
- package/src/orchestration/halo_orchestrator.py +367 -0
- package/src/orchestration/mcts_workflow.py +498 -0
- package/src/orchestration/role_assigner.py +473 -0
- package/src/orchestration/task_planner.py +522 -0
- package/src/providers/__init__.py +67 -0
- package/src/providers/anthropic.py +304 -0
- package/src/providers/base.py +241 -0
- package/src/providers/cerebras.py +373 -0
- package/src/providers/registry.py +476 -0
- package/src/routing/__init__.py +30 -0
- package/src/routing/universal_router.py +621 -0
- package/src/skills/TMLPD-QUICKREF.md +210 -0
- package/src/skills/TMLPD-SETUP-SUMMARY.md +157 -0
- package/src/skills/TMLPD.md +540 -0
- package/src/skills/__tests__/skill_manager.test.ts +328 -0
- package/src/skills/skill_manager.py +385 -0
- package/src/skills/test-tmlpd.sh +108 -0
- package/src/skills/tmlpd-category.yaml +67 -0
- package/src/skills/tmlpd-monitoring.yaml +188 -0
- package/src/skills/tmlpd-phase.yaml +132 -0
- package/src/state/__init__.py +17 -0
- package/src/state/simple_checkpoint.py +508 -0
- package/src/tmlpd_agent.py +464 -0
- package/src/tmpld_v2.py +427 -0
- package/src/workflows/__init__.py +18 -0
- package/src/workflows/advanced_difficulty_classifier.py +377 -0
- package/src/workflows/chaining_executor.py +417 -0
- package/src/workflows/difficulty_integration.py +209 -0
- package/src/workflows/orchestrator.py +469 -0
- package/src/workflows/orchestrator_executor.py +456 -0
- package/src/workflows/parallelization_executor.py +382 -0
- package/src/workflows/router.py +311 -0
- package/test_integration_simple.py +86 -0
- package/test_mcts_workflow.py +150 -0
- package/test_templd_integration.py +262 -0
- package/test_universal_router.py +275 -0
- package/tmlpd-pi-extension/README.md +36 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts +114 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.js +285 -0
- package/tmlpd-pi-extension/dist/cache/prefixCache.js.map +1 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.d.ts +58 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.js +153 -0
- package/tmlpd-pi-extension/dist/cache/responseCache.js.map +1 -0
- package/tmlpd-pi-extension/dist/cli.js +59 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.d.ts +95 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.js +240 -0
- package/tmlpd-pi-extension/dist/cost/costTracker.js.map +1 -0
- package/tmlpd-pi-extension/dist/index.d.ts +723 -0
- package/tmlpd-pi-extension/dist/index.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/index.js +239 -0
- package/tmlpd-pi-extension/dist/index.js.map +1 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts +82 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.js +145 -0
- package/tmlpd-pi-extension/dist/memory/episodicMemory.js.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts +102 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js +207 -0
- package/tmlpd-pi-extension/dist/orchestration/haloOrchestrator.js.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts +85 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js +210 -0
- package/tmlpd-pi-extension/dist/orchestration/mctsWorkflow.js.map +1 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.d.ts +102 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.js +338 -0
- package/tmlpd-pi-extension/dist/providers/localProvider.js.map +1 -0
- package/tmlpd-pi-extension/dist/providers/registry.d.ts +55 -0
- package/tmlpd-pi-extension/dist/providers/registry.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/providers/registry.js +138 -0
- package/tmlpd-pi-extension/dist/providers/registry.js.map +1 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts +68 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.js +332 -0
- package/tmlpd-pi-extension/dist/routing/advancedRouter.js.map +1 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts +101 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.js +368 -0
- package/tmlpd-pi-extension/dist/tools/tmlpdTools.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts +96 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.js +170 -0
- package/tmlpd-pi-extension/dist/utils/batchProcessor.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/compression.d.ts +61 -0
- package/tmlpd-pi-extension/dist/utils/compression.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/compression.js +281 -0
- package/tmlpd-pi-extension/dist/utils/compression.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/reliability.d.ts +74 -0
- package/tmlpd-pi-extension/dist/utils/reliability.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/reliability.js +177 -0
- package/tmlpd-pi-extension/dist/utils/reliability.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts +117 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js +246 -0
- package/tmlpd-pi-extension/dist/utils/speculativeDecoding.js.map +1 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts +50 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.d.ts.map +1 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.js +124 -0
- package/tmlpd-pi-extension/dist/utils/tokenUtils.js.map +1 -0
- package/tmlpd-pi-extension/examples/QUICKSTART.md +183 -0
- package/tmlpd-pi-extension/package-lock.json +75 -0
- package/tmlpd-pi-extension/package.json +172 -0
- package/tmlpd-pi-extension/python/examples.py +53 -0
- package/tmlpd-pi-extension/python/integrations.py +330 -0
- package/tmlpd-pi-extension/python/setup.py +28 -0
- package/tmlpd-pi-extension/python/tmlpd.py +369 -0
- package/tmlpd-pi-extension/qna/REDDIT_GAP_ANALYSIS.md +299 -0
- package/tmlpd-pi-extension/qna/TMLPD_QNA.md +751 -0
- package/tmlpd-pi-extension/skill/SKILL.md +238 -0
- package/{src → tmlpd-pi-extension/src}/index.ts +1 -1
- package/tmlpd-pi-extension/tsconfig.json +18 -0
- package/demo/research-demo.js +0 -266
- package/notebooks/quickstart.ipynb +0 -157
- package/rust/tmlpd.h +0 -268
- package/src/cache/prefixCache.ts +0 -365
- package/src/routing/advancedRouter.ts +0 -406
- package/src/utils/speculativeDecoding.ts +0 -344
- /package/{src → tmlpd-pi-extension/src}/cache/responseCache.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/cost/costTracker.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/memory/episodicMemory.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/orchestration/haloOrchestrator.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/orchestration/mctsWorkflow.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/providers/localProvider.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/providers/registry.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/tools/tmlpdTools.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/batchProcessor.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/compression.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/reliability.ts +0 -0
- /package/{src → tmlpd-pi-extension/src}/utils/tokenUtils.ts +0 -0
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Anthropic Provider Implementation - Agent 2 Output (Part 1)
|
|
3
|
+
|
|
4
|
+
Implements Anthropic Claude API provider.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import time
|
|
9
|
+
from typing import Dict, Any
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
|
|
12
|
+
from .base import BaseProvider, ProviderConfig, ProviderResponse, ProviderHealth
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AnthropicProvider(BaseProvider):
    """
    Provider backed by the Anthropic Claude API.

    Supports models: claude-sonnet-4, claude-opus-4, claude-haiku-4
    """

    # USD per 1M tokens (2025 list prices).
    PRICING = {
        "claude-sonnet-4": {"input": 3.0, "output": 15.0},
        "claude-opus-4": {"input": 15.0, "output": 75.0},
        "claude-haiku-4": {"input": 0.25, "output": 1.25},
    }

    def __init__(self, config: ProviderConfig):
        super().__init__(config)
        # The config names the environment variable holding the key;
        # the key itself is never stored in configuration.
        self.api_key = os.getenv(config.api_key_env)

        if not self.api_key:
            raise ValueError(f"API key not found: {config.api_key_env}")

    async def execute(
        self,
        prompt: str,
        max_tokens: int = 2000,
        temperature: float = 0.7,
        **kwargs
    ) -> ProviderResponse:
        """
        Run *prompt* against Claude and return a standardized response.

        NOTE(review): placeholder implementation — production code would use
        the official SDK, roughly:

            import anthropic
            client = anthropic.Anthropic(api_key=self.api_key)
            message = client.messages.create(
                model=self.config.model,
                max_tokens=max_tokens,
                temperature=temperature,
                messages=[{"role": "user", "content": prompt}],
            )
        """
        t0 = time.time()

        try:
            # Simulated network round-trip; swap for the real SDK in prod.
            reply = await self._mock_api_call(prompt, max_tokens)

            elapsed_ms = (time.time() - t0) * 1000
            # Crude token estimate: roughly 4 characters per token.
            token_estimate = len(prompt) // 4 + len(reply) // 4

            return ProviderResponse(
                success=True,
                content=reply,
                tokens_used=token_estimate,
                cost=self.calculate_cost(token_estimate),
                latency_ms=elapsed_ms,
                model=self.config.model,
                provider=self.config.name,
                timestamp=datetime.now().isoformat(),
                metadata={"temperature": temperature, "max_tokens": max_tokens},
            )

        except Exception as exc:
            elapsed_ms = (time.time() - t0) * 1000

            return ProviderResponse(
                success=False,
                content="",
                tokens_used=0,
                cost=0.0,
                latency_ms=elapsed_ms,
                model=self.config.model,
                provider=self.config.name,
                timestamp=datetime.now().isoformat(),
                metadata={},
                error=str(exc),
            )

    async def _mock_api_call(self, prompt: str, max_tokens: int) -> str:
        """Development stand-in for the real API call (simulates latency only)."""
        import asyncio
        await asyncio.sleep(0.5)

        return f"[Anthropic {self.config.model}] Response to: {prompt[:100]}..."

    def calculate_cost(self, tokens: int) -> float:
        """
        Estimate USD cost for *tokens*.

        Anthropic bills input and output tokens separately; we approximate
        with a 50/50 split.  Unknown models fall back to sonnet-tier rates.
        """
        rate = self.PRICING.get(self.config.model, {"input": 3.0, "output": 15.0})

        # Assume half the tokens were input, half output.
        input_part = (tokens / 2) * rate["input"] / 1_000_000
        output_part = (tokens / 2) * rate["output"] / 1_000_000

        return input_part + output_part

    async def health_check(self) -> ProviderHealth:
        """
        Probe API availability and update self.health.

        NOTE(review): mocked — a production check would time a minimal real
        request and report "unhealthy" on any exception, e.g.:

            client = anthropic.Anthropic(api_key=self.api_key)
            start = time.time()
            # make minimal request ...
            latency = (time.time() - start) * 1000
        """
        import asyncio

        try:
            t0 = time.time()

            # Stand-in for a real API ping.
            await asyncio.sleep(0.2)

            self.health = ProviderHealth(
                status="healthy",
                last_check=datetime.now().isoformat(),
                consecutive_failures=0,
                latency_ms=(time.time() - t0) * 1000,
                uptime_percentage=100.0,
            )

        except Exception as exc:
            self.health = ProviderHealth(
                status="unhealthy",
                last_check=datetime.now().isoformat(),
                consecutive_failures=self.health.consecutive_failures + 1,
                last_error=str(exc),
            )

        return self.health
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class OpenAIProvider(BaseProvider):
    """
    Provider backed by the OpenAI GPT API.

    Supports models: gpt-4o, gpt-4-turbo, gpt-3.5-turbo
    """

    # USD per 1M tokens (2025 list prices).
    PRICING = {
        "gpt-4o": {"input": 2.50, "output": 10.0},
        "gpt-4-turbo": {"input": 10.0, "output": 30.0},
        "gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
    }

    def __init__(self, config: ProviderConfig):
        super().__init__(config)
        self.api_key = os.getenv(config.api_key_env)  # env var named by config

        if not self.api_key:
            raise ValueError(f"API key not found: {config.api_key_env}")

    async def execute(
        self,
        prompt: str,
        max_tokens: int = 2000,
        temperature: float = 0.7,
        **kwargs
    ) -> ProviderResponse:
        """
        Run *prompt* against GPT and return a standardized response.

        NOTE(review): placeholder implementation — production code would use
        the official SDK, roughly:

            from openai import AsyncOpenAI
            client = AsyncOpenAI(api_key=self.api_key)
            response = await client.chat.completions.create(
                model=self.config.model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                temperature=temperature,
            )
        """
        began = time.time()

        try:
            # Simulated API call for development.
            text = await self._mock_api_call(prompt, max_tokens)

            duration_ms = (time.time() - began) * 1000
            # ~4 characters per token heuristic.
            approx_tokens = len(prompt) // 4 + len(text) // 4

            result = ProviderResponse(
                success=True,
                content=text,
                tokens_used=approx_tokens,
                cost=self.calculate_cost(approx_tokens),
                latency_ms=duration_ms,
                model=self.config.model,
                provider=self.config.name,
                timestamp=datetime.now().isoformat(),
                metadata={"temperature": temperature, "max_tokens": max_tokens},
            )

        except Exception as err:
            duration_ms = (time.time() - began) * 1000

            result = ProviderResponse(
                success=False,
                content="",
                tokens_used=0,
                cost=0.0,
                latency_ms=duration_ms,
                model=self.config.model,
                provider=self.config.name,
                timestamp=datetime.now().isoformat(),
                metadata={},
                error=str(err),
            )

        return result

    async def _mock_api_call(self, prompt: str, max_tokens: int) -> str:
        """Development stand-in for the real API call."""
        import asyncio
        await asyncio.sleep(0.4)

        return f"[OpenAI {self.config.model}] Response to: {prompt[:100]}..."

    def calculate_cost(self, tokens: int) -> float:
        """Estimate USD cost, assuming a 50/50 input/output token split."""
        rate = self.PRICING.get(self.config.model, {"input": 2.50, "output": 10.0})

        input_part = (tokens / 2) * rate["input"] / 1_000_000
        output_part = (tokens / 2) * rate["output"] / 1_000_000

        return input_part + output_part

    async def health_check(self) -> ProviderHealth:
        """Probe API availability (mocked) and update self.health."""
        import asyncio

        try:
            began = time.time()
            await asyncio.sleep(0.2)  # stand-in for a real ping
            duration_ms = (time.time() - began) * 1000

            self.health = ProviderHealth(
                status="healthy",
                last_check=datetime.now().isoformat(),
                consecutive_failures=0,
                latency_ms=duration_ms,
                uptime_percentage=100.0,
            )

        except Exception as err:
            self.health = ProviderHealth(
                status="unhealthy",
                last_check=datetime.now().isoformat(),
                consecutive_failures=self.health.consecutive_failures + 1,
                last_error=str(err),
            )

        return self.health
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base Provider Interface - Agent 1 Output
|
|
3
|
+
|
|
4
|
+
Abstract base class for all LLM providers.
|
|
5
|
+
Provides unified interface for multi-provider system.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from typing import Dict, Any, Optional
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
import asyncio
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class ProviderConfig:
    """Static configuration for one provider instance.

    Consumed by BaseProvider.__init__ and its concrete subclasses.
    """
    name: str  # provider identifier, echoed in ProviderResponse.provider and stats
    model: str  # model identifier, echoed in ProviderResponse.model
    api_key_env: str  # NAME of the environment variable holding the API key (the key itself is read via os.getenv)
    priority: int = 1  # presumably used to order providers — not referenced in this module; confirm against the registry
    max_retries: int = 3  # retry budget used by BaseProvider.execute_with_retry
    timeout: int = 30  # NOTE(review): not enforced anywhere in this module — confirm callers honor it
    enabled: bool = True  # NOTE(review): not checked here; presumably honored by the provider registry
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class ProviderHealth:
    """Mutable health snapshot for a provider.

    Updated by BaseProvider bookkeeping (_record_success/_record_failure)
    and replaced wholesale by health_check() implementations.
    """
    status: str  # "healthy", "degraded" (2+ consecutive failures) or "unhealthy" (5+)
    last_check: str  # ISO-8601 timestamp of the most recent update
    consecutive_failures: int  # incremented per failure, reset to 0 on success
    last_error: Optional[str] = None  # message from the most recent failure, if any
    latency_ms: Optional[float] = None  # latency measured by the last health check
    uptime_percentage: float = 100.0  # NOTE(review): never recomputed in this module — confirm intended
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class ProviderResponse:
    """Standardized response returned by every provider's execute()."""
    success: bool  # False when the call failed; see `error`
    content: str  # model output; empty string on failure
    tokens_used: int  # estimated total tokens (input + output combined)
    cost: float  # estimated USD cost for this call
    latency_ms: float  # wall-clock duration of the call in milliseconds
    model: str  # copied from ProviderConfig.model
    provider: str  # copied from ProviderConfig.name
    timestamp: str  # ISO-8601 completion time
    metadata: Dict[str, Any]  # provider-specific extras (e.g. temperature, max_tokens)
    error: Optional[str] = None  # failure description when success is False
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class BaseProvider(ABC):
    """
    Abstract base class for all LLM providers.

    Concrete providers implement `execute`, `calculate_cost` and
    `health_check`; this base supplies retry with exponential backoff, a
    simple circuit breaker, and health/usage bookkeeping so every provider
    presents a consistent interface to the multi-provider system.
    """

    def __init__(self, config: ProviderConfig):
        self.config = config
        # Start optimistic; _record_failure downgrades the status as
        # consecutive failures accumulate.
        self.health = ProviderHealth(
            status="healthy",
            last_check=datetime.now().isoformat(),
            consecutive_failures=0
        )
        # Usage counters surfaced by get_stats().
        self._request_count = 0
        self._success_count = 0
        self._total_tokens = 0
        self._total_cost = 0.0

    @abstractmethod
    async def execute(
        self,
        prompt: str,
        max_tokens: int = 2000,
        temperature: float = 0.7,
        **kwargs
    ) -> ProviderResponse:
        """
        Execute a prompt with this provider.

        Args:
            prompt: The prompt to execute
            max_tokens: Maximum tokens in response
            temperature: Sampling temperature
            **kwargs: Provider-specific parameters

        Returns:
            ProviderResponse with standardized format
        """
        pass

    @abstractmethod
    def calculate_cost(self, tokens: int) -> float:
        """
        Calculate cost for token usage.

        Args:
            tokens: Number of tokens used

        Returns:
            Cost in USD
        """
        pass

    @abstractmethod
    async def health_check(self) -> ProviderHealth:
        """
        Check if provider is healthy and accessible.

        Returns:
            ProviderHealth status
        """
        pass

    async def execute_with_retry(
        self,
        prompt: str,
        max_retries: Optional[int] = None,
        **kwargs
    ) -> ProviderResponse:
        """
        Execute with automatic retry on failure.

        Implements exponential backoff (1s, 2s, 4s, ...) and a circuit
        breaker: while the provider is marked unhealthy with repeated
        failures, each attempt first waits for the circuit to close.

        Args:
            prompt: The prompt to execute.
            max_retries: Overrides config.max_retries when given
                (an explicit 0 means "no retries"; previously it was
                silently replaced by the config value).
            **kwargs: Forwarded verbatim to execute().

        Returns:
            The first successful ProviderResponse, or a synthetic failure
            response once all retries are exhausted.
        """
        # `is None` check so an explicit max_retries=0 is respected.
        max_retries = self.config.max_retries if max_retries is None else max_retries
        last_error = None

        for attempt in range(max_retries + 1):
            try:
                # Circuit open (too many failures): back off; this consumes
                # the attempt, so an unhealthy provider cannot spin forever.
                if self._is_circuit_open():
                    await self._wait_for_circuit_close()
                    continue

                response = await self.execute(prompt, **kwargs)

                if response.success:
                    # Fold token/cost usage into the running totals so
                    # get_stats() reflects real traffic (the totals were
                    # previously declared but never updated).
                    self._record_success(response.tokens_used, response.cost)
                    self.health.consecutive_failures = 0
                    return response

                # Unsuccessful response: raise so the handler below records
                # it exactly once.  (It was previously recorded both here
                # and in the except block, double-counting request and
                # consecutive-failure counters.)
                raise Exception(response.error)

            except Exception as e:
                last_error = str(e)
                self._record_failure(last_error)

                if attempt < max_retries:
                    # Exponential backoff: 1s, 2s, 4s, ...
                    await asyncio.sleep(2 ** attempt)
                    continue

        # All retries failed
        return ProviderResponse(
            success=False,
            content="",
            tokens_used=0,
            cost=0.0,
            latency_ms=0.0,
            model=self.config.model,
            provider=self.config.name,
            timestamp=datetime.now().isoformat(),
            metadata={},
            error=f"Failed after {max_retries} retries: {last_error}"
        )

    def get_health(self) -> ProviderHealth:
        """Get current health status"""
        return self.health

    def get_stats(self) -> Dict[str, Any]:
        """Get provider statistics (request counts, success rate, usage, health)."""
        success_rate = (
            (self._success_count / self._request_count * 100)
            if self._request_count > 0
            else 0
        )

        return {
            "provider": self.config.name,
            "model": self.config.model,
            "requests": self._request_count,
            "successes": self._success_count,
            "success_rate": success_rate,
            "total_tokens": self._total_tokens,
            "total_cost": self._total_cost,
            "avg_cost_per_1k_tokens": (
                (self._total_cost / self._total_tokens * 1000)
                if self._total_tokens > 0
                else 0
            ),
            "health_status": self.health.status,
            "uptime": self.health.uptime_percentage
        }

    def _record_success(self, tokens: int = 0, cost: float = 0.0):
        """Record a successful request and accumulate usage totals.

        Args:
            tokens: Tokens consumed by the request (defaults keep the
                original no-argument call form working).
            cost: USD cost of the request.
        """
        self._request_count += 1
        self._success_count += 1
        self._total_tokens += tokens
        self._total_cost += cost
        self.health.last_check = datetime.now().isoformat()

    def _record_failure(self, error: str):
        """Record a failed request and downgrade health status as needed."""
        self._request_count += 1
        self.health.consecutive_failures += 1
        self.health.last_error = error
        self.health.last_check = datetime.now().isoformat()

        # Status thresholds: 2+ consecutive failures -> degraded, 5+ -> unhealthy.
        if self.health.consecutive_failures >= 5:
            self.health.status = "unhealthy"
        elif self.health.consecutive_failures >= 2:
            self.health.status = "degraded"

    def _is_circuit_open(self) -> bool:
        """Check if circuit breaker is open (too many failures)"""
        return (
            self.health.consecutive_failures >= 3 and
            self.health.status == "unhealthy"
        )

    async def _wait_for_circuit_close(self):
        """Wait before retrying when circuit is open"""
        await asyncio.sleep(5)  # fixed cool-down; no probing of the provider here

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}("
            f"name={self.config.name}, "
            f"model={self.config.model}, "
            f"status={self.health.status})"
        )
|