llmcycle 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llmcycle/__init__.py +25 -0
- llmcycle/cli.py +17 -0
- llmcycle/client.py +73 -0
- llmcycle/core/errors.py +71 -0
- llmcycle/core/keys.py +184 -0
- llmcycle/core/router.py +37 -0
- llmcycle/core/stream.py +79 -0
- llmcycle/providers/base.py +21 -0
- llmcycle/providers/openai_compatible.py +56 -0
- llmcycle/providers/registry.py +105 -0
- llmcycle/schema.py +47 -0
- llmcycle/ui/__init__.py +1 -0
- llmcycle/ui/app.py +88 -0
- llmcycle/ui/templates/dashboard.html +378 -0
- llmcycle-0.1.0.dist-info/METADATA +100 -0
- llmcycle-0.1.0.dist-info/RECORD +18 -0
- llmcycle-0.1.0.dist-info/WHEEL +4 -0
- llmcycle-0.1.0.dist-info/entry_points.txt +2 -0
llmcycle/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLMCycle - Production-Grade Universal LLM Router
|
|
3
|
+
=================================================
|
|
4
|
+
Auto multi-key rotation, intelligent error handling, streaming resilience,
|
|
5
|
+
and support for 50+ providers out-of-the-box.
|
|
6
|
+
"""
|
|
7
|
+
from .client import LLMCycle
|
|
8
|
+
from .schema import CompletionRequest, Message, CompletionResponse, StreamChunk
|
|
9
|
+
from .core.keys import KeyManager, KeyStatus
|
|
10
|
+
# core/router.py defines the strategy base class as ``RouterStrategy``; there is
# no ``RoutingStrategy`` symbol, so importing it by that name raised ImportError
# and made the whole package unimportable. Alias it to keep the public name
# listed in ``__all__`` working.
from .core.router import ModelRouter, RouterStrategy as RoutingStrategy
|
|
11
|
+
from .core.errors import (
|
|
12
|
+
LLMCycleError, RateLimitError, AuthenticationError,
|
|
13
|
+
ProviderError, AllProvidersFailedError, StreamInterruptedError,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# Names re-exported as the package's public API.
__all__ = [
    "LLMCycle",
    "CompletionRequest", "Message", "CompletionResponse", "StreamChunk",
    "KeyManager", "KeyStatus",
    "ModelRouter", "RoutingStrategy",
    "LLMCycleError", "RateLimitError", "AuthenticationError",
    "ProviderError", "AllProvidersFailedError", "StreamInterruptedError",
]

# Must match the distribution metadata: this wheel is published as 0.1.0.
__version__ = "0.1.0"
|
llmcycle/cli.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import uvicorn
|
|
3
|
+
|
|
4
|
+
def main():
    """Parse the command line and run the requested LLMCycle command.

    The only command is ``ui``: it prints the dashboard URL and serves
    ``llmcycle.ui.app:app`` through uvicorn on the chosen host/port with
    auto-reload switched on.
    """
    cli = argparse.ArgumentParser(description="LLMCycle CLI")
    cli.add_argument("command", choices=["ui"], help="Command to run")
    cli.add_argument("--host", default="127.0.0.1", help="Host for the UI")
    cli.add_argument("--port", type=int, default=8000, help="Port for the UI")
    opts = cli.parse_args()

    # argparse already restricts ``command`` to the known choices; anything
    # other than "ui" simply has nothing to do.
    if opts.command != "ui":
        return

    print(f"Starting LLMCycle Dashboard on http://{opts.host}:{opts.port}")
    uvicorn.run("llmcycle.ui.app:app", host=opts.host, port=opts.port, reload=True)


if __name__ == "__main__":
    main()
|
llmcycle/client.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from dotenv import load_dotenv
|
|
3
|
+
from llmcycle.core.keys import KeyManager
|
|
4
|
+
from llmcycle.core.router import ModelRouter, FallbackRouter
|
|
5
|
+
from llmcycle.core.stream import StreamResilienceManager
|
|
6
|
+
from llmcycle.providers.openai_compatible import OpenAICompatibleProvider
|
|
7
|
+
|
|
8
|
+
# Default base URLs for well-known providers, keyed by the upper-case prefix of
# the ``<PROVIDER>_API_KEYS`` environment variable. LLMCycle._auto_load_keys
# consults this table only when no ``<PROVIDER>_BASE_URL`` variable is set.
PROVIDER_BASE_URLS = {
    "OPENAI": "https://api.openai.com/v1",
    "DEEPSEEK": "https://api.deepseek.com/v1",
    "ANTHROPIC": "https://api.anthropic.com/v1",
    "TOGETHER": "https://api.together.xyz/v1",
    "GROQ": "https://api.groq.com/openai/v1",
    "MISTRAL": "https://api.mistral.ai/v1",
    "PERPLEXITY": "https://api.perplexity.ai",
    "ANYSCALE": "https://api.endpoints.anyscale.com/v1",
    "FIREWORKS": "https://api.fireworks.ai/inference/v1",
    "COHERE": "https://api.cohere.com/v1",
    "DATABRICKS": "https://serving.api.databricks.com/serving-endpoints",
    "HUGGINGFACE": "https://api-inference.huggingface.co/models",
}
|
|
23
|
+
|
|
24
|
+
class LLMCycle:
    """Main entrypoint for LLMCycle with Universal Provider Support.

    Construction loads the given ``.env`` file, registers one provider (plus its
    API keys) for every ``<PROVIDER>_API_KEYS`` environment variable, and wires
    the router and stream manager together.
    """

    # Suffix that marks an environment variable as a comma-separated key list.
    _KEYS_SUFFIX = "_API_KEYS"

    def __init__(self, env_path: str = ".env", custom_fallbacks: dict = None):
        """Build the client.

        Args:
            env_path: Path of the dotenv file loaded before key discovery.
            custom_fallbacks: Optional mapping handed to ``FallbackRouter``
                (requested model -> list of fallback models); ``None`` means
                no fallbacks.
        """
        load_dotenv(env_path)

        self.key_manager = KeyManager()
        self.providers = {}

        # Auto-discover
        self._auto_load_keys()

        # Setup Routing Strategy
        fallbacks = custom_fallbacks or {}
        self.router = ModelRouter(FallbackRouter(fallbacks))
        self.stream_manager = StreamResilienceManager(self.router, self.key_manager, self.providers)

    def _auto_load_keys(self):
        """Finds any env var ending with _API_KEYS and universally registers the provider.

        Base URL resolution order:
          1. an explicit ``<PROVIDER>_BASE_URL`` environment variable,
          2. the ``PROVIDER_BASE_URLS`` default table,
          3. a guessed ``https://api.<provider>.com/v1`` endpoint.
        """
        suffix = self._KEYS_SUFFIX
        for env_name, raw_value in os.environ.items():
            if not env_name.endswith(suffix):
                continue

            # Strip only the trailing suffix: str.replace() would also delete an
            # "_API_KEYS" occurring earlier in the variable name.
            provider_name = env_name[: -len(suffix)].upper()
            if not provider_name:
                # A variable literally named "_API_KEYS" names no provider.
                continue

            keys = [k.strip() for k in raw_value.split(",") if k.strip()]
            if not keys:
                continue

            base_url = os.environ.get(f"{provider_name}_BASE_URL") or PROVIDER_BASE_URLS.get(
                provider_name, f"https://api.{provider_name.lower()}.com/v1"
            )

            # Providers and keys are both indexed by the lower-case provider name.
            provider_id = provider_name.lower()
            self.providers[provider_id] = OpenAICompatibleProvider(base_url)
            for k in keys:
                self.key_manager.add_key(provider_id, k)

    def get_available_providers(self) -> list[str]:
        """Return the lower-case names of all registered providers."""
        return list(self.providers.keys())

    async def get_provider_models(self, provider_name: str) -> list[str]:
        """Return the model ids reported by a provider.

        Returns an empty list when the provider is unknown or has no usable key;
        otherwise delegates to the provider's ``get_models`` with the next key
        from the key manager.
        """
        p_name = provider_name.lower()
        if p_name not in self.providers:
            return []
        key = self.key_manager.get_next_key(p_name)
        if not key:
            return []
        return await self.providers[p_name].get_models(key)
|
llmcycle/core/errors.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom exceptions for LLMCycle.
|
|
3
|
+
All errors map from HTTP status codes so the router knows exactly
|
|
4
|
+
what to do: retry, rotate key, skip provider, or give up.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
class LLMCycleError(Exception):
    """Base error for all LLMCycle exceptions.

    Carries the provider name, model name and HTTP status code (``0`` when not
    applicable) alongside the message.
    """

    def __init__(self, message: str, provider: str = "", model: str = "", status_code: int = 0):
        super().__init__(message)
        self.provider = provider
        self.model = model
        self.status_code = status_code


class RateLimitError(LLMCycleError):
    """429: Rate limit exceeded. Rotate key and retry."""


class AuthenticationError(LLMCycleError):
    """401: Invalid API key. Disable key permanently."""


class ProviderError(LLMCycleError):
    """400/500+: Provider-side error. Try next provider."""


class QuotaExceededError(LLMCycleError):
    """402/429 with quota message: Key quota exhausted. Rotate key."""


class ContentPolicyError(LLMCycleError):
    """400: Content policy violation. Do NOT retry - fail fast."""


class StreamInterruptedError(LLMCycleError):
    """Mid-stream disconnection. Contains partial text generated so far."""

    def __init__(self, message: str, partial_text: str = "", **kwargs):
        # kwargs are forwarded to LLMCycleError (provider / model / status_code).
        super().__init__(message, **kwargs)
        self.partial_text = partial_text


class AllProvidersFailedError(LLMCycleError):
    """Raised when every provider in the fallback chain fails.

    ``errors`` holds the individual failures in the order they occurred and is
    kept on the instance unchanged.
    """

    def __init__(self, errors: list):
        if errors:
            msg = f"All {len(errors)} providers failed. Last error: {errors[-1]}"
        else:
            # Nothing was attempted (e.g. no provider or key available); the
            # original indexed errors[-1] here and crashed with IndexError.
            msg = "All providers failed: no provider could be attempted."
        super().__init__(msg)
        self.errors = errors
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Maps HTTP status codes to exception types
def classify_http_error(status_code: int, response_text: str, provider: str, model: str) -> LLMCycleError:
    """Factory: Convert an HTTP error into the correct LLMCycle exception.

    Args:
        status_code: HTTP status returned by the provider.
        response_text: Raw response body; ``None`` is treated as empty.
        provider: Provider name, used as the message prefix and stored on the error.
        model: Model name stored on the error.

    Returns:
        An exception instance (not raised): ``AuthenticationError`` for 401,
        ``QuotaExceededError`` for 402 and quota-style 429s, ``RateLimitError``
        for other 429s, ``ContentPolicyError`` for policy-style 400s, and
        ``ProviderError`` for everything else.
    """
    response_text = response_text or ""
    text_lower = response_text.lower()

    def build(exc_type, label, body=response_text):
        # Every error shares the "[provider] <label>: <body>" message layout.
        return exc_type(
            f"[{provider}] {label}: {body}",
            provider=provider, model=model, status_code=status_code,
        )

    if status_code == 401:
        return build(AuthenticationError, "Auth failed (401)")

    if status_code == 429:
        quota_hit = any(w in text_lower for w in ("quota", "exhausted", "billing"))
        if not quota_hit and "limit exceeded" in text_lower:
            # "Rate limit exceeded" is the ordinary throttling message and must
            # stay a RateLimitError; other "... limit exceeded" texts keep
            # counting as quota exhaustion.
            quota_hit = "rate limit" not in text_lower
        if quota_hit:
            return build(QuotaExceededError, "Quota exceeded (429)")
        return build(RateLimitError, "Rate limited (429)")

    if status_code == 402:
        return build(QuotaExceededError, "Payment required (402)")

    if status_code == 400:
        if any(w in text_lower for w in ("content_policy", "safety", "harmful", "violat")):
            return build(ContentPolicyError, "Content policy (400)")
        return build(ProviderError, "Bad request (400)")

    # All other 4xx/5xx (body truncated to keep the message short)
    return build(ProviderError, f"HTTP {status_code}", response_text[:200])
|
llmcycle/core/keys.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""
|
|
2
|
+
KeyManager - Production Multi-Key Rotation
|
|
3
|
+
==========================================
|
|
4
|
+
Supports:
|
|
5
|
+
- Round-robin across multiple keys
|
|
6
|
+
- Rate-limit cooldown per key (auto re-enable after cooldown)
|
|
7
|
+
- Permanent disable on auth failure (401)
|
|
8
|
+
- Per-key error tracking with thresholds
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
import time
|
|
12
|
+
import threading
|
|
13
|
+
import hashlib
|
|
14
|
+
import logging
|
|
15
|
+
from enum import Enum
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from typing import Optional, Dict, List
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
# Seconds a key stays benched after KeyManager.report_error(..., "rate_limit").
RATE_LIMIT_COOLDOWN_SECS = 60
# Seconds a key stays benched after KeyManager.report_error(..., "quota").
QUOTA_COOLDOWN_SECS = 3600
|
|
23
|
+
|
|
24
|
+
class KeyStatus(Enum):
    """Lifecycle state of a single API key as tracked by KeyManager."""

    ACTIVE = "active"
    RATE_LIMITED = "rate_limited"  # 429 – temporary, retry after cooldown
    QUOTA_EXHAUSTED = "quota_exhausted"  # 402/429 quota – long cooldown
    INVALID = "invalid"  # 401 – permanent disable
    DISABLED = "disabled"  # manual disable
|
|
30
|
+
|
|
31
|
+
@dataclass
class KeyRecord:
    """Bookkeeping entry for one API key of one provider."""

    key: str
    provider: str
    status: KeyStatus = KeyStatus.ACTIVE
    total_requests: int = 0
    total_errors: int = 0
    consecutive_errors: int = 0
    # Epoch seconds until which a rate-limited / quota-exhausted key is benched.
    rate_limit_until: float = 0.0
    added_at: float = field(default_factory=time.time)
    last_used: float = 0.0

    @property
    def key_hint(self) -> str:
        """Masked form for logs: first 6 and last 4 chars, or ``***`` for short keys."""
        secret = self.key
        return "***" if len(secret) <= 10 else f"{secret[:6]}...{secret[-4:]}"

    @property
    def is_usable(self) -> bool:
        """True for active keys and for benched keys whose cooldown has elapsed."""
        cooling_states = (KeyStatus.RATE_LIMITED, KeyStatus.QUOTA_EXHAUSTED)
        if self.status in cooling_states:
            return self.rate_limit_until <= time.time()
        return self.status == KeyStatus.ACTIVE
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class KeyManager:
    """Thread-safe multi-key manager with auto rotation and error classification.

    All reads and writes of key records happen while holding ``self._lock``.
    The lock is not re-entrant, so public methods use ``_find_unlocked`` once
    they already hold it.
    """

    def __init__(self):
        self._lock = threading.Lock()
        # provider → list of KeyRecord (in insertion order for round-robin)
        self._keys: Dict[str, List[KeyRecord]] = {}
        # Round-robin pointer per provider
        self._indexes: Dict[str, int] = {}

    def add_key(self, provider: str, key: str) -> None:
        """Register one API key for a provider (duplicates are ignored)."""
        p = provider.lower()
        with self._lock:
            records = self._keys.setdefault(p, [])
            self._indexes.setdefault(p, 0)
            # Don't add duplicates
            if any(r.key == key for r in records):
                return
            rec = KeyRecord(key=key, provider=p)
            records.append(rec)
            logger.debug(f"[{p}] Registered key {rec.key_hint}")

    def add_keys(self, provider: str, keys: List[str]) -> None:
        """Register several API keys for a provider."""
        for k in keys:
            self.add_key(provider, k)

    def get_next_key(self, provider: str) -> Optional[str]:
        """Round-robin rotation. Auto-recovers rate-limited keys after cooldown.

        Returns the next usable key for ``provider`` (advancing the rotation
        pointer and updating usage counters), or ``None`` when the provider has
        no keys or none is currently usable.
        """
        p = provider.lower()
        with self._lock:
            records = self._keys.get(p, [])
            if not records:
                return None

            n = len(records)
            start = self._indexes[p]

            for i in range(n):
                idx = (start + i) % n
                rec = records[idx]

                # Auto-recover temporarily banned keys
                if rec.status in (KeyStatus.RATE_LIMITED, KeyStatus.QUOTA_EXHAUSTED):
                    if time.time() >= rec.rate_limit_until:
                        # Remember the old state before resetting it, otherwise
                        # the log line always reads "recovered from active".
                        previous = rec.status.value
                        rec.status = KeyStatus.ACTIVE
                        rec.consecutive_errors = 0
                        logger.info(f"[{p}] Key {rec.key_hint} auto-recovered from {previous}")

                if rec.is_usable:
                    self._indexes[p] = (idx + 1) % n
                    rec.last_used = time.time()
                    rec.total_requests += 1
                    return rec.key

            logger.warning(f"[{p}] No usable keys available!")
            return None

    def report_success(self, provider: str, key: str) -> None:
        """Reset the consecutive-error counter of a key after a successful call."""
        with self._lock:
            rec = self._find_unlocked(provider, key)
            if rec:
                rec.consecutive_errors = 0

    def report_error(self, provider: str, key: str, error_type: str) -> None:
        """
        Record a failure for a key and update its status.

        error_type: "rate_limit" | "quota" | "auth" | "server" | "connection"

        Unknown keys are ignored. "auth" disables the key permanently, "quota"
        and "rate_limit" bench it for the matching cooldown, and "server" /
        "connection" only bump the error counters.
        """
        with self._lock:
            rec = self._find_unlocked(provider, key)
            if not rec:
                return

            rec.total_errors += 1
            rec.consecutive_errors += 1

            if error_type == "auth":
                rec.status = KeyStatus.INVALID
                logger.warning(f"[{provider}] Key {rec.key_hint} permanently disabled (401 Auth)")

            elif error_type == "quota":
                rec.status = KeyStatus.QUOTA_EXHAUSTED
                rec.rate_limit_until = time.time() + QUOTA_COOLDOWN_SECS
                logger.warning(f"[{provider}] Key {rec.key_hint} quota exhausted. Retry after {QUOTA_COOLDOWN_SECS}s")

            elif error_type == "rate_limit":
                rec.status = KeyStatus.RATE_LIMITED
                rec.rate_limit_until = time.time() + RATE_LIMIT_COOLDOWN_SECS
                logger.warning(f"[{provider}] Key {rec.key_hint} rate limited. Retry after {RATE_LIMIT_COOLDOWN_SECS}s")

            elif error_type in ("server", "connection"):
                # Don't ban the key, just note the error
                logger.debug(f"[{provider}] Key {rec.key_hint} got {error_type} error (key kept active)")

    def get_stats(self, provider: str) -> List[dict]:
        """Return one stats dict per key of ``provider`` (masked hint, status, counters)."""
        p = provider.lower()
        with self._lock:
            return [
                {
                    "hint": r.key_hint,
                    "status": r.status.value,
                    "total_requests": r.total_requests,
                    "total_errors": r.total_errors,
                    "last_used": r.last_used,
                }
                for r in self._keys.get(p, [])
            ]

    def list_providers(self) -> List[str]:
        """Return the names of all providers that have had a key registered."""
        with self._lock:
            return list(self._keys.keys())

    def key_count(self, provider: str) -> dict:
        """Return total / active / invalid / rate_limited key counts for a provider."""
        p = provider.lower()
        with self._lock:
            records = self._keys.get(p, [])
            return {
                "total": len(records),
                "active": sum(1 for r in records if r.is_usable),
                "invalid": sum(1 for r in records if r.status == KeyStatus.INVALID),
                "rate_limited": sum(1 for r in records if r.status == KeyStatus.RATE_LIMITED),
            }

    def _find_unlocked(self, provider: str, key: str) -> Optional[KeyRecord]:
        """Look up a key record; the caller must already hold ``self._lock``."""
        for rec in self._keys.get(provider.lower(), []):
            if rec.key == key:
                return rec
        return None

    def _find(self, provider: str, key: str) -> Optional[KeyRecord]:
        """Look up the record for ``key`` under ``provider`` or return ``None``."""
        with self._lock:
            return self._find_unlocked(provider, key)
|
llmcycle/core/router.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from typing import List, Dict, Optional
|
|
4
|
+
|
|
5
|
+
logger = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
class RouterStrategy(ABC):
    """Abstract strategy for sorting and selecting models.

    Concrete strategies implement ``sort_models`` to decide which models are
    tried, and in which order, for a requested model.
    """

    @abstractmethod
    def sort_models(self, requested_model: str) -> List[str]:
        """Return a sorted list of fallback models."""
|
|
14
|
+
|
|
15
|
+
class FallbackRouter(RouterStrategy):
    """A simple router that uses a pre-defined fallback list."""

    def __init__(self, fallbacks: Dict[str, List[str]]):
        # Maps a requested model to its ordered fallbacks,
        # e.g. {"gpt-4": ["gpt-4-turbo", "gpt-3.5-turbo"]}
        self.fallbacks = fallbacks

    def sort_models(self, requested_model: str) -> List[str]:
        """Return the requested model followed by its configured fallbacks, if any."""
        # The requested model always goes first; an unknown model has no extras.
        extras = self.fallbacks[requested_model] if requested_model in self.fallbacks else []
        return [requested_model, *extras]
|
|
28
|
+
|
|
29
|
+
class ModelRouter:
    """Main router class that manages strategies and routes requests.

    It holds one strategy object and defers every routing decision to it.
    """

    def __init__(self, strategy: RouterStrategy):
        self.strategy = strategy

    def get_route(self, requested_model: str) -> List[str]:
        """Get ordered list of models to try."""
        ordered_models = self.strategy.sort_models(requested_model)
        return ordered_models
|
llmcycle/core/stream.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import AsyncGenerator
|
|
3
|
+
from llmcycle.schema import CompletionRequest, Message
|
|
4
|
+
from llmcycle.providers.base import LLMProvider
|
|
5
|
+
from llmcycle.core.router import ModelRouter
|
|
6
|
+
from llmcycle.core.keys import KeyManager
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
class StreamResilienceManager:
    """
    Handles streaming failover.
    If a stream disconnects mid-response, this manager will capture
    the text already generated, switch to a fallback model, append
    the generated text to the assistant's context, and resume the stream.

    NOTE(review): ``providers`` and the key manager are both looked up by the
    *model* name here, while ``LLMCycle`` (client.py) fills them by *provider*
    name — confirm which key space callers are expected to use.
    """

    def __init__(
        self,
        router: ModelRouter,
        key_manager: KeyManager,
        providers: dict[str, LLMProvider]
    ):
        self.router = router
        self.key_manager = key_manager
        self.providers = providers  # map of model -> LLMProvider instance (simplified)

    async def safe_stream(self, request: CompletionRequest) -> AsyncGenerator[str, None]:
        """Stream a completion, failing over to the next routed model on error.

        Yields text chunks as they arrive. When a model fails, the text already
        produced is appended to a copy of the request as an assistant message
        followed by a "continue" instruction, and the next model is tried.

        Raises:
            RuntimeError: if every model failed before any text was produced.
                When some text was already yielded the generator ends after
                logging an error.
        """
        models_to_try = self.router.get_route(request.model)
        generated_text_so_far = ""

        for model in models_to_try:
            if model not in self.providers:
                logger.warning(f"No provider found for model {model}")
                continue

            provider = self.providers[model]
            api_key = self.key_manager.get_next_key(model)  # Assumes provider uses model name for key lookups for simplicity

            if not api_key:
                logger.warning(f"No active API keys available for model {model}")
                continue

            try:
                # If we're failing over mid-stream, we must update the prompt
                # to include the generated_text_so_far
                current_request = request.model_copy(deep=True)
                current_request.model = model

                if generated_text_so_far:
                    current_request.messages.append(
                        Message(role="assistant", content=generated_text_so_far)
                    )
                    # Ideally, you'd instruct the fallback model to continue from here
                    current_request.messages.append(
                        Message(role="user", content="Continue exactly from the last assistant message. Do not repeat anything. Just continue.")
                    )

                logger.info(f"Attempting stream with model {model}")
                stream_gen = provider.generate_stream(current_request, api_key)

                async for chunk in stream_gen:
                    generated_text_so_far += chunk
                    yield chunk

                # If we finish the stream without exceptions, we are done!
                self.key_manager.report_success(model, api_key)
                return

            except Exception as e:
                logger.error(f"Stream interrupted on model {model}: {e}")
                # report_error takes (provider, key, error_type). The original
                # two-argument call raised TypeError inside this handler and
                # aborted the failover; "connection" is the error type that
                # KeyManager recognises. The key was fetched under ``model``,
                # so it is reported under the same name.
                self.key_manager.report_error(model, api_key, "connection")
                logger.info("Failing over to next model in sequence...")
                # The loop will continue and try the next model

        # If we exit the loop, all models failed
        if not generated_text_so_far:
            raise RuntimeError("All models failed and no text was generated.")
        else:
            logger.error("All models failed, but some text was generated.")
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import AsyncGenerator
|
|
3
|
+
from llmcycle.schema import CompletionRequest
|
|
4
|
+
|
|
5
|
+
class LLMProvider(ABC):
    """Base class for all specific LLM implementations.

    Subclasses must implement all three coroutines below; each receives the
    API key to use for that single call.
    """

    @abstractmethod
    async def generate(self, request: CompletionRequest, api_key: str) -> str:
        """Generate a complete string response."""

    @abstractmethod
    async def generate_stream(self, request: CompletionRequest, api_key: str) -> AsyncGenerator[str, None]:
        """Generate a streaming response."""

    @abstractmethod
    async def get_models(self, api_key: str) -> list[str]:
        """Return a list of models supported by this provider."""
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import httpx
|
|
2
|
+
from typing import AsyncGenerator
|
|
3
|
+
from llmcycle.schema import CompletionRequest
|
|
4
|
+
from llmcycle.providers.base import LLMProvider
|
|
5
|
+
|
|
6
|
+
class OpenAICompatibleProvider(LLMProvider):
    """A generic provider for OpenAI-compatible APIs (OpenAI, DeepSeek, Together, etc)."""

    def __init__(self, base_url: str):
        # Stored without a trailing slash so endpoint paths can be appended
        # directly; the URL is otherwise used exactly as passed in.
        self.base_url = base_url.rstrip('/')

    async def get_models(self, api_key: str) -> list[str]:
        """Return the model ids listed by ``GET {base_url}/models``.

        Best-effort: any failure is printed and an empty list is returned.
        """
        headers = {"Authorization": f"Bearer {api_key}"}
        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(f"{self.base_url}/models", headers=headers, timeout=5.0)
                response.raise_for_status()
                data = response.json()
                if "data" in data:
                    return [model["id"] for model in data["data"]]
                return []
            except Exception as e:
                print(f"Failed to fetch models from {self.base_url}: {e}")
                return []

    async def generate(self, request: CompletionRequest, api_key: str) -> str:
        """POST a non-streaming chat completion and return the first choice's text.

        Raises whatever ``httpx`` raises for transport errors or a non-2xx
        status (``raise_for_status``).
        """
        headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
        # to_api_dict() merges ``extra`` into the top-level payload; a plain
        # model_dump() would send it as a nested "extra" field instead.
        payload = request.to_api_dict()
        payload["stream"] = False
        async with httpx.AsyncClient() as client:
            response = await client.post(f"{self.base_url}/chat/completions", headers=headers, json=payload, timeout=60.0)
            response.raise_for_status()
            return response.json()["choices"][0]["message"]["content"]

    async def generate_stream(self, request: CompletionRequest, api_key: str) -> AsyncGenerator[str, None]:
        """POST a streaming chat completion and yield each content delta.

        The response is read as Server-Sent Events, one line at a time; the
        stream ends at the ``[DONE]`` sentinel or when the connection closes.
        """
        import json  # local import: this module's top-level imports do not include json

        headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
        payload = request.to_api_dict()
        payload["stream"] = True

        async with httpx.AsyncClient() as client:
            # Same 60s budget as generate(); without it httpx applies its
            # short default timeout to the wait for each streamed chunk.
            async with client.stream(
                "POST", f"{self.base_url}/chat/completions",
                headers=headers, json=payload, timeout=60.0,
            ) as response:
                response.raise_for_status()
                # aiter_lines() yields whole lines. aiter_text() yields
                # arbitrary network chunks that may hold several events or
                # cut one in half, so prefix-matching on them drops data.
                async for line in response.aiter_lines():
                    if not line.startswith("data:"):
                        continue  # blank separators, comments, other SSE fields
                    data_str = line[len("data:"):].strip()
                    if data_str == "[DONE]":
                        break
                    try:
                        data = json.loads(data_str)
                    except json.JSONDecodeError:
                        continue
                    # Some events carry no choices; skip them instead of
                    # crashing on data["choices"][0].
                    choices = data.get("choices") if isinstance(data, dict) else None
                    if not choices:
                        continue
                    content = (choices[0].get("delta") or {}).get("content")
                    if content:
                        yield content
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Universal Provider Registry
|
|
3
|
+
============================
|
|
4
|
+
50+ providers mapped by their env var prefix → base URL.
|
|
5
|
+
Entries are treated as OpenAI-compatible REST API endpoints; URLs containing {placeholders} need a <PROVIDER>_BASE_URL override.
|
|
6
|
+
Custom provider: set MYPROVIDER_API_KEYS + MYPROVIDER_BASE_URL.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
# Maps the upper-case env-var prefix of a provider to its default base URL.
# Values containing "{...}" placeholders are templates, not usable URLs.
PROVIDER_REGISTRY: dict[str, str] = {
    # ── Frontier / Big Cloud ──────────────────────────────────────────────
    "OPENAI": "https://api.openai.com/v1",
    "AZURE": "https://{resource}.openai.azure.com/openai",  # needs override
    "ANTHROPIC": "https://api.anthropic.com/v1",
    "GOOGLE": "https://generativelanguage.googleapis.com/v1beta",
    "VERTEXAI": "https://us-central1-aiplatform.googleapis.com/v1",
    "AWS_BEDROCK": "https://bedrock-runtime.us-east-1.amazonaws.com",

    # ── Chinese / Asia ────────────────────────────────────────────────────
    "DEEPSEEK": "https://api.deepseek.com/v1",
    "QWEN": "https://dashscope.aliyuncs.com/compatible-mode/v1",
    "DASHSCOPE": "https://dashscope.aliyuncs.com/compatible-mode/v1",
    "MOONSHOT": "https://api.moonshot.cn/v1",
    "MINIMAX": "https://api.minimax.chat/v1",
    "ZHIPU": "https://open.bigmodel.cn/api/paas/v4",
    "BAIDU": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop",
    "VOLCANO": "https://ark.cn-beijing.volces.com/api/v3",
    "XIAOMI": "https://api.mimo.xiaomi.com/v1",
    "ZAI": "https://open.bigmodel.cn/api/paas/v4",

    # ── Fast Inference / Aggregators ─────────────────────────────────────
    "GROQ": "https://api.groq.com/openai/v1",
    "TOGETHER": "https://api.together.xyz/v1",
    "FIREWORKS": "https://api.fireworks.ai/inference/v1",
    "ANYSCALE": "https://api.endpoints.anyscale.com/v1",
    "PERPLEXITY": "https://api.perplexity.ai",
    "OPENROUTER": "https://openrouter.ai/api/v1",
    "REPLICATE": "https://api.replicate.com/v1",
    "DEEPINFRA": "https://api.deepinfra.com/v1/openai",
    "FEATHERLESS": "https://api.featherless.ai/v1",
    "NOVITA": "https://api.novita.ai/v3/openai",
    "CHUTES": "https://llm.chutes.ai/v1",
    "NSCALE": "https://inference.api.nscale.com/v1",
    "NEBIUS": "https://api.studio.nebius.ai/v1",
    "HYPERBOLIC": "https://api.hyperbolic.xyz/v1",
    "LAMBDA": "https://api.lambdalabs.com/v1",
    "SAMBANOVA": "https://api.sambanova.ai/v1",
    "CEREBRAS": "https://api.cerebras.ai/v1",
    "FRIENDLIAI": "https://inference.friendli.ai/v1",
    "GALADRIEL": "https://api.galadriel.com/v1",
    "GMI": "https://api.gmi.cloud/v1",
    "PUBLICAI": "https://api.public.ai/v1",
    "COMETAPI": "https://api.comet.ai/v1",
    "AIML": "https://api.aimlapi.com/v1",

    # ── Specialized Providers ─────────────────────────────────────────────
    "MISTRAL": "https://api.mistral.ai/v1",
    "CODESTRAL": "https://codestral.mistral.ai/v1",
    "COHERE": "https://api.cohere.com/v1",
    "AI21": "https://api.ai21.com/studio/v1",
    "NLP_CLOUD": "https://api.nlpcloud.io/v1",
    "ALEPH_ALPHA": "https://api.aleph-alpha.com",
    "PREDIBASE": "https://serving.app.predibase.com",
    "CLARIFAI": "https://api.clarifai.com/v2",
    "HUGGINGFACE": "https://api-inference.huggingface.co/models",
    "BASETEN": "https://model-{model_id}.api.baseten.co/environments/production/predict",
    "GRADIENT": "https://api.gradient.ai/api",

    # ── Local / Self-Hosted ───────────────────────────────────────────────
    "OLLAMA": "http://localhost:11434/v1",
    "LM_STUDIO": "http://localhost:1234/v1",
    "VLLM": "http://localhost:8000/v1",
    "LLAMAFILE": "http://localhost:8080/v1",
    "TRITON": "http://localhost:8001/v2",
    "XINFERENCE": "http://localhost:9997/v1",
    "DOCKER_MODEL": "http://localhost:12434/engines/llama.cpp/v1",

    # ── Enterprise / Cloud ────────────────────────────────────────────────
    "DATABRICKS": "https://{workspace}.azuredatabricks.net/serving-endpoints",
    "SAGEMAKER": "https://runtime.sagemaker.us-east-1.amazonaws.com",
    "SNOWFLAKE": "https://{account}.snowflakecomputing.com/api/v2",
    "WATSONX": "https://us-south.ml.cloud.ibm.com/ml/v1",
    "SAP": "https://api.ai.internalprod.eu-central-1.aws.ml.hana.ondemand.com",
    "OCI": "https://inference.generativeai.us-chicago-1.oci.customer-oci.com/20231130",
    "CLOUDFLARE": "https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run",
    "HEROKU": "https://llm.api.heroku.com/v1",
    "OVH": "https://oai.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1",
    "SCALEWAY": "https://api.scaleway.ai/v1",
    "DATAROBOT": "https://app.datarobot.com/api/v2",

    # ── Nvidia ────────────────────────────────────────────────────────────
    "NVIDIA": "https://integrate.api.nvidia.com/v1",
    "NVIDIA_NIM": "https://integrate.api.nvidia.com/v1",

    # ── GitHub / Microsoft ────────────────────────────────────────────────
    "GITHUB": "https://models.inference.ai.azure.com",
    "VERCEL": "https://ai-gateway.vercel.sh",
    "XAI": "https://api.x.ai/v1",

    # ── Image Generation ─────────────────────────────────────────────────
    "STABILITY": "https://api.stability.ai/v1",
    "FAL": "https://fal.run",
    "RECRAFT": "https://external.api.recraft.ai/v1",
    "RUNWAYML": "https://api.dev.runwayml.com/v1",
    "BLACK_FOREST": "https://api.us1.bfl.ai/v1",
}
|
llmcycle/schema.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Production schema models with Pydantic v2.
|
|
3
|
+
"""
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
from typing import Optional, List, Dict, Any, Union
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
import time
|
|
8
|
+
|
|
9
|
+
class Message(BaseModel):
    """One entry of a conversation sent to a provider.

    Both fields are required strings; no validation of ``role`` values is
    performed here.
    """

    # Speaker label for this message.
    role: str
    # Text body of the message.
    content: str
|
|
12
|
+
|
|
13
|
+
class CompletionRequest(BaseModel):
    """Parameters for a single completion call.

    Only ``model`` and ``messages`` are required. Optional fields left as
    ``None`` are dropped when the request is serialized, and ``extra`` carries
    additional keys that are merged into the outgoing payload.
    """

    model: str
    messages: List[Message]
    stream: bool = False
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
    top_p: Optional[float] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    stop: Optional[Union[str, List[str]]] = None
    # Free-form additional payload keys; never serialized under the name "extra".
    extra: Optional[Dict[str, Any]] = None

    def to_api_dict(self) -> dict:
        """Build the request body for an OpenAI-compatible API.

        Returns:
            A dict of all non-``None`` fields except ``extra``, with
            ``messages`` as a list of plain dicts. When ``extra`` is non-empty
            its items are merged in last, so they override same-named fields.
        """
        payload = self.model_dump(exclude_none=True, exclude={"extra"})
        payload["messages"] = [msg.model_dump() for msg in self.messages]
        if not self.extra:
            return payload
        payload.update(self.extra)
        return payload
|
|
32
|
+
|
|
33
|
+
class CompletionResponse(BaseModel):
    """Result of a finished (non-streamed) completion.

    ``id``, ``model``, ``provider`` and ``content`` are required; the numeric
    fields default to zero when not supplied.
    """

    id: str
    model: str
    provider: str
    content: str
    # Token counts; 0 when not supplied.
    prompt_tokens: int = 0
    completion_tokens: int = 0
    # Presumably request latency in milliseconds (per the field name).
    latency_ms: float = 0.0
    # Filled with time.time() at construction unless given explicitly.
    created_at: float = Field(default_factory=time.time)
|
|
42
|
+
|
|
43
|
+
class StreamChunk(BaseModel):
    """One piece of a streamed completion, tagged with its model and provider."""

    content: str
    model: str
    provider: str
    # Defaults to False; presumably set on the final chunk of a stream.
    done: bool = False
|
llmcycle/ui/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""UI Package."""
|
llmcycle/ui/app.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import secrets
|
|
3
|
+
from fastapi import FastAPI, Depends, HTTPException, status
|
|
4
|
+
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
|
|
5
|
+
from fastapi.responses import FileResponse
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
from typing import List, Dict, Any
|
|
8
|
+
from llmcycle import LLMCycle
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
# Location of the bundled dashboard assets (resolved relative to this module).
BASE_DIR = Path(__file__).resolve().parent
templates_dir = BASE_DIR / "templates"

app = FastAPI(title="LLMCycle API Dashboard")
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="api/token")

# MVP shortcut: one random bearer token generated when this module is imported
# and shared by every login. A production deployment should issue JWTs instead.
SESSION_TOKEN = secrets.token_urlsafe(32)

# Single client instance shared by all request handlers.
llm_client = LLMCycle()
|
|
24
|
+
|
|
25
|
+
# Models
|
|
26
|
+
class ProviderInfo(BaseModel):
    """Summary of one provider as returned by the dashboard endpoint."""

    name: str
    base_url: str
    # Number of keys registered for the provider.
    total_keys: int
    # Number of those keys currently flagged active.
    active_keys: int
|
|
31
|
+
|
|
32
|
+
class DashboardData(BaseModel):
    """Response body of ``GET /api/dashboard``."""

    providers: List[ProviderInfo]
    # Maps a provider name to the ordered list of providers it falls back to.
    fallbacks: Dict[str, List[str]]
|
|
35
|
+
|
|
36
|
+
@app.post("/api/token")
async def login(form_data: OAuth2PasswordRequestForm = Depends()):
    """Exchange the admin username/password for the dashboard bearer token.

    The expected credentials are read from the environment on every call:
    ``LLMCYCLE_USER_ADMIN`` for the username and
    ``LLMCYCLE_USER_ADMIN_PASSWORD`` for the password. The historically
    misspelled ``LLMCYCLE_USER_ADMIN_PAASWORD`` is still honoured when the
    correctly spelled variable is not set.

    Returns:
        ``{"access_token": SESSION_TOKEN, "token_type": "bearer"}``.

    Raises:
        HTTPException: 401 when the username or password does not match.
    """
    # NOTE(security): both values default to "admin" when the environment does
    # not define them; deployments should always set explicit credentials.
    correct_username = os.environ.get("LLMCYCLE_USER_ADMIN", "admin")
    correct_password = os.environ.get(
        "LLMCYCLE_USER_ADMIN_PASSWORD",
        os.environ.get("LLMCYCLE_USER_ADMIN_PAASWORD", "admin"),
    )

    # compare_digest() raises TypeError for str arguments containing non-ASCII
    # characters, which would turn a bad login into a 500. Compare bytes instead.
    username_ok = secrets.compare_digest(
        form_data.username.encode("utf-8"), correct_username.encode("utf-8")
    )
    # Evaluated unconditionally so a wrong username does not skip the password
    # comparison (no short-circuit timing difference).
    password_ok = secrets.compare_digest(
        form_data.password.encode("utf-8"), correct_password.encode("utf-8")
    )

    if not (username_ok and password_ok):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password",
            headers={"WWW-Authenticate": "Bearer"},
        )

    return {"access_token": SESSION_TOKEN, "token_type": "bearer"}
|
|
50
|
+
|
|
51
|
+
async def verify_token(token: str = Depends(oauth2_scheme)):
    """Dependency that accepts a request only if it carries the session token.

    Args:
        token: Bearer token extracted from the request by ``oauth2_scheme``.

    Returns:
        The token unchanged when it matches ``SESSION_TOKEN``.

    Raises:
        HTTPException: 401 when the token does not match.
    """
    # compare_digest() raises TypeError for str arguments containing non-ASCII
    # characters; comparing UTF-8 bytes makes such a token a plain 401.
    token_ok = secrets.compare_digest(
        token.encode("utf-8"), SESSION_TOKEN.encode("utf-8")
    )
    if not token_ok:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authentication credentials",
            headers={"WWW-Authenticate": "Bearer"},
        )
    return token
|
|
59
|
+
|
|
60
|
+
@app.get("/api/dashboard", response_model=DashboardData)
async def get_dashboard_data(token: str = Depends(verify_token)):
    """Return provider and fallback information for the dashboard as JSON.

    Requires a valid bearer token (enforced by ``verify_token``). For every
    available provider the response lists its upper-cased name, base URL,
    number of registered keys, and how many of those keys have a stats entry
    marked active.
    """
    key_manager = llm_client.key_manager

    provider_details = []
    for provider in llm_client.get_available_providers():
        provider_keys = key_manager._keys.get(provider, [])
        # A key counts as active only if it has a stats record and that
        # record's is_active flag is truthy.
        live_count = sum(
            1
            for key in provider_keys
            if (record := key_manager._stats.get(key)) and record.is_active
        )
        provider_details.append(
            ProviderInfo(
                name=provider.upper(),
                base_url=llm_client.providers[provider].base_url,
                total_keys=len(provider_keys),
                active_keys=live_count,
            )
        )

    # Strategies without a `fallbacks` attribute are reported as having none.
    fallbacks = getattr(llm_client.router.strategy, "fallbacks", {})

    return DashboardData(providers=provider_details, fallbacks=fallbacks)
|
|
84
|
+
|
|
85
|
+
@app.get("/")
async def serve_ui():
    """Return the bundled single-page dashboard (``templates/dashboard.html``)."""
    dashboard_page = templates_dir / "dashboard.html"
    return FileResponse(dashboard_page)
|
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>LLMCycle API Dashboard</title>
|
|
7
|
+
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;800&display=swap" rel="stylesheet">
|
|
8
|
+
<style>
|
|
9
|
+
:root {
|
|
10
|
+
--bg: #0a0a0f;
|
|
11
|
+
--glass-bg: rgba(255, 255, 255, 0.03);
|
|
12
|
+
--glass-border: rgba(255, 255, 255, 0.08);
|
|
13
|
+
--primary: #6366f1;
|
|
14
|
+
--primary-glow: rgba(99, 102, 241, 0.4);
|
|
15
|
+
--text-main: #f8fafc;
|
|
16
|
+
--text-muted: #94a3b8;
|
|
17
|
+
--success: #10b981;
|
|
18
|
+
--error: #ef4444;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
body {
|
|
22
|
+
margin: 0;
|
|
23
|
+
padding: 0;
|
|
24
|
+
background-color: var(--bg);
|
|
25
|
+
color: var(--text-main);
|
|
26
|
+
font-family: 'Inter', sans-serif;
|
|
27
|
+
min-height: 100vh;
|
|
28
|
+
background-image:
|
|
29
|
+
radial-gradient(circle at 15% 50%, rgba(99, 102, 241, 0.15), transparent 25%),
|
|
30
|
+
radial-gradient(circle at 85% 30%, rgba(168, 85, 247, 0.15), transparent 25%);
|
|
31
|
+
background-attachment: fixed;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
.container {
|
|
35
|
+
max-width: 1200px;
|
|
36
|
+
margin: 0 auto;
|
|
37
|
+
padding: 40px 20px;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
header {
|
|
41
|
+
display: flex;
|
|
42
|
+
justify-content: space-between;
|
|
43
|
+
align-items: center;
|
|
44
|
+
margin-bottom: 50px;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
h1 {
|
|
48
|
+
font-weight: 800;
|
|
49
|
+
font-size: 2.5rem;
|
|
50
|
+
margin: 0;
|
|
51
|
+
background: linear-gradient(135deg, #a855f7, #6366f1);
|
|
52
|
+
-webkit-background-clip: text;
|
|
53
|
+
-webkit-text-fill-color: transparent;
|
|
54
|
+
text-shadow: 0 0 30px var(--primary-glow);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
.user-badge {
|
|
58
|
+
background: var(--glass-bg);
|
|
59
|
+
border: 1px solid var(--glass-border);
|
|
60
|
+
padding: 8px 16px;
|
|
61
|
+
border-radius: 20px;
|
|
62
|
+
backdrop-filter: blur(10px);
|
|
63
|
+
font-size: 0.9rem;
|
|
64
|
+
color: var(--text-muted);
|
|
65
|
+
cursor: pointer;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
.user-badge:hover {
|
|
69
|
+
border-color: rgba(239, 68, 68, 0.5);
|
|
70
|
+
color: var(--error);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
.grid {
|
|
74
|
+
display: grid;
|
|
75
|
+
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
|
76
|
+
gap: 24px;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
.card {
|
|
80
|
+
background: var(--glass-bg);
|
|
81
|
+
border: 1px solid var(--glass-border);
|
|
82
|
+
border-radius: 16px;
|
|
83
|
+
padding: 24px;
|
|
84
|
+
backdrop-filter: blur(12px);
|
|
85
|
+
transition: transform 0.3s ease, box-shadow 0.3s ease;
|
|
86
|
+
position: relative;
|
|
87
|
+
overflow: hidden;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
.card::before {
|
|
91
|
+
content: "";
|
|
92
|
+
position: absolute;
|
|
93
|
+
top: 0; left: 0; right: 0;
|
|
94
|
+
height: 1px;
|
|
95
|
+
background: linear-gradient(90deg, transparent, rgba(255,255,255,0.2), transparent);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
.card:hover {
|
|
99
|
+
transform: translateY(-5px);
|
|
100
|
+
box-shadow: 0 10px 30px -10px var(--primary-glow);
|
|
101
|
+
border-color: rgba(99, 102, 241, 0.3);
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
.card-header {
|
|
105
|
+
display: flex;
|
|
106
|
+
justify-content: space-between;
|
|
107
|
+
align-items: center;
|
|
108
|
+
margin-bottom: 16px;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
.card-title {
|
|
112
|
+
font-size: 1.25rem;
|
|
113
|
+
font-weight: 600;
|
|
114
|
+
margin: 0;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
.status-dot {
|
|
118
|
+
width: 10px;
|
|
119
|
+
height: 10px;
|
|
120
|
+
background-color: var(--success);
|
|
121
|
+
border-radius: 50%;
|
|
122
|
+
box-shadow: 0 0 10px var(--success);
|
|
123
|
+
animation: pulse 2s infinite;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
@keyframes pulse {
|
|
127
|
+
0% { box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.4); }
|
|
128
|
+
70% { box-shadow: 0 0 0 10px rgba(16, 185, 129, 0); }
|
|
129
|
+
100% { box-shadow: 0 0 0 0 rgba(16, 185, 129, 0); }
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
.metric {
|
|
133
|
+
display: flex;
|
|
134
|
+
justify-content: space-between;
|
|
135
|
+
margin-bottom: 8px;
|
|
136
|
+
color: var(--text-muted);
|
|
137
|
+
font-size: 0.95rem;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
.metric span.value {
|
|
141
|
+
color: var(--text-main);
|
|
142
|
+
font-weight: 600;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
.url {
|
|
146
|
+
font-size: 0.8rem;
|
|
147
|
+
color: var(--primary);
|
|
148
|
+
word-break: break-all;
|
|
149
|
+
background: rgba(99, 102, 241, 0.1);
|
|
150
|
+
padding: 4px 8px;
|
|
151
|
+
border-radius: 6px;
|
|
152
|
+
margin-top: 12px;
|
|
153
|
+
display: inline-block;
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
.section-title {
|
|
157
|
+
margin: 40px 0 20px 0;
|
|
158
|
+
font-size: 1.5rem;
|
|
159
|
+
font-weight: 600;
|
|
160
|
+
color: var(--text-main);
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
/* Login Form Styles */
|
|
164
|
+
#login-view {
|
|
165
|
+
max-width: 400px;
|
|
166
|
+
margin: 100px auto;
|
|
167
|
+
text-align: center;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
input {
|
|
171
|
+
width: 100%;
|
|
172
|
+
padding: 12px;
|
|
173
|
+
margin-bottom: 16px;
|
|
174
|
+
border-radius: 8px;
|
|
175
|
+
border: 1px solid var(--glass-border);
|
|
176
|
+
background: rgba(0,0,0,0.2);
|
|
177
|
+
color: white;
|
|
178
|
+
box-sizing: border-box;
|
|
179
|
+
font-family: 'Inter', sans-serif;
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
input:focus {
|
|
183
|
+
outline: none;
|
|
184
|
+
border-color: var(--primary);
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
button {
|
|
188
|
+
width: 100%;
|
|
189
|
+
padding: 12px;
|
|
190
|
+
border-radius: 8px;
|
|
191
|
+
border: none;
|
|
192
|
+
background: linear-gradient(135deg, #6366f1, #a855f7);
|
|
193
|
+
color: white;
|
|
194
|
+
font-weight: 600;
|
|
195
|
+
cursor: pointer;
|
|
196
|
+
font-family: 'Inter', sans-serif;
|
|
197
|
+
transition: opacity 0.2s;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
button:hover {
|
|
201
|
+
opacity: 0.9;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
#error-msg {
|
|
205
|
+
color: var(--error);
|
|
206
|
+
margin-bottom: 16px;
|
|
207
|
+
font-size: 0.9rem;
|
|
208
|
+
display: none;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
</style>
|
|
212
|
+
</head>
|
|
213
|
+
<body>
|
|
214
|
+
<div class="container">
|
|
215
|
+
|
|
216
|
+
<!-- LOGIN VIEW -->
|
|
217
|
+
<div id="login-view">
|
|
218
|
+
<h1 style="margin-bottom: 30px;">LLMCycle Login</h1>
|
|
219
|
+
<div id="error-msg">Invalid username or password.</div>
|
|
220
|
+
<form id="login-form">
|
|
221
|
+
<input type="text" id="username" placeholder="Username" required>
|
|
222
|
+
<input type="password" id="password" placeholder="Password" required>
|
|
223
|
+
<button type="submit">Authenticate</button>
|
|
224
|
+
</form>
|
|
225
|
+
</div>
|
|
226
|
+
|
|
227
|
+
<!-- DASHBOARD VIEW -->
|
|
228
|
+
<div id="dashboard-view" style="display: none;">
|
|
229
|
+
<header>
|
|
230
|
+
<h1>LLMCycle ♻️</h1>
|
|
231
|
+
<div class="user-badge" id="logout-btn">Log Out</div>
|
|
232
|
+
</header>
|
|
233
|
+
|
|
234
|
+
<h2 class="section-title">Active Providers</h2>
|
|
235
|
+
<div class="grid" id="providers-container">
|
|
236
|
+
<!-- Dynamically populated -->
|
|
237
|
+
</div>
|
|
238
|
+
|
|
239
|
+
<h2 class="section-title">Fallback Routing Logic</h2>
|
|
240
|
+
<div class="card" style="max-width: 600px;" id="fallbacks-container">
|
|
241
|
+
<!-- Dynamically populated -->
|
|
242
|
+
</div>
|
|
243
|
+
</div>
|
|
244
|
+
|
|
245
|
+
</div>
|
|
246
|
+
|
|
247
|
+
<script>
|
|
248
|
+
const loginView = document.getElementById('login-view');
const dashboardView = document.getElementById('dashboard-view');
const loginForm = document.getElementById('login-form');
const errorMsg = document.getElementById('error-msg');
const logoutBtn = document.getElementById('logout-btn');

// Bearer token saved by a previous successful login, if any.
let token = localStorage.getItem('llmcycle_token');

if (token) {
    loadDashboard();
} else {
    loginView.style.display = 'block';
}

// Display a login failure message in the error banner.
function showLoginError(message) {
    errorMsg.textContent = message;
    errorMsg.style.display = 'block';
}

// Handle Login: post the credentials as a form body and store the returned token.
loginForm.addEventListener('submit', async (e) => {
    e.preventDefault();
    const formData = new URLSearchParams();
    formData.append('username', document.getElementById('username').value);
    formData.append('password', document.getElementById('password').value);

    try {
        const response = await fetch('/api/token', {
            method: 'POST',
            headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
            body: formData
        });

        if (!response.ok) {
            // Only a 401 means the credentials were rejected; anything else is
            // a server-side problem and should not be reported as a bad password.
            showLoginError(response.status === 401
                ? 'Invalid username or password.'
                : `Login failed (HTTP ${response.status}).`);
            return;
        }

        const data = await response.json();
        localStorage.setItem('llmcycle_token', data.access_token);
        token = data.access_token;
        errorMsg.style.display = 'none';
        loadDashboard();
    } catch (err) {
        // fetch() rejects on network failure, not on bad credentials.
        console.error(err);
        showLoginError('Could not reach the server. Please try again.');
    }
});

// Handle Logout: forget the token and return to the login form.
logoutBtn.addEventListener('click', () => {
    localStorage.removeItem('llmcycle_token');
    token = null;
    dashboardView.style.display = 'none';
    loginView.style.display = 'block';
    document.getElementById('password').value = '';
});
|
|
300
|
+
|
|
301
|
+
// Fetch API and Build UI
|
|
302
|
+
// Fetch /api/dashboard with the current bearer token and show the result.
// On 401 the stored token is discarded and the login form is shown again;
// any other failure is logged and leaves the current view unchanged.
async function loadDashboard() {
    try {
        const response = await fetch('/api/dashboard', {
            headers: { 'Authorization': `Bearer ${token}` }
        });

        if (response.status === 401) {
            // Token invalid (e.g. the server restarted and issued a new one).
            localStorage.removeItem('llmcycle_token');
            token = null;
            loginView.style.display = 'block';
            dashboardView.style.display = 'none';
            return;
        }

        if (!response.ok) {
            // An error body does not have the dashboard shape; do not render it.
            throw new Error(`Dashboard request failed with HTTP ${response.status}`);
        }

        const data = await response.json();
        renderDashboard(data);

        loginView.style.display = 'none';
        dashboardView.style.display = 'block';
    } catch (err) {
        console.error('Failed to load dashboard', err);
    }
}
|
|
325
|
+
|
|
326
|
+
// Rebuild the provider cards and the fallback list from the dashboard payload.
// `data.providers` is an array of {name, base_url, total_keys, active_keys};
// `data.fallbacks` maps a provider name to an array of fallback names.
function renderDashboard(data) {
    const pContainer = document.getElementById('providers-container');
    const fContainer = document.getElementById('fallbacks-container');

    // Values come from server configuration and are inserted via innerHTML,
    // so escape them to keep markup characters from being interpreted as HTML.
    const HTML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch]);

    // Providers
    if (data.providers.length === 0) {
        pContainer.innerHTML = `
            <div class="card" style="grid-column: 1 / -1; text-align: center; border-style: dashed;">
                <p style="color: var(--text-muted);">No providers detected. Add API keys to your .env file.</p>
            </div>`;
    } else {
        // Build the markup once instead of appending to innerHTML per item,
        // which would re-parse the whole container on every iteration.
        pContainer.innerHTML = data.providers.map((p) => `
            <div class="card">
                <div class="card-header">
                    <h3 class="card-title">${escapeHtml(p.name)}</h3>
                    <div class="status-dot"></div>
                </div>
                <div class="metric">
                    <span>Total Keys:</span>
                    <span class="value">${escapeHtml(p.total_keys)}</span>
                </div>
                <div class="metric">
                    <span>Active Keys:</span>
                    <span class="value">${escapeHtml(p.active_keys)}</span>
                </div>
                <div class="url">${escapeHtml(p.base_url)}</div>
            </div>`).join('');
    }

    // Fallbacks
    const fKeys = Object.keys(data.fallbacks);
    if (fKeys.length === 0) {
        fContainer.innerHTML = `<p style="color: var(--text-muted); margin: 0;">No custom fallback routes configured.</p>`;
    } else {
        fContainer.innerHTML = fKeys.map((primary) => {
            const secondaries = data.fallbacks[primary].map((name) => escapeHtml(name)).join(', ');
            return `
            <div class="metric" style="align-items: center; border-bottom: 1px solid var(--glass-border); padding-bottom: 12px; margin-bottom: 12px;">
                <span style="font-weight: 600; color: #a855f7;">${escapeHtml(primary)}</span>
                <span style="color: var(--text-muted); margin: 0 10px;">➜ falls back to ➜</span>
                <span style="font-weight: 600;">${secondaries}</span>
            </div>`;
        }).join('');
    }
}
|
|
376
|
+
</script>
|
|
377
|
+
</body>
|
|
378
|
+
</html>
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: llmcycle
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Cycle through multiple LLM providers with smart fallback, load balancing, and unified API
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: fastapi>=0.136.1
|
|
7
|
+
Requires-Dist: httpx>=0.28.1
|
|
8
|
+
Requires-Dist: jinja2>=3.1.6
|
|
9
|
+
Requires-Dist: pydantic>=2.13.4
|
|
10
|
+
Requires-Dist: python-dotenv>=1.2.2
|
|
11
|
+
Requires-Dist: python-multipart>=0.0.29
|
|
12
|
+
Requires-Dist: uvicorn>=0.47.0
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# LLMCycle ♻️
|
|
16
|
+
|
|
17
|
+
An enterprise-grade, highly resilient LLM management and routing framework. Designed to be **better than LiteLLM** with advanced multi-key support, customized routing (sort order), robust mid-stream error failovers, and a premium **Web Dashboard**.
|
|
18
|
+
|
|
19
|
+
## 🚀 Key Features
|
|
20
|
+
|
|
21
|
+
* **🔑 Universal Provider Support**: Supports *any* provider on the market instantly. Just add `<PROVIDER_NAME>_API_KEYS` to your `.env`!
|
|
22
|
+
* **⚖️ Auto Load-Balancing**: Load multiple API keys for the same provider simply by comma-separating them in your `.env`. LLMCycle automatically round-robins across them and tracks rate limits locally.
|
|
23
|
+
* **🛣️ Custom Fallback Routing**: Configure custom routing. If a primary provider fails, it automatically falls back to your configured secondary.
|
|
24
|
+
* **🛡️ Streaming Time Resilience**: If an LLM disconnects *while streaming a response*, LLMCycle captures the generated text, silently switches to your fallback model, and resumes the stream seamlessly. The client never notices!
|
|
25
|
+
* **🖥️ Premium Web Dashboard**: Manage and view your keys, active providers, and fallback routes through a beautifully designed, secure UI.
|
|
26
|
+
|
|
27
|
+
## 📦 Installation
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
uv add llmcycle
|
|
31
|
+
uv add python-dotenv httpx fastapi uvicorn jinja2 python-multipart
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## ⚙️ Configuration (`.env`)
|
|
35
|
+
|
|
36
|
+
Drop your keys into a `.env` file. To use multiple keys for load balancing, just separate them with commas!
|
|
37
|
+
|
|
38
|
+
```env
|
|
39
|
+
DEEPSEEK_API_KEYS=sk-deepseek-1,sk-deepseek-2
|
|
40
|
+
OPENAI_API_KEYS=sk-openai-primary
|
|
41
|
+
TOGETHER_API_KEYS=sk-together-1
|
|
42
|
+
|
|
43
|
+
# You can even use completely custom providers!
|
|
44
|
+
# LLMCycle will default the base URL to https://api.mycustomai.com/v1
|
|
45
|
+
MYCUSTOMAI_API_KEYS=sk-custom
|
|
46
|
+
# Or explicitly define the base URL for custom providers
|
|
47
|
+
OLLAMA_API_KEYS=local
|
|
48
|
+
OLLAMA_BASE_URL=http://localhost:11434/v1
|
|
49
|
+
|
|
50
|
+
# UI Dashboard Auth
|
|
51
|
+
LLMCYCLE_USER_ADMIN=admin
|
|
52
|
+
LLMCYCLE_USER_ADMIN_PAASWORD=admin
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## 🖥️ Starting the Web Dashboard
|
|
56
|
+
|
|
57
|
+
We built a gorgeous, premium Glassmorphism dashboard to monitor your providers.
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# Make sure your PYTHONPATH is set if running from source:
|
|
61
|
+
# Windows: $env:PYTHONPATH="src"
|
|
62
|
+
# Linux/Mac: export PYTHONPATH="src"
|
|
63
|
+
|
|
64
|
+
uv run llmcycle ui
|
|
65
|
+
```
|
|
66
|
+
*Navigate to `http://127.0.0.1:8000` and log in with the credentials defined in your `.env`!*
|
|
67
|
+
|
|
68
|
+
## 💻 Usage: Everything in One!
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import asyncio
|
|
72
|
+
from llmcycle import LLMCycle
|
|
73
|
+
|
|
74
|
+
async def main():
|
|
75
|
+
# 1. Initialization (Auto-loads all providers & keys from .env)
|
|
76
|
+
client = LLMCycle(
|
|
77
|
+
env_path=".env",
|
|
78
|
+
custom_fallbacks={
|
|
79
|
+
"deepseek": ["openai", "together"] # Sort order / Fallback chain
|
|
80
|
+
}
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
# 2. List all dynamically loaded providers
|
|
84
|
+
providers = client.get_available_providers()
|
|
85
|
+
print("Loaded Providers:", providers)
|
|
86
|
+
|
|
87
|
+
# 3. Query models supported by a provider
|
|
88
|
+
models = await client.get_provider_models("deepseek")
|
|
89
|
+
print("DeepSeek Models:", models)
|
|
90
|
+
|
|
91
|
+
if __name__ == "__main__":
|
|
92
|
+
asyncio.run(main())
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## 🔌 Massive Universal Provider Registry
|
|
96
|
+
|
|
97
|
+
LLMCycle is pre-configured with base URLs for the most popular platforms:
|
|
98
|
+
`OPENAI`, `DEEPSEEK`, `ANTHROPIC`, `TOGETHER`, `GROQ`, `MISTRAL`, `PERPLEXITY`, `ANYSCALE`, `FIREWORKS`, `COHERE`, `DATABRICKS`, `HUGGINGFACE`.
|
|
99
|
+
|
|
100
|
+
**Wildcard Support:** If you define `RANDOM_API_KEYS` in your `.env`, LLMCycle will automatically assume the base URL `https://api.random.com/v1`. If that's wrong, just define `RANDOM_BASE_URL` in your `.env` as well!
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
llmcycle/__init__.py,sha256=JajfAd6eO65ZR5mcqSFaFH55qrlUtUTB25jIgVaMEXQ,921
|
|
2
|
+
llmcycle/cli.py,sha256=SFD1mMmVNlDPWjseSxpCIM3roVLccVxVRwKX2jZGTzw,613
|
|
3
|
+
llmcycle/client.py,sha256=WZepJ5-pBK26F7u1DPVQ-lcUMwop-dHEEldvnZCXhu4,3100
|
|
4
|
+
llmcycle/schema.py,sha256=en8KR8u4uhgd40OsnJ7gGtdGdbtUaVWGCndlgdQYS-8,1293
|
|
5
|
+
llmcycle/core/errors.py,sha256=yL8HWqKGTl8DIfr1wqOPBxXI4xXbbTz6KCy82QLQI5A,3208
|
|
6
|
+
llmcycle/core/keys.py,sha256=CUUqNXhP2VO7Ov3kIcAesBcMH7zdpmJ6Uh9zbeQ0Ews,6585
|
|
7
|
+
llmcycle/core/router.py,sha256=RNR1CE7lq4k9R_WS1szyf458MNTDh6-9O77pxpEt-DE,1291
|
|
8
|
+
llmcycle/core/stream.py,sha256=Cxr3G7-MEs_LoPgj2snDvairrgml7SOuFytEweEUFqQ,3397
|
|
9
|
+
llmcycle/providers/base.py,sha256=u7pegcCUA4HhLcvMVyW1JKqJvSdM_2OmLHzJ_6YKTHM,722
|
|
10
|
+
llmcycle/providers/openai_compatible.py,sha256=FR5AOAstzE9eDTKo3Gv2fRiyghVP_IxWYRUGlwsK93Y,2829
|
|
11
|
+
llmcycle/providers/registry.py,sha256=v1g0bpBqPI14tdrS0HM_F4B-16d0AfRVvOENiU-aWW4,6747
|
|
12
|
+
llmcycle/ui/__init__.py,sha256=tUoGY9CO2NW1BZ5oOJ1Es1QB_qEMRr2uTGDhreaB2Nw,18
|
|
13
|
+
llmcycle/ui/app.py,sha256=OAZK0Mo8Lj6loayG04G4nxNIXSLQvvBiNuCFxbkmV6Y,3040
|
|
14
|
+
llmcycle/ui/templates/dashboard.html,sha256=vt2E6qgVL7SR1NYIlcoNW45p6WhPUVXVXIrZa-_GFT4,12744
|
|
15
|
+
llmcycle-0.1.0.dist-info/METADATA,sha256=Jd_PQqNiLYZB1sZ9LrDku0M9zF0wIDQPqQsnHM3sihw,3874
|
|
16
|
+
llmcycle-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
17
|
+
llmcycle-0.1.0.dist-info/entry_points.txt,sha256=F8C9GmojlaRYpS_ru_t0MwL7Ju06fZLy1KqJlVaDuck,47
|
|
18
|
+
llmcycle-0.1.0.dist-info/RECORD,,
|