llmcycle 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llmcycle/__init__.py ADDED
@@ -0,0 +1,25 @@
1
+ """
2
+ LLMCycle - Production-Grade Universal LLM Router
3
+ =================================================
4
+ Auto multi-key rotation, intelligent error handling, streaming resilience,
5
+ and support for 50+ providers out-of-the-box.
6
+ """
7
from .client import LLMCycle
from .schema import CompletionRequest, Message, CompletionResponse, StreamChunk
from .core.keys import KeyManager, KeyStatus
# core.router defines the strategy base class as ``RouterStrategy``. Importing
# the non-existent name ``RoutingStrategy`` made ``import llmcycle`` fail with
# ImportError, so the class is aliased to keep the exported name working.
from .core.router import ModelRouter, RouterStrategy as RoutingStrategy
from .core.errors import (
    LLMCycleError, RateLimitError, AuthenticationError,
    ProviderError, AllProvidersFailedError, StreamInterruptedError,
)

# Public API of the package.
__all__ = [
    "LLMCycle",
    "CompletionRequest", "Message", "CompletionResponse", "StreamChunk",
    "KeyManager", "KeyStatus",
    "ModelRouter", "RoutingStrategy",
    "LLMCycleError", "RateLimitError", "AuthenticationError",
    "ProviderError", "AllProvidersFailedError", "StreamInterruptedError",
]

# NOTE(review): the wheel is published as 0.1.0 while this reads 0.2.0 -
# confirm which version is intended.
__version__ = "0.2.0"
llmcycle/cli.py ADDED
@@ -0,0 +1,17 @@
1
+ import argparse
2
+ import uvicorn
3
+
4
def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the LLMCycle command line."""
    cli = argparse.ArgumentParser(description="LLMCycle CLI")
    cli.add_argument("command", choices=["ui"], help="Command to run")
    cli.add_argument("--host", default="127.0.0.1", help="Host for the UI")
    cli.add_argument("--port", type=int, default=8000, help="Port for the UI")
    return cli


def main():
    """Parse the command line and launch the requested command.

    The only supported command is ``ui``, which serves the dashboard
    application ``llmcycle.ui.app:app`` with uvicorn (auto-reload enabled).
    """
    options = _build_parser().parse_args()

    # ``choices`` restricts the command to "ui"; guard anyway so an
    # unexpected value simply does nothing.
    if options.command != "ui":
        return

    print(f"Starting LLMCycle Dashboard on http://{options.host}:{options.port}")
    uvicorn.run("llmcycle.ui.app:app", host=options.host, port=options.port, reload=True)


if __name__ == "__main__":
    main()
llmcycle/client.py ADDED
@@ -0,0 +1,73 @@
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from llmcycle.core.keys import KeyManager
4
+ from llmcycle.core.router import ModelRouter, FallbackRouter
5
+ from llmcycle.core.stream import StreamResilienceManager
6
+ from llmcycle.providers.openai_compatible import OpenAICompatibleProvider
7
+
8
+ # Massive default registry
9
# Default base URL per provider, keyed by the upper-case env-var prefix
# (the part of ``<PREFIX>_API_KEYS`` before the suffix). A ``<PREFIX>_BASE_URL``
# environment variable takes precedence over these entries.
PROVIDER_BASE_URLS = {
    "OPENAI": "https://api.openai.com/v1",
    "DEEPSEEK": "https://api.deepseek.com/v1",
    "ANTHROPIC": "https://api.anthropic.com/v1",
    "TOGETHER": "https://api.together.xyz/v1",
    "GROQ": "https://api.groq.com/openai/v1",
    "MISTRAL": "https://api.mistral.ai/v1",
    "PERPLEXITY": "https://api.perplexity.ai",
    "ANYSCALE": "https://api.endpoints.anyscale.com/v1",
    "FIREWORKS": "https://api.fireworks.ai/inference/v1",
    "COHERE": "https://api.cohere.com/v1",
    "DATABRICKS": "https://serving.api.databricks.com/serving-endpoints",
    "HUGGINGFACE": "https://api-inference.huggingface.co/models",
}
23
+
24
class LLMCycle:
    """Main entrypoint for LLMCycle with Universal Provider Support.

    On construction it loads the given ``.env`` file, registers one
    OpenAI-compatible provider per ``<PROVIDER>_API_KEYS`` environment
    variable, and wires up the fallback router and the stream manager.
    """

    def __init__(self, env_path: str = ".env", custom_fallbacks: "dict | None" = None):
        """Build the client.

        Args:
            env_path: Path of the dotenv file loaded into the environment.
            custom_fallbacks: Optional mapping of a model name to the ordered
                list of fallback model names; ``None`` means no fallbacks.
        """
        load_dotenv(env_path)

        self.key_manager = KeyManager()
        self.providers = {}

        # Auto-discover providers and keys from the environment.
        self._auto_load_keys()

        # Setup routing strategy.
        fallbacks = custom_fallbacks or {}
        self.router = ModelRouter(FallbackRouter(fallbacks))
        self.stream_manager = StreamResilienceManager(self.router, self.key_manager, self.providers)

    def _auto_load_keys(self):
        """Register a provider for every env var ending with ``_API_KEYS``.

        The value is a comma-separated key list. The base URL is resolved in
        this order: ``<PROVIDER>_BASE_URL`` env var, the default registry,
        then a guessed ``https://api.<provider>.com/v1``.
        """
        suffix = "_API_KEYS"
        for env_name, raw_value in os.environ.items():
            if not env_name.endswith(suffix):
                continue

            # Strip only the trailing suffix; ``str.replace`` would also
            # remove occurrences in the middle of the name.
            provider_name = env_name[: -len(suffix)].upper()
            if not provider_name:
                # A variable literally named "_API_KEYS" has no provider.
                continue

            keys = [k.strip() for k in raw_value.split(",") if k.strip()]
            if not keys:
                continue

            # NOTE(review): for providers that are neither overridden nor in
            # the registry, the keys will be sent to a *guessed* domain.
            # Consider requiring an explicit <PROVIDER>_BASE_URL instead.
            base_url = os.environ.get(f"{provider_name}_BASE_URL") or PROVIDER_BASE_URLS.get(
                provider_name, f"https://api.{provider_name.lower()}.com/v1"
            )

            provider_id = provider_name.lower()
            self.providers[provider_id] = OpenAICompatibleProvider(base_url)
            for k in keys:
                self.key_manager.add_key(provider_id, k)

    def get_available_providers(self) -> list[str]:
        """Return the lower-case names of all registered providers."""
        return list(self.providers.keys())

    async def get_provider_models(self, provider_name: str) -> list[str]:
        """Return the model ids offered by a provider.

        Returns an empty list when the provider is unknown or has no usable
        key at the moment.
        """
        p_name = provider_name.lower()
        if p_name not in self.providers:
            return []
        key = self.key_manager.get_next_key(p_name)
        if not key:
            return []
        return await self.providers[p_name].get_models(key)
@@ -0,0 +1,71 @@
1
+ """
2
+ Custom exceptions for LLMCycle.
3
+ All errors map from HTTP status codes so the router knows exactly
4
+ what to do: retry, rotate key, skip provider, or give up.
5
+ """
6
+
7
class LLMCycleError(Exception):
    """Base error for all LLMCycle exceptions.

    Attributes:
        provider: Name of the provider that produced the error ("" if unknown).
        model: Model involved in the failed call ("" if unknown).
        status_code: HTTP status code of the failure (0 if not applicable).
    """

    def __init__(self, message: str, provider: str = "", model: str = "", status_code: int = 0):
        super().__init__(message)
        self.provider = provider
        self.model = model
        self.status_code = status_code


class RateLimitError(LLMCycleError):
    """429: Rate limit exceeded. Rotate key and retry."""


class AuthenticationError(LLMCycleError):
    """401: Invalid API key. Disable key permanently."""


class ProviderError(LLMCycleError):
    """400/500+: Provider-side error. Try next provider."""


class QuotaExceededError(LLMCycleError):
    """402/429 with quota message: Key quota exhausted. Rotate key."""


class ContentPolicyError(LLMCycleError):
    """400: Content policy violation. Do NOT retry - fail fast."""


class StreamInterruptedError(LLMCycleError):
    """Mid-stream disconnection. Contains partial text generated so far."""

    def __init__(self, message: str, partial_text: str = "", **kwargs):
        super().__init__(message, **kwargs)
        self.partial_text = partial_text


class AllProvidersFailedError(LLMCycleError):
    """Raised when every provider in the fallback chain fails.

    ``errors`` holds the individual failures in the order they occurred.
    """

    def __init__(self, errors: list):
        errors = [] if errors is None else errors
        if errors:
            msg = f"All {len(errors)} providers failed. Last error: {errors[-1]}"
        else:
            # No attempt was recorded; indexing errors[-1] would raise
            # IndexError and mask the real problem.
            msg = "All 0 providers failed. No errors were recorded."
        super().__init__(msg)
        self.errors = errors


# Substrings (lower-case) that mark a 429 as an exhausted quota rather than
# a short-lived rate limit.
_QUOTA_MARKERS = ("quota", "limit exceeded", "exhausted", "billing")
# Substrings (lower-case) that mark a 400 as a content-policy rejection.
_CONTENT_POLICY_MARKERS = ("content_policy", "safety", "harmful", "violat")


# Maps HTTP status codes to exception types
def classify_http_error(status_code: int, response_text: str, provider: str, model: str) -> LLMCycleError:
    """Factory: Convert an HTTP error into the correct LLMCycle exception.

    Args:
        status_code: HTTP status code returned by the provider.
        response_text: Raw response body; ``None`` is treated as "".
        provider: Provider name, used in the message and stored on the error.
        model: Model name stored on the error.

    Returns:
        An exception instance (not raised) describing the failure.
    """
    response_text = response_text or ""
    text_lower = response_text.lower()
    details = dict(provider=provider, model=model, status_code=status_code)

    if status_code == 401:
        return AuthenticationError(f"[{provider}] Auth failed (401): {response_text}", **details)

    if status_code == 429:
        # "rate limit exceeded" contains the quota marker "limit exceeded";
        # drop that phrase first so an ordinary rate limit is not mistaken
        # for an exhausted quota.
        quota_text = text_lower.replace("rate limit exceeded", "")
        if any(marker in quota_text for marker in _QUOTA_MARKERS):
            return QuotaExceededError(f"[{provider}] Quota exceeded (429): {response_text}", **details)
        return RateLimitError(f"[{provider}] Rate limited (429): {response_text}", **details)

    if status_code == 402:
        return QuotaExceededError(f"[{provider}] Payment required (402): {response_text}", **details)

    if status_code == 400:
        if any(marker in text_lower for marker in _CONTENT_POLICY_MARKERS):
            return ContentPolicyError(f"[{provider}] Content policy (400): {response_text}", **details)
        return ProviderError(f"[{provider}] Bad request (400): {response_text}", **details)

    # All other 4xx/5xx
    return ProviderError(f"[{provider}] HTTP {status_code}: {response_text[:200]}", **details)
llmcycle/core/keys.py ADDED
@@ -0,0 +1,184 @@
1
+ """
2
+ KeyManager - Production Multi-Key Rotation
3
+ ==========================================
4
+ Supports:
5
+ - Round-robin across multiple keys
6
+ - Rate-limit cooldown per key (auto re-enable after cooldown)
7
+ - Permanent disable on auth failure (401)
8
+ - Per-key error tracking with thresholds
9
+ """
10
+ from __future__ import annotations
11
+ import time
12
+ import threading
13
+ import hashlib
14
+ import logging
15
+ from enum import Enum
16
+ from dataclasses import dataclass, field
17
+ from typing import Optional, Dict, List
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
# Seconds a key is benched after a "rate_limit" error is reported.
RATE_LIMIT_COOLDOWN_SECS = 60
# Seconds a key is benched after a "quota" error is reported.
QUOTA_COOLDOWN_SECS = 3600
23
+
24
class KeyStatus(Enum):
    """Lifecycle state of a single API key."""

    ACTIVE = "active"
    RATE_LIMITED = "rate_limited"  # 429 – temporary, retry after cooldown
    QUOTA_EXHAUSTED = "quota_exhausted"  # 402/429 quota – long cooldown
    INVALID = "invalid"  # 401 – permanent disable
    DISABLED = "disabled"  # manual disable
30
+
31
@dataclass
class KeyRecord:
    """Bookkeeping for one API key of one provider."""

    key: str
    provider: str
    status: KeyStatus = KeyStatus.ACTIVE
    total_requests: int = 0
    total_errors: int = 0
    consecutive_errors: int = 0
    # Epoch seconds until which a rate-limited / quota-exhausted key is benched.
    rate_limit_until: float = 0.0
    added_at: float = field(default_factory=time.time)
    last_used: float = 0.0

    @property
    def key_hint(self) -> str:
        """Masked form of the key: first 6 and last 4 characters.

        Keys of 10 characters or fewer are fully masked as ``***``.
        """
        secret = self.key
        return "***" if len(secret) <= 10 else f"{secret[:6]}...{secret[-4:]}"

    @property
    def is_usable(self) -> bool:
        """Whether the key may be handed out right now.

        Active keys are always usable; rate-limited or quota-exhausted keys
        become usable once their cooldown deadline has passed; every other
        status is unusable.
        """
        if self.status == KeyStatus.ACTIVE:
            return True
        cooling_down = self.status in (KeyStatus.RATE_LIMITED, KeyStatus.QUOTA_EXHAUSTED)
        return cooling_down and time.time() >= self.rate_limit_until
57
+
58
+
59
class KeyManager:
    """Thread-safe multi-key manager with auto rotation and error classification.

    All reads and writes of the key records happen while holding one
    non-reentrant lock, so methods must not call each other while the lock
    is held; they use ``_find_locked`` instead of ``_find``.
    """

    def __init__(self):
        self._lock = threading.Lock()
        # provider -> list of KeyRecord (in insertion order for round-robin)
        self._keys: Dict[str, List[KeyRecord]] = {}
        # Round-robin pointer per provider
        self._indexes: Dict[str, int] = {}

    def add_key(self, provider: str, key: str) -> None:
        """Register one API key for a provider; duplicates are ignored."""
        p = provider.lower()
        with self._lock:
            records = self._keys.setdefault(p, [])
            self._indexes.setdefault(p, 0)
            if any(r.key == key for r in records):
                return
            rec = KeyRecord(key=key, provider=p)
            records.append(rec)
        logger.debug(f"[{p}] Registered key {rec.key_hint}")

    def add_keys(self, provider: str, keys: List[str]) -> None:
        """Register several API keys for a provider."""
        for k in keys:
            self.add_key(provider, k)

    def get_next_key(self, provider: str) -> Optional[str]:
        """Round-robin rotation. Auto-recovers rate-limited keys after cooldown.

        Returns:
            The next usable key, or ``None`` when the provider has no keys
            or none of them is currently usable.
        """
        p = provider.lower()
        with self._lock:
            records = self._keys.get(p, [])
            if not records:
                return None

            n = len(records)
            start = self._indexes[p]

            for offset in range(n):
                idx = (start + offset) % n
                rec = records[idx]

                # Auto-recover temporarily banned keys
                if (
                    rec.status in (KeyStatus.RATE_LIMITED, KeyStatus.QUOTA_EXHAUSTED)
                    and time.time() >= rec.rate_limit_until
                ):
                    # Capture the old status before resetting it, otherwise
                    # the log always reports "recovered from active".
                    previous = rec.status.value
                    rec.status = KeyStatus.ACTIVE
                    rec.consecutive_errors = 0
                    logger.info(f"[{p}] Key {rec.key_hint} auto-recovered from {previous}")

                if rec.is_usable:
                    self._indexes[p] = (idx + 1) % n
                    rec.last_used = time.time()
                    rec.total_requests += 1
                    return rec.key

        logger.warning(f"[{p}] No usable keys available!")
        return None

    def report_success(self, provider: str, key: str) -> None:
        """Reset the consecutive-error counter of a key after a good call."""
        with self._lock:
            rec = self._find_locked(provider, key)
            if rec:
                rec.consecutive_errors = 0

    def report_error(self, provider: str, key: str, error_type: str) -> None:
        """Record a failed call and update the key status accordingly.

        error_type: "rate_limit" | "quota" | "auth" | "server" | "connection"
        Any other value only increments the error counters.
        """
        with self._lock:
            rec = self._find_locked(provider, key)
            if not rec:
                return

            rec.total_errors += 1
            rec.consecutive_errors += 1

            if error_type == "auth":
                rec.status = KeyStatus.INVALID
                logger.warning(f"[{provider}] Key {rec.key_hint} permanently disabled (401 Auth)")

            elif error_type == "quota":
                rec.status = KeyStatus.QUOTA_EXHAUSTED
                rec.rate_limit_until = time.time() + QUOTA_COOLDOWN_SECS
                logger.warning(f"[{provider}] Key {rec.key_hint} quota exhausted. Retry after {QUOTA_COOLDOWN_SECS}s")

            elif error_type == "rate_limit":
                rec.status = KeyStatus.RATE_LIMITED
                rec.rate_limit_until = time.time() + RATE_LIMIT_COOLDOWN_SECS
                logger.warning(f"[{provider}] Key {rec.key_hint} rate limited. Retry after {RATE_LIMIT_COOLDOWN_SECS}s")

            elif error_type in ("server", "connection"):
                # Don't ban the key, just note the error
                logger.debug(f"[{provider}] Key {rec.key_hint} got {error_type} error (key kept active)")

    def get_stats(self, provider: str) -> List[dict]:
        """Return one stats dict per key of the provider (keys are masked)."""
        p = provider.lower()
        with self._lock:
            return [
                {
                    "hint": r.key_hint,
                    "status": r.status.value,
                    "total_requests": r.total_requests,
                    "total_errors": r.total_errors,
                    "last_used": r.last_used,
                }
                for r in self._keys.get(p, [])
            ]

    def list_providers(self) -> List[str]:
        """Return the names of all providers that have registered keys."""
        with self._lock:
            return list(self._keys)

    def key_count(self, provider: str) -> dict:
        """Return key counts for a provider.

        ``active`` counts keys that are usable right now, which includes
        keys whose cooldown has already elapsed.
        """
        p = provider.lower()
        with self._lock:
            records = list(self._keys.get(p, []))
            return {
                "total": len(records),
                "active": sum(1 for r in records if r.is_usable),
                "invalid": sum(1 for r in records if r.status == KeyStatus.INVALID),
                "rate_limited": sum(1 for r in records if r.status == KeyStatus.RATE_LIMITED),
                "quota_exhausted": sum(1 for r in records if r.status == KeyStatus.QUOTA_EXHAUSTED),
            }

    def _find_locked(self, provider: str, key: str) -> "Optional[KeyRecord]":
        """Look up a record; the caller must already hold ``self._lock``."""
        for rec in self._keys.get(provider.lower(), []):
            if rec.key == key:
                return rec
        return None

    def _find(self, provider: str, key: str) -> "Optional[KeyRecord]":
        """Look up the record of a key, or ``None`` if it is not registered."""
        with self._lock:
            return self._find_locked(provider, key)
@@ -0,0 +1,37 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from typing import List, Dict, Optional
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
class RouterStrategy(ABC):
    """Interface for objects that decide which models to try, and in what order."""

    @abstractmethod
    def sort_models(self, requested_model: str) -> List[str]:
        """Return the ordered list of models to attempt for a request."""


class FallbackRouter(RouterStrategy):
    """Strategy backed by a static per-model fallback table."""

    def __init__(self, fallbacks: Dict[str, List[str]]):
        # Maps a model to its ordered fallbacks,
        # e.g. {"gpt-4": ["gpt-4-turbo", "gpt-3.5-turbo"]}
        self.fallbacks = fallbacks

    def sort_models(self, requested_model: str) -> List[str]:
        """Return the requested model followed by its configured fallbacks."""
        # The requested model always goes first; models without an entry in
        # the table get no fallbacks.
        return [requested_model, *self.fallbacks.get(requested_model, ())]


class ModelRouter:
    """Front door for routing: delegates ordering to the configured strategy."""

    def __init__(self, strategy: RouterStrategy):
        self.strategy = strategy

    def get_route(self, requested_model: str) -> List[str]:
        """Return the ordered list of models to try for ``requested_model``."""
        route = self.strategy.sort_models(requested_model)
        return route
@@ -0,0 +1,79 @@
1
+ import logging
2
+ from typing import AsyncGenerator
3
+ from llmcycle.schema import CompletionRequest, Message
4
+ from llmcycle.providers.base import LLMProvider
5
+ from llmcycle.core.router import ModelRouter
6
+ from llmcycle.core.keys import KeyManager
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
class StreamResilienceManager:
    """
    Handles streaming failover.
    If a stream disconnects mid-response, this manager will capture
    the text already generated, switch to a fallback model, append
    the generated text to the assistant's context, and resume the stream.
    """

    def __init__(
        self,
        router: ModelRouter,
        key_manager: KeyManager,
        providers: dict[str, LLMProvider]
    ):
        self.router = router
        self.key_manager = key_manager
        # Looked up by the *model* names returned from the router (simplified).
        # NOTE(review): LLMCycle registers providers by provider name, so a
        # route entry only matches when it equals a provider name - confirm.
        self.providers = providers

    async def safe_stream(self, request: CompletionRequest) -> AsyncGenerator[str, None]:
        """Stream a completion, failing over to the next model on errors.

        Yields:
            Text chunks as they arrive, across all attempted models.

        Raises:
            RuntimeError: If every model failed before any text was produced.
        """
        models_to_try = self.router.get_route(request.model)
        generated_text_so_far = ""

        for model in models_to_try:
            if model not in self.providers:
                logger.warning(f"No provider found for model {model}")
                continue

            provider = self.providers[model]
            # Assumes provider uses model name for key lookups for simplicity
            api_key = self.key_manager.get_next_key(model)

            if not api_key:
                logger.warning(f"No active API keys available for model {model}")
                continue

            try:
                # Work on a copy so the caller's request is never mutated.
                current_request = request.model_copy(deep=True)
                current_request.model = model

                # If we're failing over mid-stream, hand the text produced so
                # far to the fallback model and ask it to continue.
                if generated_text_so_far:
                    current_request.messages.append(
                        Message(role="assistant", content=generated_text_so_far)
                    )
                    current_request.messages.append(
                        Message(role="user", content="Continue exactly from the last assistant message. Do not repeat anything. Just continue.")
                    )

                logger.info(f"Attempting stream with model {model}")
                stream_gen = provider.generate_stream(current_request, api_key)

                async for chunk in stream_gen:
                    generated_text_so_far += chunk
                    yield chunk

            except Exception as e:
                logger.error(f"Stream interrupted on model {model}: {e}")
                # report_error takes (provider, key, error_type). The old call
                # passed two arguments, raised TypeError inside this handler
                # and aborted the failover. "connection" is the error type
                # KeyManager recognises.
                self.key_manager.report_error(model, api_key, "connection")
                logger.info("Failing over to next model in sequence...")
                # The loop will continue and try the next model
                continue

            # The stream finished without exceptions: we are done.
            self.key_manager.report_success(model, api_key)
            return

        # If we exit the loop, all models failed
        if not generated_text_so_far:
            raise RuntimeError("All models failed and no text was generated.")
        # NOTE(review): the stream ends normally here although the response
        # is truncated; consider raising StreamInterruptedError with the
        # partial text so callers can tell.
        logger.error("All models failed, but some text was generated.")
@@ -0,0 +1,21 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import AsyncGenerator
3
+ from llmcycle.schema import CompletionRequest
4
+
5
class LLMProvider(ABC):
    """Abstract interface every concrete LLM backend must implement."""

    @abstractmethod
    async def generate(self, request: CompletionRequest, api_key: str) -> str:
        """Run a non-streaming completion and return the full response text."""

    @abstractmethod
    async def generate_stream(self, request: CompletionRequest, api_key: str) -> AsyncGenerator[str, None]:
        """Run a streaming completion, producing the response text in pieces."""

    @abstractmethod
    async def get_models(self, api_key: str) -> list[str]:
        """List the model identifiers this provider offers for ``api_key``."""
@@ -0,0 +1,56 @@
1
+ import httpx
2
+ from typing import AsyncGenerator
3
+ from llmcycle.schema import CompletionRequest
4
+ from llmcycle.providers.base import LLMProvider
5
+
6
class OpenAICompatibleProvider(LLMProvider):
    """A generic provider for OpenAI-compatible APIs (OpenAI, DeepSeek, Together, etc)."""

    def __init__(self, base_url: str):
        # Store the base URL without a trailing slash so paths can be
        # appended as "/models" and "/chat/completions".
        self.base_url = base_url.rstrip('/')

    async def get_models(self, api_key: str) -> list[str]:
        """Return the model ids listed by ``GET {base_url}/models``.

        Best effort: any failure is printed and an empty list is returned.
        """
        headers = {"Authorization": f"Bearer {api_key}"}
        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(f"{self.base_url}/models", headers=headers, timeout=5.0)
                response.raise_for_status()
                data = response.json()
                if isinstance(data, dict) and "data" in data:
                    return [model["id"] for model in data["data"]]
                return []
            except Exception as e:
                # Deliberately broad: model discovery must never crash callers.
                print(f"Failed to fetch models from {self.base_url}: {e}")
                return []

    async def generate(self, request: CompletionRequest, api_key: str) -> str:
        """Run a non-streaming chat completion and return the message text.

        Raises:
            httpx.HTTPStatusError: If the provider answers with 4xx/5xx.
        """
        headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
        async with httpx.AsyncClient() as client:
            # to_api_dict() merges ``extra`` into the payload; a plain
            # model_dump() would send it as a nested "extra" key.
            payload = request.to_api_dict()
            payload["stream"] = False
            response = await client.post(f"{self.base_url}/chat/completions", headers=headers, json=payload, timeout=60.0)
            response.raise_for_status()
            return response.json()["choices"][0]["message"]["content"]

    @staticmethod
    def _delta_content(event) -> str:
        """Extract the text delta from one decoded SSE event ("" if none)."""
        if not isinstance(event, dict):
            return ""
        choices = event.get("choices") or []
        if not choices or not isinstance(choices[0], dict):
            # Some events carry no choices at all; skip them instead of
            # failing with IndexError.
            return ""
        delta = choices[0].get("delta") or {}
        return delta.get("content") or ""

    async def generate_stream(self, request: CompletionRequest, api_key: str) -> AsyncGenerator[str, None]:
        """Run a streaming chat completion and yield text deltas.

        Raises:
            httpx.HTTPStatusError: If the provider answers with 4xx/5xx.
        """
        import json

        headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
        async with httpx.AsyncClient() as client:
            payload = request.to_api_dict()
            payload["stream"] = True

            # Same timeout as generate(); without it httpx's short default
            # applies between chunks.
            async with client.stream(
                "POST", f"{self.base_url}/chat/completions",
                headers=headers, json=payload, timeout=60.0,
            ) as response:
                response.raise_for_status()
                # Iterate by line: a raw text chunk may hold several SSE
                # events or only part of one, which lost output before.
                async for line in response.aiter_lines():
                    if not line.startswith("data:"):
                        continue
                    data_str = line[len("data:"):].strip()
                    if data_str == "[DONE]":
                        break
                    try:
                        event = json.loads(data_str)
                    except json.JSONDecodeError:
                        # Ignore keep-alives and malformed events.
                        continue
                    content = self._delta_content(event)
                    if content:
                        yield content
@@ -0,0 +1,105 @@
1
+ """
2
+ Universal Provider Registry
3
+ ============================
4
+ 50+ providers mapped by their env var prefix → base URL.
5
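+ Entries are intended as OpenAI-compatible REST API base URLs; URLs containing a {placeholder} (e.g. AZURE, DATABRICKS) are templates and need a real value via <PROVIDER>_BASE_URL.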
6
+ Custom provider: set MYPROVIDER_API_KEYS + MYPROVIDER_BASE_URL.
7
+ """
8
+
9
# Maps an upper-case env-var prefix to the provider's default base URL.
# URLs containing "{...}" are templates and cannot be used as-is.
PROVIDER_REGISTRY: dict[str, str] = {
    # ── Frontier / Big Cloud ──────────────────────────────────────────────
    "OPENAI": "https://api.openai.com/v1",
    "AZURE": "https://{resource}.openai.azure.com/openai",  # needs override
    "ANTHROPIC": "https://api.anthropic.com/v1",
    "GOOGLE": "https://generativelanguage.googleapis.com/v1beta",
    "VERTEXAI": "https://us-central1-aiplatform.googleapis.com/v1",
    "AWS_BEDROCK": "https://bedrock-runtime.us-east-1.amazonaws.com",

    # ── Chinese / Asia ────────────────────────────────────────────────────
    "DEEPSEEK": "https://api.deepseek.com/v1",
    "QWEN": "https://dashscope.aliyuncs.com/compatible-mode/v1",
    "DASHSCOPE": "https://dashscope.aliyuncs.com/compatible-mode/v1",
    "MOONSHOT": "https://api.moonshot.cn/v1",
    "MINIMAX": "https://api.minimax.chat/v1",
    "ZHIPU": "https://open.bigmodel.cn/api/paas/v4",
    "BAIDU": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop",
    "VOLCANO": "https://ark.cn-beijing.volces.com/api/v3",
    "XIAOMI": "https://api.mimo.xiaomi.com/v1",
    "ZAI": "https://open.bigmodel.cn/api/paas/v4",

    # ── Fast Inference / Aggregators ─────────────────────────────────────
    "GROQ": "https://api.groq.com/openai/v1",
    "TOGETHER": "https://api.together.xyz/v1",
    "FIREWORKS": "https://api.fireworks.ai/inference/v1",
    "ANYSCALE": "https://api.endpoints.anyscale.com/v1",
    "PERPLEXITY": "https://api.perplexity.ai",
    "OPENROUTER": "https://openrouter.ai/api/v1",
    "REPLICATE": "https://api.replicate.com/v1",
    "DEEPINFRA": "https://api.deepinfra.com/v1/openai",
    "FEATHERLESS": "https://api.featherless.ai/v1",
    "NOVITA": "https://api.novita.ai/v3/openai",
    "CHUTES": "https://llm.chutes.ai/v1",
    "NSCALE": "https://inference.api.nscale.com/v1",
    "NEBIUS": "https://api.studio.nebius.ai/v1",
    "HYPERBOLIC": "https://api.hyperbolic.xyz/v1",
    "LAMBDA": "https://api.lambdalabs.com/v1",
    "SAMBANOVA": "https://api.sambanova.ai/v1",
    "CEREBRAS": "https://api.cerebras.ai/v1",
    "FRIENDLIAI": "https://inference.friendli.ai/v1",
    "GALADRIEL": "https://api.galadriel.com/v1",
    "GMI": "https://api.gmi.cloud/v1",
    "PUBLICAI": "https://api.public.ai/v1",
    "COMETAPI": "https://api.comet.ai/v1",
    "AIML": "https://api.aimlapi.com/v1",

    # ── Specialized Providers ─────────────────────────────────────────────
    "MISTRAL": "https://api.mistral.ai/v1",
    "CODESTRAL": "https://codestral.mistral.ai/v1",
    "COHERE": "https://api.cohere.com/v1",
    "AI21": "https://api.ai21.com/studio/v1",
    "NLP_CLOUD": "https://api.nlpcloud.io/v1",
    "ALEPH_ALPHA": "https://api.aleph-alpha.com",
    "PREDIBASE": "https://serving.app.predibase.com",
    "CLARIFAI": "https://api.clarifai.com/v2",
    "HUGGINGFACE": "https://api-inference.huggingface.co/models",
    "BASETEN": "https://model-{model_id}.api.baseten.co/environments/production/predict",
    "GRADIENT": "https://api.gradient.ai/api",

    # ── Local / Self-Hosted ───────────────────────────────────────────────
    "OLLAMA": "http://localhost:11434/v1",
    "LM_STUDIO": "http://localhost:1234/v1",
    "VLLM": "http://localhost:8000/v1",
    "LLAMAFILE": "http://localhost:8080/v1",
    "TRITON": "http://localhost:8001/v2",
    "XINFERENCE": "http://localhost:9997/v1",
    "DOCKER_MODEL": "http://localhost:12434/engines/llama.cpp/v1",

    # ── Enterprise / Cloud ────────────────────────────────────────────────
    "DATABRICKS": "https://{workspace}.azuredatabricks.net/serving-endpoints",
    "SAGEMAKER": "https://runtime.sagemaker.us-east-1.amazonaws.com",
    "SNOWFLAKE": "https://{account}.snowflakecomputing.com/api/v2",
    "WATSONX": "https://us-south.ml.cloud.ibm.com/ml/v1",
    "SAP": "https://api.ai.internalprod.eu-central-1.aws.ml.hana.ondemand.com",
    "OCI": "https://inference.generativeai.us-chicago-1.oci.customer-oci.com/20231130",
    "CLOUDFLARE": "https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run",
    "HEROKU": "https://llm.api.heroku.com/v1",
    "OVH": "https://oai.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1",
    "SCALEWAY": "https://api.scaleway.ai/v1",
    "DATAROBOT": "https://app.datarobot.com/api/v2",

    # ── Nvidia ────────────────────────────────────────────────────────────
    "NVIDIA": "https://integrate.api.nvidia.com/v1",
    "NVIDIA_NIM": "https://integrate.api.nvidia.com/v1",

    # ── GitHub / Microsoft ────────────────────────────────────────────────
    "GITHUB": "https://models.inference.ai.azure.com",
    "VERCEL": "https://ai-gateway.vercel.sh",
    "XAI": "https://api.x.ai/v1",

    # ── Image Generation ─────────────────────────────────────────────────
    "STABILITY": "https://api.stability.ai/v1",
    "FAL": "https://fal.run",
    "RECRAFT": "https://external.api.recraft.ai/v1",
    "RUNWAYML": "https://api.dev.runwayml.com/v1",
    "BLACK_FOREST": "https://api.us1.bfl.ai/v1",
}
llmcycle/schema.py ADDED
@@ -0,0 +1,47 @@
1
+ """
2
+ Production schema models with Pydantic v2.
3
+ """
4
+ from __future__ import annotations
5
+ from typing import Optional, List, Dict, Any, Union
6
+ from pydantic import BaseModel, Field
7
+ import time
8
+
9
class Message(BaseModel):
    """A single chat message in a completion request."""

    role: str  # speaker of the message, e.g. "user" or "assistant"
    content: str  # text of the message
12
+
13
class CompletionRequest(BaseModel):
    """Parameters of one chat-completion call.

    Optional fields left as ``None`` are omitted when the request is
    serialized with ``to_api_dict``.
    """

    model: str
    messages: List[Message]
    stream: bool = False
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
    top_p: Optional[float] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    stop: Optional[Union[str, List[str]]] = None
    # Free-form additional parameters merged into the top level of the payload.
    extra: Optional[Dict[str, Any]] = None

    def to_api_dict(self) -> dict:
        """Build the JSON payload for an OpenAI-compatible API.

        ``None`` fields and the ``extra`` field itself are dropped; the
        entries of ``extra`` are then merged on top, so they can override
        the regular fields.
        """
        payload = self.model_dump(exclude_none=True, exclude={"extra"})
        payload["messages"] = [msg.model_dump() for msg in self.messages]
        payload.update(self.extra or {})
        return payload
32
+
33
class CompletionResponse(BaseModel):
    """Result of a completed (non-streaming) completion call."""

    id: str
    model: str
    provider: str
    content: str
    prompt_tokens: int = 0
    completion_tokens: int = 0
    latency_ms: float = 0.0  # presumably milliseconds, per the name - TODO confirm
    created_at: float = Field(default_factory=time.time)  # epoch seconds at construction
42
+
43
class StreamChunk(BaseModel):
    """One piece of a streamed completion."""

    content: str
    model: str
    provider: str
    done: bool = False  # presumably marks the final chunk - TODO confirm against producers
@@ -0,0 +1 @@
1
+ """UI Package."""
llmcycle/ui/app.py ADDED
@@ -0,0 +1,88 @@
1
+ import os
2
+ import secrets
3
+ from fastapi import FastAPI, Depends, HTTPException, status
4
+ from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
5
+ from fastapi.responses import FileResponse
6
+ from pydantic import BaseModel
7
+ from typing import List, Dict, Any
8
+ from llmcycle import LLMCycle
9
+ from pathlib import Path
10
+
11
# FastAPI application serving the dashboard API and its HTML page.
app = FastAPI(title="LLMCycle API Dashboard")

# Extracts the bearer token on protected routes; tokens are issued by /api/token.
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="api/token")

# Static token for simplicity in this MVP. For prod, use JWTs.
# Generated once at import time, so it changes whenever the module is re-imported.
SESSION_TOKEN = secrets.token_urlsafe(32)

# Path to templates/static
BASE_DIR = Path(__file__).resolve().parent
templates_dir = BASE_DIR / "templates"

# Global Client Instance (created at import time with default arguments)
llm_client = LLMCycle()
24
+
25
+ # Models
26
class ProviderInfo(BaseModel):
    """Dashboard summary of one registered provider."""

    name: str  # provider name
    base_url: str  # base URL the provider sends requests to
    total_keys: int  # number of keys registered for the provider
    active_keys: int  # number of those keys counted as active
31
+
32
class DashboardData(BaseModel):
    """Response body of the dashboard endpoint."""

    providers: List[ProviderInfo]  # one entry per registered provider
    fallbacks: Dict[str, List[str]]  # model name -> ordered fallback model names
35
+
36
@app.post("/api/token")
async def login(form_data: OAuth2PasswordRequestForm = Depends()):
    """Exchange the admin username/password for the session bearer token.

    Credentials come from ``LLMCYCLE_USER_ADMIN`` and
    ``LLMCYCLE_USER_ADMIN_PASSWORD``. The historical misspelling
    ``LLMCYCLE_USER_ADMIN_PAASWORD`` is still honoured as a fallback.

    Raises:
        HTTPException: 401 when the credentials do not match.
    """
    # NOTE(review): both values default to "admin"; consider refusing to
    # start without an explicit password when binding to a public interface.
    correct_username = os.environ.get("LLMCYCLE_USER_ADMIN", "admin")
    correct_password = os.environ.get(
        "LLMCYCLE_USER_ADMIN_PASSWORD",
        os.environ.get("LLMCYCLE_USER_ADMIN_PAASWORD", "admin"),
    )

    # compare_digest only accepts ASCII ``str``; compare UTF-8 bytes so
    # non-ASCII input yields 401 instead of an unhandled TypeError (500).
    # Both comparisons always run, so timing does not reveal which one failed.
    username_ok = secrets.compare_digest(
        form_data.username.encode("utf-8"), correct_username.encode("utf-8")
    )
    password_ok = secrets.compare_digest(
        form_data.password.encode("utf-8"), correct_password.encode("utf-8")
    )

    if not (username_ok and password_ok):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password",
            headers={"WWW-Authenticate": "Bearer"},
        )

    return {"access_token": SESSION_TOKEN, "token_type": "bearer"}
50
+
51
async def verify_token(token: str = Depends(oauth2_scheme)):
    """Dependency that accepts only the current session token.

    Returns:
        The token, when it matches.

    Raises:
        HTTPException: 401 when the token does not match.
    """
    # Compare as bytes: compare_digest raises TypeError for non-ASCII ``str``,
    # which would surface as a 500 instead of a 401.
    if not secrets.compare_digest(token.encode("utf-8"), SESSION_TOKEN.encode("utf-8")):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authentication credentials",
            headers={"WWW-Authenticate": "Bearer"},
        )
    return token
59
+
60
@app.get("/api/dashboard", response_model=DashboardData)
async def get_dashboard_data(token: str = Depends(verify_token)):
    """Protected API endpoint returning all dashboard data as JSON."""
    provider_details = []
    for p in llm_client.get_available_providers():
        # Use KeyManager's public counters. The previous code read a
        # non-existent ``_stats`` attribute and failed with AttributeError.
        counts = llm_client.key_manager.key_count(p)
        provider_details.append(ProviderInfo(
            name=p.upper(),
            base_url=llm_client.providers[p].base_url,
            total_keys=counts["total"],
            active_keys=counts["active"],
        ))

    # Only strategies backed by a fallback table expose ``fallbacks``.
    fallbacks = getattr(llm_client.router.strategy, "fallbacks", {})

    return DashboardData(providers=provider_details, fallbacks=fallbacks)
84
+
85
@app.get("/")
async def serve_ui():
    """Return the dashboard's static HTML page from the templates directory."""
    dashboard_page = templates_dir / "dashboard.html"
    return FileResponse(dashboard_page)
@@ -0,0 +1,378 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>LLMCycle API Dashboard</title>
7
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;800&display=swap" rel="stylesheet">
8
+ <style>
9
+ :root {
10
+ --bg: #0a0a0f;
11
+ --glass-bg: rgba(255, 255, 255, 0.03);
12
+ --glass-border: rgba(255, 255, 255, 0.08);
13
+ --primary: #6366f1;
14
+ --primary-glow: rgba(99, 102, 241, 0.4);
15
+ --text-main: #f8fafc;
16
+ --text-muted: #94a3b8;
17
+ --success: #10b981;
18
+ --error: #ef4444;
19
+ }
20
+
21
+ body {
22
+ margin: 0;
23
+ padding: 0;
24
+ background-color: var(--bg);
25
+ color: var(--text-main);
26
+ font-family: 'Inter', sans-serif;
27
+ min-height: 100vh;
28
+ background-image:
29
+ radial-gradient(circle at 15% 50%, rgba(99, 102, 241, 0.15), transparent 25%),
30
+ radial-gradient(circle at 85% 30%, rgba(168, 85, 247, 0.15), transparent 25%);
31
+ background-attachment: fixed;
32
+ }
33
+
34
+ .container {
35
+ max-width: 1200px;
36
+ margin: 0 auto;
37
+ padding: 40px 20px;
38
+ }
39
+
40
+ header {
41
+ display: flex;
42
+ justify-content: space-between;
43
+ align-items: center;
44
+ margin-bottom: 50px;
45
+ }
46
+
47
+ h1 {
48
+ font-weight: 800;
49
+ font-size: 2.5rem;
50
+ margin: 0;
51
+ background: linear-gradient(135deg, #a855f7, #6366f1);
52
+ -webkit-background-clip: text;
53
+ -webkit-text-fill-color: transparent;
54
+ text-shadow: 0 0 30px var(--primary-glow);
55
+ }
56
+
57
+ .user-badge {
58
+ background: var(--glass-bg);
59
+ border: 1px solid var(--glass-border);
60
+ padding: 8px 16px;
61
+ border-radius: 20px;
62
+ backdrop-filter: blur(10px);
63
+ font-size: 0.9rem;
64
+ color: var(--text-muted);
65
+ cursor: pointer;
66
+ }
67
+
68
+ .user-badge:hover {
69
+ border-color: rgba(239, 68, 68, 0.5);
70
+ color: var(--error);
71
+ }
72
+
73
+ .grid {
74
+ display: grid;
75
+ grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
76
+ gap: 24px;
77
+ }
78
+
79
+ .card {
80
+ background: var(--glass-bg);
81
+ border: 1px solid var(--glass-border);
82
+ border-radius: 16px;
83
+ padding: 24px;
84
+ backdrop-filter: blur(12px);
85
+ transition: transform 0.3s ease, box-shadow 0.3s ease;
86
+ position: relative;
87
+ overflow: hidden;
88
+ }
89
+
90
+ .card::before {
91
+ content: "";
92
+ position: absolute;
93
+ top: 0; left: 0; right: 0;
94
+ height: 1px;
95
+ background: linear-gradient(90deg, transparent, rgba(255,255,255,0.2), transparent);
96
+ }
97
+
98
+ .card:hover {
99
+ transform: translateY(-5px);
100
+ box-shadow: 0 10px 30px -10px var(--primary-glow);
101
+ border-color: rgba(99, 102, 241, 0.3);
102
+ }
103
+
104
+ .card-header {
105
+ display: flex;
106
+ justify-content: space-between;
107
+ align-items: center;
108
+ margin-bottom: 16px;
109
+ }
110
+
111
+ .card-title {
112
+ font-size: 1.25rem;
113
+ font-weight: 600;
114
+ margin: 0;
115
+ }
116
+
117
+ .status-dot {
118
+ width: 10px;
119
+ height: 10px;
120
+ background-color: var(--success);
121
+ border-radius: 50%;
122
+ box-shadow: 0 0 10px var(--success);
123
+ animation: pulse 2s infinite;
124
+ }
125
+
126
+ @keyframes pulse {
127
+ 0% { box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.4); }
128
+ 70% { box-shadow: 0 0 0 10px rgba(16, 185, 129, 0); }
129
+ 100% { box-shadow: 0 0 0 0 rgba(16, 185, 129, 0); }
130
+ }
131
+
132
+ .metric {
133
+ display: flex;
134
+ justify-content: space-between;
135
+ margin-bottom: 8px;
136
+ color: var(--text-muted);
137
+ font-size: 0.95rem;
138
+ }
139
+
140
+ .metric span.value {
141
+ color: var(--text-main);
142
+ font-weight: 600;
143
+ }
144
+
145
+ .url {
146
+ font-size: 0.8rem;
147
+ color: var(--primary);
148
+ word-break: break-all;
149
+ background: rgba(99, 102, 241, 0.1);
150
+ padding: 4px 8px;
151
+ border-radius: 6px;
152
+ margin-top: 12px;
153
+ display: inline-block;
154
+ }
155
+
156
+ .section-title {
157
+ margin: 40px 0 20px 0;
158
+ font-size: 1.5rem;
159
+ font-weight: 600;
160
+ color: var(--text-main);
161
+ }
162
+
163
+ /* Login Form Styles */
164
+ #login-view {
165
+ max-width: 400px;
166
+ margin: 100px auto;
167
+ text-align: center;
168
+ }
169
+
170
+ input {
171
+ width: 100%;
172
+ padding: 12px;
173
+ margin-bottom: 16px;
174
+ border-radius: 8px;
175
+ border: 1px solid var(--glass-border);
176
+ background: rgba(0,0,0,0.2);
177
+ color: white;
178
+ box-sizing: border-box;
179
+ font-family: 'Inter', sans-serif;
180
+ }
181
+
182
+ input:focus {
183
+ outline: none;
184
+ border-color: var(--primary);
185
+ }
186
+
187
+ button {
188
+ width: 100%;
189
+ padding: 12px;
190
+ border-radius: 8px;
191
+ border: none;
192
+ background: linear-gradient(135deg, #6366f1, #a855f7);
193
+ color: white;
194
+ font-weight: 600;
195
+ cursor: pointer;
196
+ font-family: 'Inter', sans-serif;
197
+ transition: opacity 0.2s;
198
+ }
199
+
200
+ button:hover {
201
+ opacity: 0.9;
202
+ }
203
+
204
+ #error-msg {
205
+ color: var(--error);
206
+ margin-bottom: 16px;
207
+ font-size: 0.9rem;
208
+ display: none;
209
+ }
210
+
211
+ </style>
212
+ </head>
213
+ <body>
214
+ <div class="container">
215
+
216
+ <!-- LOGIN VIEW -->
217
+ <div id="login-view">
218
+ <h1 style="margin-bottom: 30px;">LLMCycle Login</h1>
219
+ <div id="error-msg">Invalid username or password.</div>
220
+ <form id="login-form">
221
+ <input type="text" id="username" placeholder="Username" required>
222
+ <input type="password" id="password" placeholder="Password" required>
223
+ <button type="submit">Authenticate</button>
224
+ </form>
225
+ </div>
226
+
227
+ <!-- DASHBOARD VIEW -->
228
+ <div id="dashboard-view" style="display: none;">
229
+ <header>
230
+ <h1>LLMCycle ♻️</h1>
231
+ <div class="user-badge" id="logout-btn">Log Out</div>
232
+ </header>
233
+
234
+ <h2 class="section-title">Active Providers</h2>
235
+ <div class="grid" id="providers-container">
236
+ <!-- Dynamically populated -->
237
+ </div>
238
+
239
+ <h2 class="section-title">Fallback Routing Logic</h2>
240
+ <div class="card" style="max-width: 600px;" id="fallbacks-container">
241
+ <!-- Dynamically populated -->
242
+ </div>
243
+ </div>
244
+
245
+ </div>
246
+
247
+ <script>
248
// Element handles shared by the handlers below and by loadDashboard().
const loginView = document.getElementById('login-view');
const dashboardView = document.getElementById('dashboard-view');
const loginForm = document.getElementById('login-form');
const errorMsg = document.getElementById('error-msg');
const logoutBtn = document.getElementById('logout-btn');

// A token kept from an earlier visit lets us go straight to the dashboard.
let token = localStorage.getItem('llmcycle_token');

if (!token) {
    loginView.style.display = 'block';
} else {
    loadDashboard();
}

// Login: post the credentials as a URL-encoded form and keep the returned token.
loginForm.addEventListener('submit', async (event) => {
    event.preventDefault();

    const credentials = new URLSearchParams({
        username: document.getElementById('username').value,
        password: document.getElementById('password').value
    });

    try {
        const response = await fetch('/api/token', {
            method: 'POST',
            headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
            body: credentials
        });

        if (!response.ok) {
            errorMsg.style.display = 'block';
            return;
        }

        const payload = await response.json();
        localStorage.setItem('llmcycle_token', payload.access_token);
        token = payload.access_token;
        errorMsg.style.display = 'none';
        loadDashboard();
    } catch (err) {
        console.error(err);
        errorMsg.style.display = 'block';
    }
});

// Logout: forget the token, swap back to the login view, clear the password box.
logoutBtn.addEventListener('click', () => {
    localStorage.removeItem('llmcycle_token');
    token = null;
    dashboardView.style.display = 'none';
    loginView.style.display = 'block';
    document.getElementById('password').value = '';
});
300
+
301
+ // Fetch API and Build UI
302
/**
 * Fetch /api/dashboard with the current bearer token and show the result.
 *
 * - 401: the stored token is discarded (localStorage and in-memory) and the
 *   login view is shown.
 * - Any other non-2xx status: treated as a failure and logged; the response
 *   body is NOT handed to renderDashboard(), since it is an error payload
 *   rather than dashboard data.
 * - Success: the data is rendered and the dashboard view replaces the login view.
 */
async function loadDashboard() {
    try {
        const response = await fetch('/api/dashboard', {
            headers: { 'Authorization': `Bearer ${token}` }
        });

        if (response.status === 401) {
            // Token invalid: drop every copy so it is not reused.
            localStorage.removeItem('llmcycle_token');
            token = null;
            loginView.style.display = 'block';
            dashboardView.style.display = 'none';
            return;
        }

        if (!response.ok) {
            throw new Error(`Dashboard request failed with HTTP ${response.status}`);
        }

        const data = await response.json();
        renderDashboard(data);

        loginView.style.display = 'none';
        dashboardView.style.display = 'block';
    } catch (err) {
        console.error('Failed to load dashboard', err);
    }
}
325
+
326
/**
 * Render the provider cards and the fallback-routing list.
 *
 * @param {{providers: Array<{name: string, base_url: string, total_keys: number,
 *          active_keys: number}>, fallbacks: Object<string, string[]>}} data
 *        Payload returned by /api/dashboard.
 *
 * Every server-provided value is HTML-escaped before being placed in the
 * markup, and each container's markup is built as one string and assigned
 * once instead of appending to innerHTML inside a loop.
 */
function renderDashboard(data) {
    const HTML_ENTITIES = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };
    // Escape a value for safe interpolation into HTML text content.
    const escapeHtml = (value) =>
        String(value).replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch]);

    const pContainer = document.getElementById('providers-container');
    const fContainer = document.getElementById('fallbacks-container');

    // Providers
    if (data.providers.length === 0) {
        pContainer.innerHTML = `
            <div class="card" style="grid-column: 1 / -1; text-align: center; border-style: dashed;">
                <p style="color: var(--text-muted);">No providers detected. Add API keys to your .env file.</p>
            </div>`;
    } else {
        pContainer.innerHTML = data.providers.map(p => `
            <div class="card">
                <div class="card-header">
                    <h3 class="card-title">${escapeHtml(p.name)}</h3>
                    <div class="status-dot"></div>
                </div>
                <div class="metric">
                    <span>Total Keys:</span>
                    <span class="value">${escapeHtml(p.total_keys)}</span>
                </div>
                <div class="metric">
                    <span>Active Keys:</span>
                    <span class="value">${escapeHtml(p.active_keys)}</span>
                </div>
                <div class="url">${escapeHtml(p.base_url)}</div>
            </div>`).join('');
    }

    // Fallbacks
    const fKeys = Object.keys(data.fallbacks);
    if (fKeys.length === 0) {
        fContainer.innerHTML = `<p style="color: var(--text-muted); margin: 0;">No custom fallback routes configured.</p>`;
    } else {
        fContainer.innerHTML = fKeys.map(primary => {
            const secondaries = data.fallbacks[primary].map(escapeHtml).join(', ');
            return `
            <div class="metric" style="align-items: center; border-bottom: 1px solid var(--glass-border); padding-bottom: 12px; margin-bottom: 12px;">
                <span style="font-weight: 600; color: #a855f7;">${escapeHtml(primary)}</span>
                <span style="color: var(--text-muted); margin: 0 10px;">➜ falls back to ➜</span>
                <span style="font-weight: 600;">${secondaries}</span>
            </div>`;
        }).join('');
    }
}
376
+ </script>
377
+ </body>
378
+ </html>
@@ -0,0 +1,100 @@
1
+ Metadata-Version: 2.4
2
+ Name: llmcycle
3
+ Version: 0.1.0
4
+ Summary: Cycle through multiple LLM providers with smart fallback, load balancing, and unified API
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: fastapi>=0.136.1
7
+ Requires-Dist: httpx>=0.28.1
8
+ Requires-Dist: jinja2>=3.1.6
9
+ Requires-Dist: pydantic>=2.13.4
10
+ Requires-Dist: python-dotenv>=1.2.2
11
+ Requires-Dist: python-multipart>=0.0.29
12
+ Requires-Dist: uvicorn>=0.47.0
13
+ Description-Content-Type: text/markdown
14
+
15
+ # LLMCycle ♻️
16
+
17
+ An enterprise-grade, highly resilient LLM management and routing framework. Designed to be **better than LiteLLM** with advanced multi-key support, customized routing (sort order), robust mid-stream error failovers, and a premium **Web Dashboard**.
18
+
19
+ ## 🚀 Key Features
20
+
21
+ * **🔑 Universal Provider Support**: Supports *any* provider on the market instantly. Just add `<PROVIDER_NAME>_API_KEYS` to your `.env`!
22
+ * **⚖️ Auto Load-Balancing**: Load multiple API keys for the same provider simply by comma-separating them in your `.env`. LLMCycle automatically round-robins across them and tracks rate limits locally.
23
+ * **🛣️ Custom Fallback Routing**: Configure custom routing. If a primary provider fails, LLMCycle automatically falls back to the secondary providers you configured for it.
24
+ * **🛡️ Streaming Time Resilience**: If an LLM disconnects *while streaming a response*, LLMCycle captures the generated text, silently switches to your fallback model, and resumes the stream seamlessly. The client never notices!
25
+ * **🖥️ Premium Web Dashboard**: Manage and view your keys, active providers, and fallback routes through a beautifully designed, secure UI.
26
+
27
+ ## 📦 Installation
28
+
29
+ ```bash
30
+ uv add llmcycle
31
+ uv add python-dotenv httpx fastapi uvicorn jinja2 python-multipart
32
+ ```
33
+
34
+ ## ⚙️ Configuration (`.env`)
35
+
36
+ Drop your keys into a `.env` file. To use multiple keys for load balancing, just separate them with commas!
37
+
38
+ ```env
39
+ DEEPSEEK_API_KEYS=sk-deepseek-1,sk-deepseek-2
40
+ OPENAI_API_KEYS=sk-openai-primary
41
+ TOGETHER_API_KEYS=sk-together-1
42
+
43
+ # You can even use completely custom providers!
44
+ # LLMCycle will default the base URL to https://api.mycustomai.com/v1
45
+ MYCUSTOMAI_API_KEYS=sk-custom
46
+ # Or explicitly define the base URL for custom providers
47
+ OLLAMA_API_KEYS=local
48
+ OLLAMA_BASE_URL=http://localhost:11434/v1
49
+
50
+ # UI Dashboard Auth (note: the password variable is spelled PAASWORD in this release)
51
+ LLMCYCLE_USER_ADMIN=admin
52
+ LLMCYCLE_USER_ADMIN_PAASWORD=admin
53
+ ```
54
+
55
+ ## 🖥️ Starting the Web Dashboard
56
+
57
+ We built a gorgeous, premium Glassmorphism dashboard to monitor your providers.
58
+
59
+ ```bash
60
+ # Make sure your PYTHONPATH is set if running from source:
61
+ # Windows: $env:PYTHONPATH="src"
62
+ # Linux/Mac: export PYTHONPATH="src"
63
+
64
+ uv run llmcycle ui
65
+ ```
66
+ *Navigate to `http://127.0.0.1:8000` and log in with the credentials defined in your `.env`!*
67
+
68
+ ## 💻 Usage: Everything in One!
69
+
70
+ ```python
71
+ import asyncio
72
+ from llmcycle import LLMCycle
73
+
74
+ async def main():
75
+ # 1. Initialization (Auto-loads all providers & keys from .env)
76
+ client = LLMCycle(
77
+ env_path=".env",
78
+ custom_fallbacks={
79
+ "deepseek": ["openai", "together"] # Sort order / Fallback chain
80
+ }
81
+ )
82
+
83
+ # 2. List all dynamically loaded providers
84
+ providers = client.get_available_providers()
85
+ print("Loaded Providers:", providers)
86
+
87
+ # 3. Query models supported by a provider
88
+ models = await client.get_provider_models("deepseek")
89
+ print("DeepSeek Models:", models)
90
+
91
+ if __name__ == "__main__":
92
+ asyncio.run(main())
93
+ ```
94
+
95
+ ## 🔌 Massive Universal Provider Registry
96
+
97
+ LLMCycle is pre-configured with base URLs for the most popular platforms:
98
+ `OPENAI`, `DEEPSEEK`, `ANTHROPIC`, `TOGETHER`, `GROQ`, `MISTRAL`, `PERPLEXITY`, `ANYSCALE`, `FIREWORKS`, `COHERE`, `DATABRICKS`, `HUGGINGFACE`.
99
+
100
+ **Wildcard Support:** If you define `RANDOM_API_KEYS`, LLMCycle will automatically assume the base URL `https://api.random.com/v1`. If that's wrong, just define `RANDOM_BASE_URL` in your `.env`!
@@ -0,0 +1,18 @@
1
+ llmcycle/__init__.py,sha256=JajfAd6eO65ZR5mcqSFaFH55qrlUtUTB25jIgVaMEXQ,921
2
+ llmcycle/cli.py,sha256=SFD1mMmVNlDPWjseSxpCIM3roVLccVxVRwKX2jZGTzw,613
3
+ llmcycle/client.py,sha256=WZepJ5-pBK26F7u1DPVQ-lcUMwop-dHEEldvnZCXhu4,3100
4
+ llmcycle/schema.py,sha256=en8KR8u4uhgd40OsnJ7gGtdGdbtUaVWGCndlgdQYS-8,1293
5
+ llmcycle/core/errors.py,sha256=yL8HWqKGTl8DIfr1wqOPBxXI4xXbbTz6KCy82QLQI5A,3208
6
+ llmcycle/core/keys.py,sha256=CUUqNXhP2VO7Ov3kIcAesBcMH7zdpmJ6Uh9zbeQ0Ews,6585
7
+ llmcycle/core/router.py,sha256=RNR1CE7lq4k9R_WS1szyf458MNTDh6-9O77pxpEt-DE,1291
8
+ llmcycle/core/stream.py,sha256=Cxr3G7-MEs_LoPgj2snDvairrgml7SOuFytEweEUFqQ,3397
9
+ llmcycle/providers/base.py,sha256=u7pegcCUA4HhLcvMVyW1JKqJvSdM_2OmLHzJ_6YKTHM,722
10
+ llmcycle/providers/openai_compatible.py,sha256=FR5AOAstzE9eDTKo3Gv2fRiyghVP_IxWYRUGlwsK93Y,2829
11
+ llmcycle/providers/registry.py,sha256=v1g0bpBqPI14tdrS0HM_F4B-16d0AfRVvOENiU-aWW4,6747
12
+ llmcycle/ui/__init__.py,sha256=tUoGY9CO2NW1BZ5oOJ1Es1QB_qEMRr2uTGDhreaB2Nw,18
13
+ llmcycle/ui/app.py,sha256=OAZK0Mo8Lj6loayG04G4nxNIXSLQvvBiNuCFxbkmV6Y,3040
14
+ llmcycle/ui/templates/dashboard.html,sha256=vt2E6qgVL7SR1NYIlcoNW45p6WhPUVXVXIrZa-_GFT4,12744
15
+ llmcycle-0.1.0.dist-info/METADATA,sha256=Jd_PQqNiLYZB1sZ9LrDku0M9zF0wIDQPqQsnHM3sihw,3874
16
+ llmcycle-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
17
+ llmcycle-0.1.0.dist-info/entry_points.txt,sha256=F8C9GmojlaRYpS_ru_t0MwL7Ju06fZLy1KqJlVaDuck,47
18
+ llmcycle-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ llmcycle = llmcycle.cli:main