llmcycle 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llmcycle-0.1.0/.gitignore +11 -0
- llmcycle-0.1.0/.python-version +1 -0
- llmcycle-0.1.0/PKG-INFO +100 -0
- llmcycle-0.1.0/README.md +86 -0
- llmcycle-0.1.0/example.py +58 -0
- llmcycle-0.1.0/example_auto.py +29 -0
- llmcycle-0.1.0/main.py +6 -0
- llmcycle-0.1.0/pyproject.toml +28 -0
- llmcycle-0.1.0/src/llmcycle/__init__.py +25 -0
- llmcycle-0.1.0/src/llmcycle/cli.py +17 -0
- llmcycle-0.1.0/src/llmcycle/client.py +73 -0
- llmcycle-0.1.0/src/llmcycle/core/errors.py +71 -0
- llmcycle-0.1.0/src/llmcycle/core/keys.py +184 -0
- llmcycle-0.1.0/src/llmcycle/core/router.py +37 -0
- llmcycle-0.1.0/src/llmcycle/core/stream.py +79 -0
- llmcycle-0.1.0/src/llmcycle/providers/base.py +21 -0
- llmcycle-0.1.0/src/llmcycle/providers/openai_compatible.py +56 -0
- llmcycle-0.1.0/src/llmcycle/providers/registry.py +105 -0
- llmcycle-0.1.0/src/llmcycle/schema.py +47 -0
- llmcycle-0.1.0/src/llmcycle/ui/__init__.py +1 -0
- llmcycle-0.1.0/src/llmcycle/ui/app.py +88 -0
- llmcycle-0.1.0/src/llmcycle/ui/templates/dashboard.html +378 -0
- llmcycle-0.1.0/tests/test_keys.py +23 -0
- llmcycle-0.1.0/uv.lock +496 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.11
|
llmcycle-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: llmcycle
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Cycle through multiple LLM providers with smart fallback, load balancing, and unified API
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: fastapi>=0.136.1
|
|
7
|
+
Requires-Dist: httpx>=0.28.1
|
|
8
|
+
Requires-Dist: jinja2>=3.1.6
|
|
9
|
+
Requires-Dist: pydantic>=2.13.4
|
|
10
|
+
Requires-Dist: python-dotenv>=1.2.2
|
|
11
|
+
Requires-Dist: python-multipart>=0.0.29
|
|
12
|
+
Requires-Dist: uvicorn>=0.47.0
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# LLMCycle ♻️
|
|
16
|
+
|
|
17
|
+
An enterprise-grade, highly resilient LLM management and routing framework. Designed to be **better than LiteLLM** with advanced multi-key support, customized routing (sort order), robust mid-stream error failovers, and a premium **Web Dashboard**.
|
|
18
|
+
|
|
19
|
+
## 🚀 Key Features
|
|
20
|
+
|
|
21
|
+
* **🔑 Universal Provider Support**: Supports *any* provider on the market instantly. Just add `<PROVIDER_NAME>_API_KEYS` to your `.env`!
|
|
22
|
+
* **⚖️ Auto Load-Balancing**: Load multiple API keys for the same provider simply by comma-separating them in your `.env`. LLMCycle automatically round-robins across them and tracks rate limits locally.
|
|
23
|
+
* **🛣️ Custom Fallback Routing**: Configure custom routing. If a primary provider fails, it automatically falls back to your configured secondary.
|
|
24
|
+
* **🛡️ Streaming Time Resilience**: If an LLM disconnects *while streaming a response*, LLMCycle captures the generated text, silently switches to your fallback model, and resumes the stream seamlessly. The client never notices!
|
|
25
|
+
* **🖥️ Premium Web Dashboard**: Manage and view your keys, active providers, and fallback routes through a beautifully designed, secure UI.
|
|
26
|
+
|
|
27
|
+
## 📦 Installation
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
uv add llmcycle
|
|
31
|
+
uv add python-dotenv httpx fastapi uvicorn jinja2 python-multipart
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## ⚙️ Configuration (`.env`)
|
|
35
|
+
|
|
36
|
+
Drop your keys into a `.env` file. To use multiple keys for load balancing, just separate them with commas!
|
|
37
|
+
|
|
38
|
+
```env
|
|
39
|
+
DEEPSEEK_API_KEYS=sk-deepseek-1,sk-deepseek-2
|
|
40
|
+
OPENAI_API_KEYS=sk-openai-primary
|
|
41
|
+
TOGETHER_API_KEYS=sk-together-1
|
|
42
|
+
|
|
43
|
+
# You can even use completely custom providers!
|
|
44
|
+
# LLMCycle will default the base URL to https://api.mycustomai.com/v1
|
|
45
|
+
MYCUSTOMAI_API_KEYS=sk-custom
|
|
46
|
+
# Or explicitly define the base URL for custom providers
|
|
47
|
+
OLLAMA_API_KEYS=local
|
|
48
|
+
OLLAMA_BASE_URL=http://localhost:11434/v1
|
|
49
|
+
|
|
50
|
+
# UI Dashboard Auth
|
|
51
|
+
LLMCYCLE_USER_ADMIN=admin
|
|
52
|
+
LLMCYCLE_USER_ADMIN_PAASWORD=admin
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## 🖥️ Starting the Web Dashboard
|
|
56
|
+
|
|
57
|
+
We built a gorgeous, premium Glassmorphism dashboard to monitor your providers.
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# Make sure your PYTHONPATH is set if running from source:
|
|
61
|
+
# Windows: $env:PYTHONPATH="src"
|
|
62
|
+
# Linux/Mac: export PYTHONPATH="src"
|
|
63
|
+
|
|
64
|
+
uv run llmcycle ui
|
|
65
|
+
```
|
|
66
|
+
*Navigate to `http://127.0.0.1:8000` and log in with the credentials defined in your `.env`!*
|
|
67
|
+
|
|
68
|
+
## 💻 Usage: Everything in One!
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import asyncio
|
|
72
|
+
from llmcycle import LLMCycle
|
|
73
|
+
|
|
74
|
+
async def main():
|
|
75
|
+
# 1. Initialization (Auto-loads all providers & keys from .env)
|
|
76
|
+
client = LLMCycle(
|
|
77
|
+
env_path=".env",
|
|
78
|
+
custom_fallbacks={
|
|
79
|
+
"deepseek": ["openai", "together"] # Sort order / Fallback chain
|
|
80
|
+
}
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
# 2. List all dynamically loaded providers
|
|
84
|
+
providers = client.get_available_providers()
|
|
85
|
+
print("Loaded Providers:", providers)
|
|
86
|
+
|
|
87
|
+
# 3. Query models supported by a provider
|
|
88
|
+
models = await client.get_provider_models("deepseek")
|
|
89
|
+
print("DeepSeek Models:", models)
|
|
90
|
+
|
|
91
|
+
if __name__ == "__main__":
|
|
92
|
+
asyncio.run(main())
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## 🔌 Massive Universal Provider Registry
|
|
96
|
+
|
|
97
|
+
LLMCycle is pre-configured with base URLs for the most popular platforms:
|
|
98
|
+
`OPENAI`, `DEEPSEEK`, `ANTHROPIC`, `TOGETHER`, `GROQ`, `MISTRAL`, `PERPLEXITY`, `ANYSCALE`, `FIREWORKS`, `COHERE`, `DATABRICKS`, `HUGGINGFACE`.
|
|
99
|
+
|
|
100
|
+
**Wildcard Support:** If you define `RANDOM_API_KEYS` in your `.env` for a provider that is not listed above, LLMCycle will guess the base URL from the name and assume `https://api.random.com/v1`. If that's wrong, just define `RANDOM_BASE_URL` in your `.env`!
|
llmcycle-0.1.0/README.md
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# LLMCycle ♻️
|
|
2
|
+
|
|
3
|
+
An enterprise-grade, highly resilient LLM management and routing framework. Designed to be **better than LiteLLM** with advanced multi-key support, customized routing (sort order), robust mid-stream error failovers, and a premium **Web Dashboard**.
|
|
4
|
+
|
|
5
|
+
## 🚀 Key Features
|
|
6
|
+
|
|
7
|
+
* **🔑 Universal Provider Support**: Supports *any* provider on the market instantly. Just add `<PROVIDER_NAME>_API_KEYS` to your `.env`!
|
|
8
|
+
* **⚖️ Auto Load-Balancing**: Load multiple API keys for the same provider simply by comma-separating them in your `.env`. LLMCycle automatically round-robins across them and tracks rate limits locally.
|
|
9
|
+
* **🛣️ Custom Fallback Routing**: Configure custom routing. If a primary provider fails, it automatically falls back to your configured secondary.
|
|
10
|
+
* **🛡️ Streaming Time Resilience**: If an LLM disconnects *while streaming a response*, LLMCycle captures the generated text, silently switches to your fallback model, and resumes the stream seamlessly. The client never notices!
|
|
11
|
+
* **🖥️ Premium Web Dashboard**: Manage and view your keys, active providers, and fallback routes through a beautifully designed, secure UI.
|
|
12
|
+
|
|
13
|
+
## 📦 Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
uv add llmcycle
|
|
17
|
+
uv add python-dotenv httpx fastapi uvicorn jinja2 python-multipart
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## ⚙️ Configuration (`.env`)
|
|
21
|
+
|
|
22
|
+
Drop your keys into a `.env` file. To use multiple keys for load balancing, just separate them with commas!
|
|
23
|
+
|
|
24
|
+
```env
|
|
25
|
+
DEEPSEEK_API_KEYS=sk-deepseek-1,sk-deepseek-2
|
|
26
|
+
OPENAI_API_KEYS=sk-openai-primary
|
|
27
|
+
TOGETHER_API_KEYS=sk-together-1
|
|
28
|
+
|
|
29
|
+
# You can even use completely custom providers!
|
|
30
|
+
# LLMCycle will default the base URL to https://api.mycustomai.com/v1
|
|
31
|
+
MYCUSTOMAI_API_KEYS=sk-custom
|
|
32
|
+
# Or explicitly define the base URL for custom providers
|
|
33
|
+
OLLAMA_API_KEYS=local
|
|
34
|
+
OLLAMA_BASE_URL=http://localhost:11434/v1
|
|
35
|
+
|
|
36
|
+
# UI Dashboard Auth
|
|
37
|
+
LLMCYCLE_USER_ADMIN=admin
|
|
38
|
+
LLMCYCLE_USER_ADMIN_PAASWORD=admin
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## 🖥️ Starting the Web Dashboard
|
|
42
|
+
|
|
43
|
+
We built a gorgeous, premium Glassmorphism dashboard to monitor your providers.
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
# Make sure your PYTHONPATH is set if running from source:
|
|
47
|
+
# Windows: $env:PYTHONPATH="src"
|
|
48
|
+
# Linux/Mac: export PYTHONPATH="src"
|
|
49
|
+
|
|
50
|
+
uv run llmcycle ui
|
|
51
|
+
```
|
|
52
|
+
*Navigate to `http://127.0.0.1:8000` and log in with the credentials defined in your `.env`!*
|
|
53
|
+
|
|
54
|
+
## 💻 Usage: Everything in One!
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
import asyncio
|
|
58
|
+
from llmcycle import LLMCycle
|
|
59
|
+
|
|
60
|
+
async def main():
|
|
61
|
+
# 1. Initialization (Auto-loads all providers & keys from .env)
|
|
62
|
+
client = LLMCycle(
|
|
63
|
+
env_path=".env",
|
|
64
|
+
custom_fallbacks={
|
|
65
|
+
"deepseek": ["openai", "together"] # Sort order / Fallback chain
|
|
66
|
+
}
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
# 2. List all dynamically loaded providers
|
|
70
|
+
providers = client.get_available_providers()
|
|
71
|
+
print("Loaded Providers:", providers)
|
|
72
|
+
|
|
73
|
+
# 3. Query models supported by a provider
|
|
74
|
+
models = await client.get_provider_models("deepseek")
|
|
75
|
+
print("DeepSeek Models:", models)
|
|
76
|
+
|
|
77
|
+
if __name__ == "__main__":
|
|
78
|
+
asyncio.run(main())
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## 🔌 Massive Universal Provider Registry
|
|
82
|
+
|
|
83
|
+
LLMCycle is pre-configured with base URLs for the most popular platforms:
|
|
84
|
+
`OPENAI`, `DEEPSEEK`, `ANTHROPIC`, `TOGETHER`, `GROQ`, `MISTRAL`, `PERPLEXITY`, `ANYSCALE`, `FIREWORKS`, `COHERE`, `DATABRICKS`, `HUGGINGFACE`.
|
|
85
|
+
|
|
86
|
+
**Wildcard Support:** If you define `RANDOM_API_KEYS` in your `.env` for a provider that is not listed above, LLMCycle will guess the base URL from the name and assume `https://api.random.com/v1`. If that's wrong, just define `RANDOM_BASE_URL` in your `.env`!
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from llmcycle.core.keys import KeyManager
|
|
3
|
+
from llmcycle.core.router import ModelRouter, FallbackRouter
|
|
4
|
+
from llmcycle.core.stream import StreamResilienceManager
|
|
5
|
+
from llmcycle.schema import CompletionRequest, Message
|
|
6
|
+
from llmcycle.providers.base import LLMProvider
|
|
7
|
+
from typing import AsyncGenerator
|
|
8
|
+
|
|
9
|
+
class MockProvider(LLMProvider):
    """Stand-in provider whose stream can be made to break part-way through.

    When ``should_fail`` is true, ``generate_stream`` raises
    ``ConnectionError`` on reaching its fourth chunk, after the first three
    chunks have already been yielded.
    """

    def __init__(self, should_fail: bool):
        self.should_fail = should_fail

    async def generate(self, request: CompletionRequest, api_key: str) -> str:
        """Return a fixed, non-streamed reply; both arguments are ignored."""
        return "Hello World"

    async def generate_stream(self, request: CompletionRequest, api_key: str) -> AsyncGenerator[str, None]:
        """Yield canned chunks 0.1 s apart, optionally failing at index 3."""
        failing_index = 3
        position = 0
        for piece in ("Hello", " world", ",", " how", " are", " you?"):
            if self.should_fail and position == failing_index:
                raise ConnectionError("Mock streaming disconnect")
            yield piece
            await asyncio.sleep(0.1)
            position += 1

    async def get_models(self, api_key: str) -> list[str]:
        """Return a fixed list of model names; ``api_key`` is ignored."""
        return ["gpt-4", "gpt-4-turbo"]
|
|
27
|
+
|
|
28
|
+
async def main():
    """Run the streaming demo against two mock providers.

    Registers one key each for "gpt-4-turbo" and "gpt-4", configures
    "gpt-4" as the fallback for "gpt-4-turbo", and streams a request through
    ``StreamResilienceManager.safe_stream``. The "gpt-4-turbo" mock is built
    with ``should_fail=True`` so its stream breaks part-way through.
    """
    print("Setting up LLMCycle Manager...")
    km = KeyManager()
    km.add_key("gpt-4-turbo", "sk-mock-1")
    km.add_key("gpt-4", "sk-mock-2")

    router = ModelRouter(FallbackRouter({"gpt-4-turbo": ["gpt-4"]}))

    providers = {
        "gpt-4-turbo": MockProvider(should_fail=True),
        "gpt-4": MockProvider(should_fail=False),
    }

    stream_manager = StreamResilienceManager(router, km, providers)

    request = CompletionRequest(
        model="gpt-4-turbo",
        messages=[Message(role="user", content="Say hello!")],
    )

    print("\nStarting robust stream...")
    try:
        async for chunk in stream_manager.safe_stream(request):
            print(chunk, end="", flush=True)
    except Exception as e:
        # Demo boundary: report the failure instead of crashing the script.
        print(f"\nStream failed completely: {e}")
    else:
        # Only claim success when the stream actually ran to completion;
        # previously this line was printed even after a reported failure.
        print("\n\nFinished stream successfully, even with mid-stream disconnect!")


if __name__ == "__main__":
    asyncio.run(main())
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from llmcycle import LLMCycle
|
|
3
|
+
|
|
4
|
+
async def main():
    """Show which providers LLMCycle loaded and list a few DeepSeek models."""
    print("Initializing LLMCycle (auto-loading from .env)...")
    # Fallback map: if deepseek fails, openai is the configured fallback.
    fallback_chain = {"deepseek": ["openai"]}
    client = LLMCycle(env_path=".env", custom_fallbacks=fallback_chain)

    # 1. Providers discovered from the environment.
    loaded = client.get_available_providers()
    print(f"\nProviders automatically loaded: {loaded}")

    # 2. Model listing for one provider (this makes a network request to the
    #    provider's base URL). With the fake keys of a sample .env the request
    #    is not expected to succeed.
    if "deepseek" not in loaded:
        return

    print("\nFetching models for DeepSeek using loaded keys...")
    models = await client.get_provider_models("deepseek")
    if not models:
        print("Failed to fetch models (Fake API key used in .env)")
        return
    print(f"DeepSeek Models: {models[:3]} ...")


if __name__ == "__main__":
    asyncio.run(main())
|
llmcycle-0.1.0/main.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "llmcycle"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Cycle through multiple LLM providers with smart fallback, load balancing, and unified API"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"fastapi>=0.136.1",
|
|
13
|
+
"httpx>=0.28.1",
|
|
14
|
+
"jinja2>=3.1.6",
|
|
15
|
+
"pydantic>=2.13.4",
|
|
16
|
+
"python-dotenv>=1.2.2",
|
|
17
|
+
"python-multipart>=0.0.29",
|
|
18
|
+
"uvicorn>=0.47.0",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.scripts]
|
|
22
|
+
llmcycle = "llmcycle.cli:main"
|
|
23
|
+
|
|
24
|
+
[dependency-groups]
|
|
25
|
+
dev = [
|
|
26
|
+
"pytest>=9.0.3",
|
|
27
|
+
"pytest-asyncio>=1.3.0",
|
|
28
|
+
]
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLMCycle - Production-Grade Universal LLM Router
|
|
3
|
+
=================================================
|
|
4
|
+
Auto multi-key rotation, intelligent error handling, streaming resilience,
|
|
5
|
+
and support for 50+ providers out-of-the-box.
|
|
6
|
+
"""
|
|
7
|
+
from .client import LLMCycle
|
|
8
|
+
from .schema import CompletionRequest, Message, CompletionResponse, StreamChunk
|
|
9
|
+
from .core.keys import KeyManager, KeyStatus
|
|
10
|
+
from .core.router import ModelRouter, RoutingStrategy
|
|
11
|
+
from .core.errors import (
|
|
12
|
+
LLMCycleError, RateLimitError, AuthenticationError,
|
|
13
|
+
ProviderError, AllProvidersFailedError, StreamInterruptedError,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"LLMCycle",
|
|
18
|
+
"CompletionRequest", "Message", "CompletionResponse", "StreamChunk",
|
|
19
|
+
"KeyManager", "KeyStatus",
|
|
20
|
+
"ModelRouter", "RoutingStrategy",
|
|
21
|
+
"LLMCycleError", "RateLimitError", "AuthenticationError",
|
|
22
|
+
"ProviderError", "AllProvidersFailedError", "StreamInterruptedError",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
# Must match `version` in pyproject.toml (the published metadata says 0.1.0).
__version__ = "0.1.0"
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import uvicorn
|
|
3
|
+
|
|
4
|
+
def main():
    """Entry point of the ``llmcycle`` command-line tool.

    Parses the command and server options and, for the ``ui`` command, serves
    the ``llmcycle.ui.app:app`` application with uvicorn.

    Options:
        --host: interface to bind (default ``127.0.0.1``).
        --port: TCP port to bind (default ``8000``).
        --reload / --no-reload: whether uvicorn restarts the server when
            source files change. Defaults to on, which matches the previous
            hard-coded behaviour; pass ``--no-reload`` for a stable server.
    """
    parser = argparse.ArgumentParser(description="LLMCycle CLI")
    parser.add_argument("command", choices=["ui"], help="Command to run")
    parser.add_argument("--host", default="127.0.0.1", help="Host for the UI")
    parser.add_argument("--port", type=int, default=8000, help="Port for the UI")
    parser.add_argument(
        "--reload",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Restart the server when source files change (disable with --no-reload)",
    )

    args = parser.parse_args()

    if args.command == "ui":
        print(f"Starting LLMCycle Dashboard on http://{args.host}:{args.port}")
        # The app is passed as an import string, which uvicorn requires in
        # order to support reloading.
        uvicorn.run("llmcycle.ui.app:app", host=args.host, port=args.port, reload=args.reload)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from dotenv import load_dotenv
|
|
3
|
+
from llmcycle.core.keys import KeyManager
|
|
4
|
+
from llmcycle.core.router import ModelRouter, FallbackRouter
|
|
5
|
+
from llmcycle.core.stream import StreamResilienceManager
|
|
6
|
+
from llmcycle.providers.openai_compatible import OpenAICompatibleProvider
|
|
7
|
+
|
|
8
|
+
# Built-in base URLs, keyed by upper-case provider name. A provider found in
# the environment that is missing here gets a URL guessed from its name.
PROVIDER_BASE_URLS = dict(
    OPENAI="https://api.openai.com/v1",
    DEEPSEEK="https://api.deepseek.com/v1",
    ANTHROPIC="https://api.anthropic.com/v1",
    TOGETHER="https://api.together.xyz/v1",
    GROQ="https://api.groq.com/openai/v1",
    MISTRAL="https://api.mistral.ai/v1",
    PERPLEXITY="https://api.perplexity.ai",
    ANYSCALE="https://api.endpoints.anyscale.com/v1",
    FIREWORKS="https://api.fireworks.ai/inference/v1",
    COHERE="https://api.cohere.com/v1",
    DATABRICKS="https://serving.api.databricks.com/serving-endpoints",
    HUGGINGFACE="https://api-inference.huggingface.co/models",
)
|
|
23
|
+
|
|
24
|
+
class LLMCycle:
|
|
25
|
+
"""Main entrypoint for LLMCycle with Universal Provider Support."""
|
|
26
|
+
|
|
27
|
+
def __init__(self, env_path: str = ".env", custom_fallbacks: dict = None):
|
|
28
|
+
load_dotenv(env_path)
|
|
29
|
+
|
|
30
|
+
self.key_manager = KeyManager()
|
|
31
|
+
self.providers = {}
|
|
32
|
+
|
|
33
|
+
# Auto-discover
|
|
34
|
+
self._auto_load_keys()
|
|
35
|
+
|
|
36
|
+
# Setup Routing Strategy
|
|
37
|
+
fallbacks = custom_fallbacks or {}
|
|
38
|
+
self.router = ModelRouter(FallbackRouter(fallbacks))
|
|
39
|
+
self.stream_manager = StreamResilienceManager(self.router, self.key_manager, self.providers)
|
|
40
|
+
|
|
41
|
+
def _auto_load_keys(self):
|
|
42
|
+
"""Finds any env var ending with _API_KEYS and universally registers the provider."""
|
|
43
|
+
for key, val in os.environ.items():
|
|
44
|
+
if key.endswith("_API_KEYS"):
|
|
45
|
+
provider_name = key.replace("_API_KEYS", "").upper()
|
|
46
|
+
keys = [k.strip() for k in val.split(",") if k.strip()]
|
|
47
|
+
|
|
48
|
+
if not keys:
|
|
49
|
+
continue
|
|
50
|
+
|
|
51
|
+
# 1. Check if user explicitly defined a BASE URL for this provider
|
|
52
|
+
# 2. Check the massive default registry
|
|
53
|
+
# 3. Fallback: Assume a standard OpenAI compatible format
|
|
54
|
+
base_url = os.environ.get(f"{provider_name}_BASE_URL")
|
|
55
|
+
if not base_url:
|
|
56
|
+
base_url = PROVIDER_BASE_URLS.get(provider_name, f"https://api.{provider_name.lower()}.com/v1")
|
|
57
|
+
|
|
58
|
+
if base_url:
|
|
59
|
+
self.providers[provider_name.lower()] = OpenAICompatibleProvider(base_url)
|
|
60
|
+
for k in keys:
|
|
61
|
+
self.key_manager.add_key(provider_name.lower(), k)
|
|
62
|
+
|
|
63
|
+
def get_available_providers(self) -> list[str]:
|
|
64
|
+
return list(self.providers.keys())
|
|
65
|
+
|
|
66
|
+
async def get_provider_models(self, provider_name: str) -> list[str]:
|
|
67
|
+
p_name = provider_name.lower()
|
|
68
|
+
if p_name not in self.providers:
|
|
69
|
+
return []
|
|
70
|
+
key = self.key_manager.get_next_key(p_name)
|
|
71
|
+
if not key:
|
|
72
|
+
return []
|
|
73
|
+
return await self.providers[p_name].get_models(key)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom exceptions for LLMCycle.
|
|
3
|
+
All errors map from HTTP status codes so the router knows exactly
|
|
4
|
+
what to do: retry, rotate key, skip provider, or give up.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
class LLMCycleError(Exception):
    """Root of the LLMCycle exception hierarchy.

    Besides the message, each error records the provider name, the model
    name and the HTTP status code it relates to; these default to ``""``,
    ``""`` and ``0``.
    """

    def __init__(self, message: str, provider: str = "", model: str = "", status_code: int = 0):
        super().__init__(message)
        self.provider, self.model, self.status_code = provider, model, status_code
|
|
14
|
+
|
|
15
|
+
class RateLimitError(LLMCycleError):
    """HTTP 429: the rate limit was exceeded. Rotate the key and retry."""
|
|
18
|
+
|
|
19
|
+
class AuthenticationError(LLMCycleError):
    """HTTP 401: the API key is invalid. Disable the key permanently."""
|
|
22
|
+
|
|
23
|
+
class ProviderError(LLMCycleError):
    """HTTP 400/500+: provider-side error. Try the next provider."""
|
|
26
|
+
|
|
27
|
+
class QuotaExceededError(LLMCycleError):
    """HTTP 402, or 429 with a quota message: key quota exhausted. Rotate the key."""
|
|
30
|
+
|
|
31
|
+
class ContentPolicyError(LLMCycleError):
    """HTTP 400: content policy violation. Do NOT retry - fail fast."""
|
|
34
|
+
|
|
35
|
+
class StreamInterruptedError(LLMCycleError):
    """A stream was cut off part-way through.

    ``partial_text`` holds the text generated before the interruption
    (empty string by default). Any further keyword arguments are forwarded
    to ``LLMCycleError``.
    """

    def __init__(self, message: str, partial_text: str = "", **kwargs):
        self.partial_text = partial_text
        super().__init__(message, **kwargs)
|
|
40
|
+
|
|
41
|
+
class AllProvidersFailedError(LLMCycleError):
    """Raised when every provider in the fallback chain fails.

    Attributes:
        errors: The list of errors passed in, one per failed attempt. The
            list object is stored as given, not copied.
    """

    def __init__(self, errors: list):
        """Build the summary message from ``errors``.

        An empty list is accepted: indexing ``errors[-1]`` unconditionally
        would raise ``IndexError`` while constructing the exception and hide
        the real failure.
        """
        if errors:
            msg = f"All {len(errors)} providers failed. Last error: {errors[-1]}"
        else:
            msg = "All 0 providers failed. No errors were recorded."
        super().__init__(msg)
        self.errors = errors
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Translate an HTTP status code (plus response body) into an exception.
def classify_http_error(status_code: int, response_text: str, provider: str, model: str) -> LLMCycleError:
    """Build the LLMCycle exception that matches an HTTP error response.

    The exception is returned, not raised. Mapping:

    * 401 -> ``AuthenticationError``
    * 429 -> ``QuotaExceededError`` when the body mentions "quota",
      "limit exceeded", "exhausted" or "billing"; else ``RateLimitError``
    * 402 -> ``QuotaExceededError``
    * 400 -> ``ContentPolicyError`` when the body mentions "content_policy",
      "safety", "harmful" or "violat"; else ``ProviderError``
    * anything else -> ``ProviderError`` with the body cut to 200 characters

    Body matching is case-insensitive. Every exception carries ``provider``,
    ``model`` and ``status_code``.
    """
    lowered = response_text.lower()

    def mentions(*needles: str) -> bool:
        # True when any needle occurs in the lower-cased response body.
        return any(needle in lowered for needle in needles)

    def build(exc_type, detail: str) -> LLMCycleError:
        # All messages share the "[provider] " prefix and the same metadata.
        return exc_type(f"[{provider}] {detail}", provider=provider, model=model, status_code=status_code)

    if status_code == 401:
        return build(AuthenticationError, f"Auth failed (401): {response_text}")

    if status_code == 429:
        if mentions("quota", "limit exceeded", "exhausted", "billing"):
            return build(QuotaExceededError, f"Quota exceeded (429): {response_text}")
        return build(RateLimitError, f"Rate limited (429): {response_text}")

    if status_code == 402:
        return build(QuotaExceededError, f"Payment required (402): {response_text}")

    if status_code == 400:
        if mentions("content_policy", "safety", "harmful", "violat"):
            return build(ContentPolicyError, f"Content policy (400): {response_text}")
        return build(ProviderError, f"Bad request (400): {response_text}")

    # All other 4xx/5xx
    return build(ProviderError, f"HTTP {status_code}: {response_text[:200]}")
|