codex-proxy 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codex_proxy/__init__.py +3 -0
- codex_proxy/__main__.py +66 -0
- codex_proxy/circuit_breaker.py +83 -0
- codex_proxy/compaction.py +42 -0
- codex_proxy/config.py +313 -0
- codex_proxy/key_rotation.py +108 -0
- codex_proxy/plugins.py +110 -0
- codex_proxy/plugins_builtin.py +34 -0
- codex_proxy/providers.py +130 -0
- codex_proxy/server.py +647 -0
- codex_proxy/store.py +97 -0
- codex_proxy/translator.py +360 -0
- codex_proxy/tui.py +262 -0
- codex_proxy-3.1.0.dist-info/METADATA +25 -0
- codex_proxy-3.1.0.dist-info/RECORD +18 -0
- codex_proxy-3.1.0.dist-info/WHEEL +4 -0
- codex_proxy-3.1.0.dist-info/entry_points.txt +2 -0
- codex_proxy-3.1.0.dist-info/licenses/LICENSE +21 -0
codex_proxy/__init__.py
ADDED
codex_proxy/__main__.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""CLI entry point — `codex-proxy` or `python -m codex_proxy`."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from .config import DEFAULT_CONFIG, load_config, write_example_config
|
|
10
|
+
from .server import run
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def main(argv: list[str] | None = None) -> None:
    """Parse CLI arguments, resolve the configuration and start the proxy.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what the console entry point uses.

    The function exits the process with status 0 after handling ``--init``
    or ``--print-config``; otherwise it hands control to ``run``.
    """
    ap = argparse.ArgumentParser(
        prog="codex-proxy",
        description="Responses API to Chat Completions bridge for Codex CLI",
    )
    ap.add_argument("--config", "-c", type=str, default=None,
                    help=f"Config file path (default: {DEFAULT_CONFIG})")
    ap.add_argument("--host", type=str, default=None,
                    help="Override bind host")
    ap.add_argument("--port", "-p", type=int, default=None,
                    help="Override bind port")
    ap.add_argument("--init", action="store_true",
                    help="Write example config and exit")
    ap.add_argument("--print-config", action="store_true",
                    help="Print resolved config and exit")
    ap.add_argument("--tui", "-t", action="store_true",
                    help="Launch interactive Rich TUI dashboard")
    args = ap.parse_args(argv)

    if args.init:
        path = write_example_config()
        print(f"Example config written to {path}")
        sys.exit(0)

    # A missing/empty --config stays falsy, so load_config uses its default path.
    config = load_config(args.config and Path(args.config))

    if args.host:
        config.server.host = args.host
    # Compare against None rather than truthiness so an explicit
    # `--port 0` is honoured instead of being silently discarded.
    if args.port is not None:
        config.server.port = args.port

    if args.print_config:
        print(f" host: {config.server.host}")
        print(f" port: {config.server.port}")
        print(f" provider: {config.provider.display_name}")
        print(f" base_url: {config.provider.base_url}")
        print(f" models: {', '.join(config.provider.models)}")
        # Never echo the key itself, only whether one is resolved.
        print(f" api_key: {'***' if config.provider.effective_api_key() else '(empty)'}")
        print(f" key_pool: {len(config.provider.effective_api_keys())} key(s)")
        print(f" circuit_breaker: {'enabled' if config.circuit_breaker.enabled else 'disabled'} (threshold={config.circuit_breaker.failure_threshold}, timeout={config.circuit_breaker.recovery_timeout}s)")
        print(f" compaction: {'enabled' if config.compaction.enabled else 'disabled'} (max_messages={config.compaction.max_messages}, keep_last={config.compaction.keep_last})")
        print(f" plugins: {'enabled' if config.plugins.enabled else 'disabled'} ({len(config.plugins.plugins)} configured)")
        sys.exit(0)

    # A missing key is only warned about; the proxy still starts.
    if not config.provider.effective_api_key():
        print("WARNING: No API key configured.")
        print(f" Set api_key or api_key_env in {DEFAULT_CONFIG}")
        print(" Or set CODEX_PROXY_API_KEY / OPENAI_API_KEY env var")

    run(config, tui=args.tui)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Async circuit breaker for upstream provider protection."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import time
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from enum import Enum
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger("codex-proxy")


class CircuitState(Enum):
    """States a :class:`CircuitBreaker` can be in."""

    CLOSED = "closed"
    OPEN = "open"
    HALF_OPEN = "half_open"


@dataclass
class CircuitBreaker:
    """Simple circuit breaker for upstream requests.

    - CLOSED: requests flow normally
    - OPEN: requests are rejected immediately (fail fast)
    - HALF_OPEN: up to ``half_open_max`` requests are allowed through to
      test recovery

    Callers ask :meth:`can_execute` before a request and report the outcome
    with :meth:`record_success` / :meth:`record_failure`.
    """

    # Failures (while CLOSED) needed to open the circuit.
    failure_threshold: int = 5
    # Seconds to stay OPEN before a recovery probe is allowed.
    recovery_timeout: float = 30.0
    # Number of probe requests admitted per HALF_OPEN window.
    half_open_max: int = 1

    state: CircuitState = field(default=CircuitState.CLOSED, init=False)
    failure_count: int = field(default=0, init=False)
    last_failure_time: float = field(default=0.0, init=False)
    half_open_count: int = field(default=0, init=False)
    # Wall-clock time at which the current HALF_OPEN probe window started.
    half_open_since: float = field(default=0.0, init=False)

    def can_execute(self) -> bool:
        """Check if a request can proceed.

        Returns:
            ``True`` when the request may be sent, ``False`` when it should
            be rejected. The call may move the breaker from OPEN to
            HALF_OPEN once ``recovery_timeout`` has elapsed since the last
            recorded failure.
        """
        if self.state == CircuitState.CLOSED:
            return True

        now = time.time()
        if self.state == CircuitState.OPEN:
            if now - self.last_failure_time >= self.recovery_timeout:
                self.state = CircuitState.HALF_OPEN
                # The request that triggers the transition is the first probe.
                self.half_open_count = 1
                self.half_open_since = now
                logger.info("Circuit breaker: OPEN -> HALF_OPEN")
                return True
            return False

        # HALF_OPEN
        if self.half_open_count < self.half_open_max:
            self.half_open_count += 1
            return True
        if now - self.half_open_since >= self.recovery_timeout:
            # The admitted probes never reported an outcome (neither
            # record_success nor record_failure was called). Without
            # re-arming, the breaker would reject every request forever.
            self.half_open_count = 1
            self.half_open_since = now
            return True
        return False

    def record_success(self) -> None:
        """Record a successful request.

        A success while HALF_OPEN closes the circuit; a success while
        CLOSED clears the accumulated failure count. A success reported
        while OPEN is ignored.
        """
        if self.state == CircuitState.HALF_OPEN:
            self.state = CircuitState.CLOSED
            self.failure_count = 0
            logger.info("Circuit breaker: HALF_OPEN -> CLOSED")
        elif self.state == CircuitState.CLOSED:
            self.failure_count = 0

    def record_failure(self) -> None:
        """Record a failed request.

        Always bumps ``failure_count`` and refreshes ``last_failure_time``.
        A failure while HALF_OPEN re-opens the circuit; while CLOSED the
        circuit opens once ``failure_threshold`` is reached.
        """
        self.failure_count += 1
        self.last_failure_time = time.time()
        if self.state == CircuitState.HALF_OPEN:
            self.state = CircuitState.OPEN
            logger.warning("Circuit breaker: HALF_OPEN -> OPEN")
        elif (
            self.state == CircuitState.CLOSED
            and self.failure_count >= self.failure_threshold
        ):
            # Guarded on CLOSED so late failures arriving while already OPEN
            # do not log a bogus "CLOSED -> OPEN" transition.
            self.state = CircuitState.OPEN
            logger.warning(
                "Circuit breaker: CLOSED -> OPEN (failures=%d)", self.failure_count,
            )

    def get_status(self) -> dict:
        """Return circuit breaker status for monitoring."""
        return {
            "state": self.state.value,
            "failure_count": self.failure_count,
            "failure_threshold": self.failure_threshold,
            "recovery_timeout": self.recovery_timeout,
        }
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Context compaction — trim conversation history to fit within limits."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def compact_messages(
    messages: list[dict], max_messages: int = 50, keep_last: int = 20,
) -> list[dict]:
    """Compact messages if the conversation exceeds max_messages.

    Strategy:
    - Always keep the system message (if it is the first message)
    - Keep the last `keep_last` messages
    - Drop messages in between, inserting a compaction notice
    - Never start the retained window on a ``role: "tool"`` message, since
      its originating assistant tool call would have been dropped

    Args:
        messages: Chat messages as dicts carrying a ``"role"`` key.
        max_messages: Compaction only happens above this message count.
        keep_last: Number of trailing messages to retain; values below
            zero are treated as zero.

    Returns:
        ``messages`` itself (same object) when nothing needs trimming,
        otherwise a new list ``[system?] + [notice] + retained``.
    """
    if len(messages) <= max_messages:
        return messages

    system_msgs: list[dict] = []
    rest = messages
    if messages and messages[0].get("role") == "system":
        system_msgs = [messages[0]]
        rest = messages[1:]

    keep_last = max(keep_last, 0)
    if len(rest) <= keep_last:
        return messages

    # Use an explicit start index: `rest[-keep_last:]` would return the
    # whole list when keep_last == 0.
    cut = len(rest) - keep_last

    # A tool result whose assistant `tool_calls` message was dropped is
    # rejected by OpenAI-compatible chat APIs, so drop such orphans too.
    while cut < len(rest) and rest[cut].get("role") == "tool":
        cut += 1

    kept = rest[cut:]
    dropped_count = cut

    compaction_notice = {
        "role": "system",
        "content": (
            f"[Context compacted: {dropped_count} earlier messages were "
            "summarized to fit context limits. Continue based on the "
            "remaining conversation.]"
        ),
    }

    return system_msgs + [compaction_notice] + kept
|
codex_proxy/config.py
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
"""Config file loading — ~/.codex-proxy/config.toml"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
if sys.version_info >= (3, 11):
|
|
11
|
+
import tomllib
|
|
12
|
+
else:
|
|
13
|
+
import tomli as tomllib # type: ignore[import-not-found]
|
|
14
|
+
|
|
15
|
+
# Per-user directory holding the config file and, by default, the log directory.
DEFAULT_DIR = Path.home() / ".codex-proxy"
# Config file used when no explicit path is given.
DEFAULT_CONFIG = DEFAULT_DIR / "config.toml"
# Default bind port for the proxy server.
DEFAULT_PORT = 4242
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class ProviderConfig:
    """Upstream provider settings and API-key resolution helpers."""

    name: str = "zai"
    display_name: str = "Z.AI"
    base_url: str = "https://api.z.ai/api/paas/v4"
    api_key: str = ""
    api_key_env: str = ""  # env var name to read key from
    api_keys: list[str] = field(default_factory=list)
    api_keys_env: list[str] = field(default_factory=list)
    models: list[str] = field(default_factory=lambda: ["glm-5.1", "glm-5", "glm-4.7"])
    default_model: str = "glm-5.1"
    stream: bool = True
    extra_headers: dict[str, str] = field(default_factory=dict)

    def effective_api_key(self) -> str:
        """Resolve the single API key.

        A literal ``api_key`` wins; otherwise the environment variable named
        by ``api_key_env`` is consulted. Returns ``""`` when neither yields
        a value.
        """
        if not self.api_key and self.api_key_env:
            return os.environ.get(self.api_key_env, "")
        return self.api_key or ""

    def effective_api_keys(self) -> list[str]:
        """Return the full resolved key pool.

        The pool is the literal ``api_keys`` followed by every non-empty
        value of the variables listed in ``api_keys_env``. Only when that
        pool is empty does the single key from ``effective_api_key`` act as
        a fallback.
        """
        pool: list[str] = list(self.api_keys) if self.api_keys else []
        from_env = (os.environ.get(var, "") for var in self.api_keys_env)
        pool += [value for value in from_env if value]
        if pool:
            return pool
        fallback = self.effective_api_key()
        return [fallback] if fallback else []
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class ServerConfig:
    """Bind address and logging settings for the proxy server."""

    host: str = "127.0.0.1"
    port: int = DEFAULT_PORT
    # One of: debug, info, warning, error (per the example config).
    log_level: str = "warning"
    # Defaults to the "logs" directory under DEFAULT_DIR.
    log_dir: Path = field(default_factory=lambda: DEFAULT_DIR / "logs")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass
class StoreConfig:
    """Settings for the response store."""

    # Response cache TTL in seconds (600 = 10 minutes).
    ttl_seconds: int = 600
    # Maximum number of cached responses.
    max_entries: int = 100
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class CircuitBreakerConfig:
    """Settings for the upstream circuit breaker."""

    enabled: bool = True
    # Failures before the circuit opens.
    failure_threshold: int = 5
    # Seconds before a half-open recovery attempt.
    recovery_timeout: float = 30.0
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
class CompactionConfig:
    """Settings for automatic trimming of long conversations."""

    enabled: bool = True
    # Compaction is triggered above this message count.
    max_messages: int = 50
    # Number of most recent messages to preserve.
    keep_last: int = 20
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass
class PluginConfig:
    """Settings for hook-based middleware plugins."""

    # Plugins are off unless explicitly enabled.
    enabled: bool = False
    # Plugin identifiers; the example config uses dotted paths such as
    # "codex_proxy.plugins_builtin.LoggingPlugin".
    plugins: list[str] = field(default_factory=list)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@dataclass
class ProxyConfig:
    """Top-level configuration: one sub-config per TOML section.

    Every section defaults to its dataclass defaults, so ``ProxyConfig()``
    is a complete, usable configuration.
    """

    server: ServerConfig = field(default_factory=ServerConfig)
    provider: ProviderConfig = field(default_factory=ProviderConfig)
    store: StoreConfig = field(default_factory=StoreConfig)
    circuit_breaker: CircuitBreakerConfig = field(default_factory=CircuitBreakerConfig)
    compaction: CompactionConfig = field(default_factory=CompactionConfig)
    plugins: PluginConfig = field(default_factory=PluginConfig)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def load_config(path: Path | None = None) -> ProxyConfig:
    """Build a ProxyConfig from a TOML file, or from env vars if it is absent.

    Args:
        path: Config file to read; a falsy value selects ``DEFAULT_CONFIG``.

    Returns:
        The resolved configuration. When the file does not exist, only the
        provider section is customised, from the ``CODEX_PROXY_*``
        environment variables, with ``OPENAI_API_KEY`` as the key variable
        of last resort. When the file exists, each missing key falls back
        to its built-in default and the environment overrides are not
        consulted.
    """
    config_path = DEFAULT_CONFIG if not path else path

    if not config_path.exists():
        # Quick-setup path: no file, so take provider settings from env vars.
        env = os.environ
        provider = ProviderConfig(
            base_url=env.get("CODEX_PROXY_BASE_URL", ProviderConfig.base_url),
            api_key=env.get("CODEX_PROXY_API_KEY", ""),
            api_key_env=env.get("CODEX_PROXY_API_KEY_ENV", ""),
            default_model=env.get("CODEX_PROXY_MODEL", "glm-5.1"),
        )
        if not (provider.api_key or provider.api_key_env):
            provider.api_key_env = "OPENAI_API_KEY"
        return ProxyConfig(provider=provider)

    with open(config_path, "rb") as fh:
        raw = tomllib.load(fh)

    # Each TOML table is optional; a missing one behaves like an empty table.
    server_tbl = raw.get("server", {})
    provider_tbl = raw.get("provider", {})
    store_tbl = raw.get("store", {})
    breaker_tbl = raw.get("circuit_breaker", {})
    compaction_tbl = raw.get("compaction", {})
    plugins_tbl = raw.get("plugins", {})

    return ProxyConfig(
        server=ServerConfig(
            host=server_tbl.get("host", "127.0.0.1"),
            port=server_tbl.get("port", DEFAULT_PORT),
            log_level=server_tbl.get("log_level", "warning"),
            log_dir=Path(server_tbl.get("log_dir", str(DEFAULT_DIR / "logs"))),
        ),
        provider=ProviderConfig(
            name=provider_tbl.get("name", "zai"),
            display_name=provider_tbl.get("display_name", "Z.AI"),
            base_url=provider_tbl.get("base_url", "https://api.z.ai/api/paas/v4"),
            api_key=provider_tbl.get("api_key", ""),
            api_key_env=provider_tbl.get("api_key_env", ""),
            api_keys=provider_tbl.get("api_keys", []),
            api_keys_env=provider_tbl.get("api_keys_env", []),
            models=provider_tbl.get("models", ["glm-5.1", "glm-5", "glm-4.7"]),
            default_model=provider_tbl.get("default_model", "glm-5.1"),
            stream=provider_tbl.get("stream", True),
            extra_headers=provider_tbl.get("extra_headers", {}),
        ),
        store=StoreConfig(
            ttl_seconds=store_tbl.get("ttl_seconds", 600),
            max_entries=store_tbl.get("max_entries", 100),
        ),
        circuit_breaker=CircuitBreakerConfig(
            enabled=breaker_tbl.get("enabled", True),
            failure_threshold=breaker_tbl.get("failure_threshold", 5),
            recovery_timeout=breaker_tbl.get("recovery_timeout", 30.0),
        ),
        compaction=CompactionConfig(
            enabled=compaction_tbl.get("enabled", True),
            max_messages=compaction_tbl.get("max_messages", 50),
            keep_last=compaction_tbl.get("keep_last", 20),
        ),
        plugins=PluginConfig(
            enabled=plugins_tbl.get("enabled", False),
            plugins=plugins_tbl.get("plugins", []),
        ),
    )
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def write_example_config(path: Path | None = None) -> Path:
    """Write an example config file.

    Args:
        path: Destination file; a falsy value selects ``DEFAULT_CONFIG``.

    Returns:
        The path that was written.

    The parent directory is created if needed. The file is written with
    ``write_text``, so an existing file at the destination is replaced.
    """
    target = path or DEFAULT_CONFIG
    target.parent.mkdir(parents=True, exist_ok=True)

    # The trailing backslash after the opening quotes keeps the file from
    # starting with an empty line.
    content = """\
# codex-proxy config — https://github.com/ZakPro/codex-proxy

[server]
host = "127.0.0.1"
port = 4242
log_level = "warning" # debug, info, warning, error

[store]
ttl_seconds = 600 # response cache TTL (10 min)
max_entries = 100 # max cached responses

[circuit_breaker]
enabled = true # protect upstream from cascading failures
failure_threshold = 5 # consecutive failures before opening circuit
recovery_timeout = 30.0 # seconds before trying half-open recovery

[compaction]
enabled = true # auto-trim long conversations
max_messages = 50 # trigger compaction above this count
keep_last = 20 # recent messages to preserve

[plugins]
enabled = true # enable hook-based middleware plugins
plugins = [
"codex_proxy.plugins_builtin.LoggingPlugin", # built-in structured logger
]

[provider]
# Provider: Z.AI (GLM models)
name = "zai"
display_name = "Z.AI"
base_url = "https://api.z.ai/api/paas/v4"
api_key = "" # or set api_key_env below
api_key_env = "OPENAI_API_KEY" # reads from env var
# api_keys = ["sk-key1", "sk-key2", "sk-key3"] # multi-key rotation
# api_keys_env = ["OPENAI_API_KEY_1", "OPENAI_API_KEY_2"]
models = ["glm-5.1", "glm-5", "glm-4.7", "glm-4.6", "glm-4.5-air"]
default_model = "glm-5.1"

# --- Other providers (uncomment one) ---

# [provider]
# name = "groq"
# display_name = "Groq"
# base_url = "https://api.groq.com/openai/v1"
# api_key_env = "GROQ_API_KEY"
# models = ["llama-4-maverick-17b", "mixtral-8x7b-32768"]
# default_model = "llama-4-maverick-17b"

# [provider]
# name = "together"
# display_name = "Together AI"
# base_url = "https://api.together.xyz/v1"
# api_key_env = "TOGETHER_API_KEY"
# models = ["meta-llama/Llama-3.3-70B-Instruct-Turbo"]
# default_model = "meta-llama/Llama-3.3-70B-Instruct-Turbo"

# [provider]
# name = "openrouter"
# display_name = "OpenRouter"
# base_url = "https://openrouter.ai/api/v1"
# api_key_env = "OPENROUTER_API_KEY"
# models = ["deepseek/deepseek-chat-v3-0324"]
# default_model = "deepseek/deepseek-chat-v3-0324"

# [provider]
# name = "ollama"
# display_name = "Ollama (local)"
# base_url = "http://localhost:11434/v1"
# api_key = "ollama" # Ollama doesn't need a real key
# models = ["qwen3:32b", "codellama:34b"]
# default_model = "qwen3:32b"

# [provider]
# name = "fireworks"
# display_name = "Fireworks AI"
# base_url = "https://api.fireworks.ai/inference/v1"
# api_key_env = "FIREWORKS_API_KEY"
# models = ["accounts/fireworks/models/llama4-maverick-instruct-basic"]
# default_model = "accounts/fireworks/models/llama4-maverick-instruct-basic"

# [provider]
# name = "anthropic"
# display_name = "Anthropic"
# base_url = "https://api.anthropic.com/v1"
# api_key_env = "ANTHROPIC_API_KEY"
# models = ["claude-sonnet-4-20250514"]
# default_model = "claude-sonnet-4-20250514"

# [provider]
# name = "gemini"
# display_name = "Google Gemini"
# base_url = "https://generativelanguage.googleapis.com/v1beta/openai"
# api_key_env = "GEMINI_API_KEY"
# models = ["gemini-2.5-flash"]
# default_model = "gemini-2.5-flash"

# [provider]
# name = "deepseek"
# display_name = "DeepSeek"
# base_url = "https://api.deepseek.com/v1"
# api_key_env = "DEEPSEEK_API_KEY"
# models = ["deepseek-chat", "deepseek-reasoner"]
# default_model = "deepseek-chat"

# [provider]
# name = "mistral"
# display_name = "Mistral AI"
# base_url = "https://api.mistral.ai/v1"
# api_key_env = "MISTRAL_API_KEY"
# models = ["mistral-large-latest"]
# default_model = "mistral-large-latest"

# [provider]
# name = "cohere"
# display_name = "Cohere"
# base_url = "https://api.cohere.com/compatibility/v1"
# api_key_env = "CO_API_KEY"
# models = ["command-a-03-2025"]
# default_model = "command-a-03-2025"

# [provider]
# name = "nvidia"
# display_name = "NVIDIA NIM"
# base_url = "https://integrate.api.nvidia.com/v1"
# api_key_env = "NVIDIA_API_KEY"
# models = ["nvidia/llama-3.1-nemotron-ultra-253b-v1"]
# default_model = "nvidia/llama-3.1-nemotron-ultra-253b-v1"
"""
    # NOTE(review): this replaces any existing file at `target`, including a
    # user's edited config — confirm that is intended for `--init`.
    target.write_text(content, encoding="utf-8")
    return target
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Multi-key rotation with per-key circuit breakers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
from .circuit_breaker import CircuitBreaker
|
|
9
|
+
|
|
10
|
+
# Upstream HTTP status codes that count as a failure against a key's circuit
# breaker; other status codes are only tallied in the key's error counter.
_FAIL_CODES = frozenset({401, 403, 429})
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _mask_key(key: str) -> str:
|
|
14
|
+
"""Mask a key for display: show first 3 and last 4 chars."""
|
|
15
|
+
if len(key) <= 7:
|
|
16
|
+
return "***"
|
|
17
|
+
return f"{key[:3]}...{key[-4:]}"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class _KeyEntry:
    """Bookkeeping for one API key in the rotation pool."""

    key: str
    # Breaker that decides whether this key may currently be handed out.
    circuit_breaker: CircuitBreaker
    # Count of all reported failures, regardless of status code.
    error_count: int = 0
    success_count: int = 0
    # time.time() of the last hand-out; 0.0 until the key is first used.
    last_used: float = 0.0
    # Status code of the most recent reported failure; 0 if none yet.
    last_error_status: int = 0
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class KeyRotator:
    """Round-robin key pool with per-key circuit breakers.

    Outcomes are reported back by key value, so the pool keeps exactly one
    entry per distinct key.
    """

    def __init__(self, keys: list[str], failure_threshold: int = 3,
                 recovery_timeout: float = 60.0) -> None:
        """Create the pool.

        Args:
            keys: API keys to rotate through; duplicates are collapsed.
            failure_threshold: Failures before a key's circuit opens.
            recovery_timeout: Seconds a key's circuit stays open.

        Raises:
            ValueError: If ``keys`` is empty.
        """
        if not keys:
            raise ValueError("KeyRotator requires at least one key")
        self._keys = keys
        self._failure_threshold = failure_threshold
        self._recovery_timeout = recovery_timeout
        self._index = 0
        self._entries: list[_KeyEntry] = []
        self._rebuild_entries(keys)

    def _rebuild_entries(self, keys: list[str]) -> None:
        """Replace all entries with fresh ones (counters and breakers reset)."""
        # De-duplicate while preserving order. `_find` returns the first
        # entry with a matching key, so a duplicate entry would never
        # receive success/failure reports and its breaker could never open.
        self._entries = [
            _KeyEntry(
                key=k,
                circuit_breaker=CircuitBreaker(
                    failure_threshold=self._failure_threshold,
                    recovery_timeout=self._recovery_timeout,
                ),
            )
            for k in dict.fromkeys(keys)
        ]

    def next_key(self) -> str:
        """Return the best available key (round-robin with skip-bad).

        Raises:
            IndexError: If the pool is empty (possible after ``reset([])``).
        """
        n = len(self._entries)
        if n == 0:
            raise IndexError("KeyRotator has no keys configured")
        for _ in range(n):
            entry = self._entries[self._index % n]
            self._index = (self._index + 1) % n
            if entry.circuit_breaker.can_execute():
                entry.last_used = time.time()
                return entry.key
        # All keys have open circuits — fail-open with the first key rather
        # than refusing to serve the request.
        entry = self._entries[0]
        entry.last_used = time.time()
        return entry.key

    def record_success(self, key: str) -> None:
        """Report a successful request made with ``key``; unknown keys are ignored."""
        entry = self._find(key)
        if entry:
            entry.circuit_breaker.record_success()
            entry.success_count += 1

    def record_failure(self, key: str, status_code: int) -> None:
        """Report a failed request made with ``key``; unknown keys are ignored.

        Every failure is counted, but only status codes in ``_FAIL_CODES``
        are forwarded to the key's circuit breaker.
        """
        entry = self._find(key)
        if not entry:
            return
        entry.error_count += 1
        entry.last_error_status = status_code
        if status_code in _FAIL_CODES:
            entry.circuit_breaker.record_failure()

    def _find(self, key: str) -> _KeyEntry | None:
        """Return the entry for ``key``, or ``None`` if it is not in the pool."""
        return next((e for e in self._entries if e.key == key), None)

    def get_status(self) -> list[dict]:
        """Return status dicts for each key (keys are masked)."""
        return [
            {
                "key": _mask_key(e.key),
                "state": e.circuit_breaker.get_status().get("state", "UNKNOWN"),
                "errors": e.error_count,
                "successes": e.success_count,
                "last_error": e.last_error_status,
            }
            for e in self._entries
        ]

    def reset(self, keys: list[str]) -> None:
        """Rebuild the pool with new keys."""
        self._keys = keys
        self._index = 0
        self._rebuild_entries(keys)
|