eggpool 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eggpool/__init__.py +5 -0
- eggpool/__main__.py +7 -0
- eggpool/_share/.env.example +9 -0
- eggpool/_share/config.example.toml +551 -0
- eggpool/accounts/__init__.py +1 -0
- eggpool/accounts/registry.py +181 -0
- eggpool/accounts/state.py +170 -0
- eggpool/api/__init__.py +1 -0
- eggpool/api/chat_completions.py +31 -0
- eggpool/api/errors.py +51 -0
- eggpool/api/messages.py +31 -0
- eggpool/api/models.py +76 -0
- eggpool/api/proxy_request.py +242 -0
- eggpool/api/stats.py +284 -0
- eggpool/app.py +893 -0
- eggpool/auth.py +92 -0
- eggpool/background/__init__.py +147 -0
- eggpool/background/cleanup.py +206 -0
- eggpool/catalog/__init__.py +1 -0
- eggpool/catalog/cache.py +645 -0
- eggpool/catalog/fetcher.py +188 -0
- eggpool/catalog/limits.py +306 -0
- eggpool/catalog/normalizer.py +119 -0
- eggpool/catalog/pricing.py +438 -0
- eggpool/catalog/protocols.py +194 -0
- eggpool/catalog/service.py +920 -0
- eggpool/cli.py +1481 -0
- eggpool/constants.py +34 -0
- eggpool/dashboard/__init__.py +3 -0
- eggpool/dashboard/_resources.py +14 -0
- eggpool/dashboard/escape.py +104 -0
- eggpool/dashboard/render.py +1448 -0
- eggpool/dashboard/routes.py +451 -0
- eggpool/dashboard/static/chart.umd.min.js +20 -0
- eggpool/dashboard/static/dashboard.css +341 -0
- eggpool/dashboard/static/favicon.svg +23 -0
- eggpool/dashboard/theme.py +514 -0
- eggpool/dashboard/themes/Booberry.toml +42 -0
- eggpool/dashboard/themes/Catppuccin Latte.toml +42 -0
- eggpool/dashboard/themes/Catppuccin Macchiato.toml +42 -0
- eggpool/dashboard/themes/Catppuccin Mocha.toml +42 -0
- eggpool/dashboard/themes/Cyber Red.toml +42 -0
- eggpool/dashboard/themes/Cyberpunk.toml +42 -0
- eggpool/dashboard/themes/Dark Green.toml +43 -0
- eggpool/dashboard/themes/Discord (80_ Saturation).toml +50 -0
- eggpool/dashboard/themes/Discord.toml +50 -0
- eggpool/dashboard/themes/Dracula.toml +42 -0
- eggpool/dashboard/themes/Ferra Light.toml +42 -0
- eggpool/dashboard/themes/Flexor Dark.toml +42 -0
- eggpool/dashboard/themes/Gruvbox.toml +42 -0
- eggpool/dashboard/themes/Halcyon Dark.toml +42 -0
- eggpool/dashboard/themes/IntelliJ Light.toml +42 -0
- eggpool/dashboard/themes/Kanagawa.toml +42 -0
- eggpool/dashboard/themes/Macaw Dark.toml +42 -0
- eggpool/dashboard/themes/Macaw Light.toml +42 -0
- eggpool/dashboard/themes/Matrix.toml +42 -0
- eggpool/dashboard/themes/Noctis Lilac.toml +42 -0
- eggpool/dashboard/themes/Nord.toml +42 -0
- eggpool/dashboard/themes/Nostromo Terminal.toml +42 -0
- eggpool/dashboard/themes/One Dark.toml +42 -0
- eggpool/dashboard/themes/Oxocarbon.toml +42 -0
- eggpool/dashboard/themes/Rose Pine Dawn.toml +42 -0
- eggpool/dashboard/themes/Rose Pine Moon.toml +42 -0
- eggpool/dashboard/themes/Rose Pine.toml +42 -0
- eggpool/dashboard/themes/Solarized Dark.toml +42 -0
- eggpool/dashboard/themes/Sonokai.toml +42 -0
- eggpool/dashboard/themes/Tokyo Night Storm.toml +42 -0
- eggpool/dashboard/themes/VESPER.toml +42 -0
- eggpool/dashboard/themes/Zenburn.toml +42 -0
- eggpool/dashboard/themes/acton.toml +42 -0
- eggpool/dashboard/themes/bam.toml +42 -0
- eggpool/dashboard/themes/base16-atelier-forest-light.toml +45 -0
- eggpool/dashboard/themes/berlin.toml +42 -0
- eggpool/dashboard/themes/black but with important highlights.toml +42 -0
- eggpool/dashboard/themes/broc.toml +42 -0
- eggpool/dashboard/themes/cork.toml +42 -0
- eggpool/dashboard/themes/ferra.toml +42 -0
- eggpool/dashboard/themes/forest.toml +42 -0
- eggpool/dashboard/themes/lisbon.toml +42 -0
- eggpool/dashboard/themes/midnight.toml +42 -0
- eggpool/dashboard/themes/oslo.toml +42 -0
- eggpool/dashboard/themes/plum.toml +43 -0
- eggpool/dashboard/themes/portland.toml +42 -0
- eggpool/dashboard/themes/sunset.toml +42 -0
- eggpool/dashboard/themes/tofino.toml +42 -0
- eggpool/dashboard/themes/vanimo.toml +42 -0
- eggpool/dashboard/themes/vik.toml +42 -0
- eggpool/db/__init__.py +0 -0
- eggpool/db/connection.py +401 -0
- eggpool/db/migrations.py +125 -0
- eggpool/db/repositories.py +989 -0
- eggpool/db/schema/0001_initial.sql +78 -0
- eggpool/db/schema/0002_indexes.sql +24 -0
- eggpool/db/schema/0003_request_attempts.sql +20 -0
- eggpool/db/schema/0004_integration_hardening.sql +34 -0
- eggpool/db/schema/0005_price_microdollars.sql +15 -0
- eggpool/db/schema/0006_correct_price_microdollars.sql +12 -0
- eggpool/db/schema/0007_price_cache_rates.sql +5 -0
- eggpool/db/schema/0008_proxy_request_identity.sql +9 -0
- eggpool/db/schema/0009_model_protocol_source.sql +3 -0
- eggpool/db/schema/0010_health_probe.sql +5 -0
- eggpool/db/schema/0011_model_resolution_status.sql +1 -0
- eggpool/db/schema/0012_drop_reservations_estimated_microdollars.sql +6 -0
- eggpool/db/schema/0013_request_attempts_account_id_index.sql +7 -0
- eggpool/db/schema/0014_bandwidth_tracking.sql +2 -0
- eggpool/db/schema/0015_multi_provider.sql +23 -0
- eggpool/db/schema/0016_requests_provider_id.sql +2 -0
- eggpool/db/schema/0017_price_snapshots_provider_id.sql +4 -0
- eggpool/db/schema/0018_provider_pings.sql +16 -0
- eggpool/db/schema/0019_client_ip.sql +4 -0
- eggpool/db/schema/0020_performance_indexes.sql +19 -0
- eggpool/db/schema/0021_provider_model_metadata.sql +19 -0
- eggpool/db/schema/0022_dashboard_indexes.sql +14 -0
- eggpool/db/schema/checksums.json +26 -0
- eggpool/deploy/__init__.py +126 -0
- eggpool/errors.py +123 -0
- eggpool/health/__init__.py +8 -0
- eggpool/health/circuit_breaker.py +146 -0
- eggpool/health/health_manager.py +339 -0
- eggpool/integrations/__init__.py +1 -0
- eggpool/integrations/opencode.py +90 -0
- eggpool/logging.py +50 -0
- eggpool/models/__init__.py +0 -0
- eggpool/models/api.py +32 -0
- eggpool/models/config.py +658 -0
- eggpool/models/database.py +99 -0
- eggpool/models/domain.py +59 -0
- eggpool/onboard.py +111 -0
- eggpool/providers/__init__.py +0 -0
- eggpool/providers/_templates.toml +574 -0
- eggpool/providers/client_pool.py +131 -0
- eggpool/providers/connect.py +988 -0
- eggpool/providers/contract.py +91 -0
- eggpool/providers/pproxy_transport.py +293 -0
- eggpool/proxy/__init__.py +1 -0
- eggpool/proxy/client.py +140 -0
- eggpool/proxy/sse_observer.py +283 -0
- eggpool/proxy/usage.py +114 -0
- eggpool/py.typed +1 -0
- eggpool/quota/__init__.py +13 -0
- eggpool/quota/estimation.py +639 -0
- eggpool/quota/reservation.py +193 -0
- eggpool/quota/scorer.py +215 -0
- eggpool/request/__init__.py +13 -0
- eggpool/request/attempt_finalizer.py +125 -0
- eggpool/request/body.py +70 -0
- eggpool/request/coordinator.py +1638 -0
- eggpool/request/finalizer.py +392 -0
- eggpool/request/limits.py +152 -0
- eggpool/retry/__init__.py +7 -0
- eggpool/retry/classification.py +207 -0
- eggpool/routing/__init__.py +1 -0
- eggpool/routing/eligibility.py +95 -0
- eggpool/routing/provider.py +20 -0
- eggpool/routing/router.py +395 -0
- eggpool/security/__init__.py +7 -0
- eggpool/security/redaction.py +327 -0
- eggpool/stats/__init__.py +37 -0
- eggpool/stats/queries.py +598 -0
- eggpool/stats/service.py +548 -0
- eggpool/toml_edit.py +101 -0
- eggpool-0.1.0.dist-info/METADATA +512 -0
- eggpool-0.1.0.dist-info/RECORD +166 -0
- eggpool-0.1.0.dist-info/WHEEL +4 -0
- eggpool-0.1.0.dist-info/entry_points.txt +2 -0
- eggpool-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
"""Account registry: loads accounts from config, manages runtime state."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
from eggpool.accounts.state import AccountRuntimeState
|
|
10
|
+
from eggpool.errors import ConfigError
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from eggpool.models.config import AppConfig
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def account_config_rows(config: AppConfig) -> list[dict[str, Any]]:
    """Flatten every configured account into one persistence row.

    Each row carries exactly the fields consumed by
    :meth:`eggpool.db.repositories.AccountRepository.sync_from_config`.
    Building the rows here, in one place, keeps the app lifespan and the
    ``models refresh`` CLI command from drifting out of sync.

    Rows are emitted in provider iteration order, then in each provider's
    account order.
    """
    return [
        {
            "name": account.name,
            "api_key_env": account.api_key_env,
            "enabled": account.enabled,
            "weight": account.weight,
            "provider_id": pid,
        }
        for pid, provider_cfg in config.providers.items()
        for account in provider_cfg.accounts
    ]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class AccountRegistry:
    """Manages account configurations and their runtime states.

    Three tables keyed by account name are kept in step: the runtime
    state, the resolved API key, and the ID of the owning provider.
    """

    def __init__(self, config: AppConfig) -> None:
        """Build the registry from ``config``.

        Raises:
            ConfigError: If an enabled account belongs to a provider whose
                auth mode is not ``"none"`` and no API key can be resolved.
        """
        self._config = config
        self._states: dict[str, AccountRuntimeState] = {}
        self._api_keys: dict[str, str] = {}
        self._account_providers: dict[str, str] = {}
        self._initialize()

    def _initialize(self) -> None:
        """Load accounts from the current config and resolve API keys."""
        self._install(*self._load_accounts(self._config))

    @staticmethod
    def _load_accounts(
        config: AppConfig,
    ) -> tuple[dict[str, AccountRuntimeState], dict[str, str], dict[str, str]]:
        """Build fresh state, API-key and provider tables from ``config``.

        Nothing on the registry is touched, so a failure here cannot leave
        the registry half-populated.

        Raises:
            ConfigError: If an enabled account needs an API key and none is
                available.
        """
        states: dict[str, AccountRuntimeState] = {}
        api_keys: dict[str, str] = {}
        account_providers: dict[str, str] = {}

        for provider_id, provider_cfg in config.providers.items():
            for acct_config in provider_cfg.accounts:
                # An inline key wins; otherwise fall back to the named
                # environment variable (empty string when it is unset).
                api_key = acct_config.api_key or os.environ.get(
                    acct_config.api_key_env, ""
                )
                if (
                    acct_config.enabled
                    and provider_cfg.auth.mode != "none"
                    and not api_key
                ):
                    raise ConfigError(
                        f"Account {acct_config.name!r} is enabled but "
                        f"no API key available"
                    )

                states[acct_config.name] = AccountRuntimeState(
                    name=acct_config.name,
                    enabled=acct_config.enabled,
                    weight=acct_config.weight,
                    routing_priority=provider_cfg.routing_priority,
                )
                api_keys[acct_config.name] = api_key
                account_providers[acct_config.name] = provider_id

                if acct_config.enabled:
                    logger.info(
                        "Loaded account %r (weight=%.2f, priority=%d, provider=%r)",
                        acct_config.name,
                        acct_config.weight,
                        provider_cfg.routing_priority,
                        provider_id,
                    )

        return states, api_keys, account_providers

    def _install(
        self,
        states: dict[str, AccountRuntimeState],
        api_keys: dict[str, str],
        account_providers: dict[str, str],
    ) -> None:
        """Replace the contents of the three lookup tables in place."""
        for table, fresh in (
            (self._states, states),
            (self._api_keys, api_keys),
            (self._account_providers, account_providers),
        ):
            table.clear()
            table.update(fresh)

    def reload(self, config: AppConfig) -> None:
        """Reload account configurations from a new config.

        Every account gets a freshly constructed runtime state. The new
        tables are built before anything is swapped, so when ``config`` is
        rejected the registry keeps serving the previous config and state.

        Raises:
            ConfigError: If an enabled account in ``config`` needs an API
                key and none is available.
        """
        tables = self._load_accounts(config)  # may raise; nothing mutated yet
        self._config = config
        self._install(*tables)

    def get_state(self, name: str) -> AccountRuntimeState | None:
        """Get runtime state for an account by name, or ``None`` if unknown."""
        return self._states.get(name)

    def get_api_key(self, name: str) -> str | None:
        """Get the resolved API key for an account, or ``None`` if unknown."""
        return self._api_keys.get(name)

    def has_usable_credentials(self, name: str) -> bool:
        """Return whether an account can satisfy its provider auth contract.

        True when the owning provider's auth mode is ``"none"`` or a
        non-empty API key was resolved; False for unknown accounts or
        providers.
        """
        provider_id = self._account_providers.get(name)
        if provider_id is None:
            return False
        provider = self._config.providers.get(provider_id)
        if provider is None:
            return False
        return provider.auth.mode == "none" or bool(self._api_keys.get(name))

    def get_all_states(self) -> list[AccountRuntimeState]:
        """Get all account runtime states."""
        return list(self._states.values())

    def get_enabled_states(self) -> list[AccountRuntimeState]:
        """Get runtime states for enabled accounts."""
        return [s for s in self._states.values() if s.enabled]

    def get_eligible_states(self) -> list[AccountRuntimeState]:
        """Get runtime states whose ``is_eligible()`` currently returns true."""
        return [s for s in self._states.values() if s.is_eligible()]

    def get_account_config(self, name: str) -> Any | None:
        """Get the config entry for an account by name, or ``None``.

        Performs a linear scan over ``config.all_accounts()``.
        """
        for acct in self._config.all_accounts():
            if acct.name == name:
                return acct
        return None

    def get_account_offsets(self, name: str) -> dict[str, int]:
        """Get quota offsets for an account.

        Returns a dict with ``five_hour``, ``weekly`` and ``monthly`` keys
        taken from the account config, or an empty dict when the account is
        not configured.
        """
        acct = self.get_account_config(name)
        if acct is None:
            return {}
        return {
            "five_hour": acct.five_hour_offset_microdollars,
            "weekly": acct.weekly_offset_microdollars,
            "monthly": acct.monthly_offset_microdollars,
        }

    def get_provider_for_account(self, account_name: str) -> str | None:
        """Get the provider ID for an account, or ``None`` if unknown."""
        return self._account_providers.get(account_name)

    def get_provider_protocols(self, provider_id: str) -> set[str]:
        """Get protocols configured for a provider (empty set if unknown)."""
        provider = self._config.providers.get(provider_id)
        if provider is None:
            return set()
        return set(provider.protocols)

    def account_supports_protocol(self, account_name: str, protocol: str) -> bool:
        """Return whether an account's configured provider supports a protocol."""
        provider_id = self.get_provider_for_account(account_name)
        if provider_id is None:
            return False
        return protocol in self.get_provider_protocols(provider_id)

    def get_accounts_for_provider(self, provider_id: str) -> list[AccountRuntimeState]:
        """Get all account states belonging to a provider."""
        return [
            state
            for name, state in self._states.items()
            if self._account_providers.get(name) == provider_id
        ]

    def get_enabled_accounts_for_provider(
        self, provider_id: str
    ) -> list[AccountRuntimeState]:
        """Get enabled account states belonging to a provider."""
        return [
            state
            for name, state in self._states.items()
            if self._account_providers.get(name) == provider_id and state.enabled
        ]

    def get_provider_ids(self) -> list[str]:
        """Get all unique provider IDs that own at least one account, sorted."""
        return sorted(set(self._account_providers.values()))
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""Account runtime state and health management."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
|
|
8
|
+
# Default cooldown durations used when the caller does not supply an
|
|
9
|
+
# explicit value. These mirror the configured defaults in
|
|
10
|
+
# ``RoutingConfig`` and exist only as a safety net for tests or
|
|
11
|
+
# command-line tools that instantiate ``AccountRuntimeState``
|
|
12
|
+
# directly. Production code paths should pass the configured cooldown
|
|
13
|
+
# explicitly so the runtime state stays in lock-step with the
|
|
14
|
+
# authoritative ``HealthManager``.
|
|
15
|
+
DEFAULT_QUOTA_EXHAUSTED_COOLDOWN_SECONDS = 300.0
|
|
16
|
+
DEFAULT_BACKOFF_BASE_SECONDS = 30.0
|
|
17
|
+
DEFAULT_BACKOFF_MAX_SECONDS = 3600.0 # 1 hour max backoff for rate limits
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _failure_backoff(consecutive_failures: int) -> float:
    """Compute the capped exponential backoff for a failure streak.

    The first failure (or any count at or below one) waits the base
    duration; each further consecutive failure doubles it, up to the
    configured maximum.

    The exponent is clamped to the bit length of the max/base ratio. Two
    raised to that bit length always exceeds the ratio, so the final
    ``min`` still lands on the maximum while ``2**exponent`` stays small
    however long the streak grows.
    """
    if consecutive_failures <= 1:
        return DEFAULT_BACKOFF_BASE_SECONDS

    ratio = int(DEFAULT_BACKOFF_MAX_SECONDS / DEFAULT_BACKOFF_BASE_SECONDS)
    exponent = min(consecutive_failures - 1, ratio.bit_length())
    uncapped = DEFAULT_BACKOFF_BASE_SECONDS * (2**exponent)
    return min(uncapped, DEFAULT_BACKOFF_MAX_SECONDS)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class AccountRuntimeState:
    """In-memory, mutable routing and health record for a single account."""

    # Identity and routing inputs.
    name: str
    enabled: bool = True
    weight: float = 1.0
    routing_priority: int = 0

    # Health tracking. ``cooldown_until`` and the ``last_*_at`` fields are
    # ``time.time()`` timestamps; 0.0 means "not set".
    health_state: str = "healthy"
    cooldown_until: float = 0.0
    consecutive_failures: int = 0
    last_success_at: float = 0.0
    last_failure_at: float = 0.0
    last_failure_category: str = ""

    active_request_count: int = 0
    reserved_microdollars: int = 0

    # Account-specific model availability: model_id -> available
    model_availability: dict[str, bool] = field(  # pyright: ignore[reportUnknownVariableType]
        default_factory=dict
    )

    def refresh_transient_state(self, now: float | None = None) -> None:
        """Return to "healthy" once a transient cooldown has run out.

        "rate_limited" and "cooldown" clear when the cooldown deadline
        has passed or when no deadline is set at all. "quota_exhausted"
        clears only when a deadline is set and has passed. Every other
        state is left untouched.

        ``now`` defaults to the current ``time.time()``.
        """
        current = time.time() if now is None else now
        state = self.health_state

        if state == "quota_exhausted":
            expired = self.cooldown_until > 0 and current >= self.cooldown_until
        elif state in ("rate_limited", "cooldown"):
            expired = self.cooldown_until == 0.0 or current >= self.cooldown_until
        else:
            return

        if expired:
            self.health_state = "healthy"
            self.cooldown_until = 0.0
            self.consecutive_failures = 0

    def is_eligible(self) -> bool:
        """Report whether the account may be routed to right now.

        Expired transient states are refreshed first, so calling this can
        mutate the health fields.
        """
        if self.enabled:
            self.refresh_transient_state()
            blocked = self.health_state in (
                "authentication_failed",
                "quota_exhausted",
                "cooldown",
                "rate_limited",
            )
            if not blocked:
                # A lingering deadline still blocks routing even when the
                # state label itself is not one of the blocking ones.
                return self.cooldown_until <= time.time()
        return False

    def record_success(self) -> None:
        """Note a successful request and lift any auto-recoverable state.

        "authentication_failed" is deliberately not cleared here.
        """
        self.last_success_at = time.time()
        self.consecutive_failures = 0
        self.last_failure_category = ""
        recoverable = ("cooldown", "rate_limited", "quota_exhausted")
        if self.health_state in recoverable:
            self.cooldown_until = 0.0
            self.health_state = "healthy"

    def record_failure(
        self,
        error_class: str,
        *,
        cooldown_seconds: float | None = None,
        rate_limit_retry_after: float | None = None,
    ) -> None:
        """Note a failed request and move the health state accordingly.

        ``cooldown_seconds`` is the quota-exhausted cooldown duration; pass
        the same value the authoritative ``HealthManager`` uses so the two
        cooldown representations cannot diverge. When omitted, the module
        default applies. ``rate_limit_retry_after`` is the parsed
        ``Retry-After`` value for 429 responses; when given it overrides the
        exponential backoff schedule (negative values count as zero).
        Authentication failures remain terminal until explicitly reset.

        The failure streak restarts at one whenever the error class differs
        from the previously recorded one.
        """
        previous = self.last_failure_category
        streak_broken = bool(previous) and error_class != previous
        self.consecutive_failures = (
            1 if streak_broken else self.consecutive_failures + 1
        )
        self.last_failure_at = time.time()
        self.last_failure_category = error_class

        if error_class in ("authentication_failed", "authentication"):
            self.health_state = "authentication_failed"
            return

        if error_class == "quota_exhausted":
            if cooldown_seconds is None:
                duration = DEFAULT_QUOTA_EXHAUSTED_COOLDOWN_SECONDS
            else:
                duration = cooldown_seconds
            self.health_state = "quota_exhausted"
            self.cooldown_until = time.time() + duration
            return

        if error_class == "rate_limited":
            # Mirror HealthManager.record_rate_limit so both state
            # machines expose the same label for the same event.
            self.health_state = "rate_limited"
            if rate_limit_retry_after is None:
                delay = _failure_backoff(self.consecutive_failures)
            else:
                delay = max(0.0, rate_limit_retry_after)
            self.cooldown_until = time.time() + delay
            return

        transport_errors = (
            "connect_timeout",
            "read_timeout",
            "connection_failure",
            "connection_error",
        )
        if error_class in transport_errors:
            self.health_state = "cooldown"
            self.cooldown_until = time.time() + _failure_backoff(
                self.consecutive_failures
            )
        # upstream_server_error, protocol_error, unknown, etc. - no cooldown

    def reset_health(self) -> None:
        """Force the account back to a clean, healthy state."""
        self.health_state = "healthy"
        self.last_failure_category = ""
        self.consecutive_failures = 0
        self.cooldown_until = 0.0
        # Per-model disable map must be cleared alongside the
        # account-level state so reset_health mirrors the same
        # reset semantics as HealthManager.enable_account.
        self.model_availability.clear()
|
eggpool/api/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""API endpoint handlers."""
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""OpenAI-compatible ``/v1/chat/completions`` endpoint."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
from fastapi import Request # noqa: TCH002 — FastAPI needs runtime access
|
|
8
|
+
|
|
9
|
+
from eggpool.api.errors import openai_error_response
|
|
10
|
+
from eggpool.api.proxy_request import (
|
|
11
|
+
ProxyEndpointConfig,
|
|
12
|
+
handle_proxy_request,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from fastapi.responses import Response
|
|
17
|
+
|
|
18
|
+
# Protocol settings handed to the shared proxy handler for this endpoint:
# OpenAI-style error rendering and OpenAI error type names.
_ENDPOINT = ProxyEndpointConfig(
    protocol="openai",
    request_label="chat completion",
    error_response=openai_error_response,
    not_found_error_type="invalid_request_error",
    service_error_type="server_error",
)


async def handle_chat_completions(request: Request) -> Response:
    """Serve POST /v1/chat/completions through the shared proxy handler."""
    response = await handle_proxy_request(request, _ENDPOINT)
    return response
|
eggpool/api/errors.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Protocol-specific error response renderers.
|
|
2
|
+
|
|
3
|
+
Provides OpenAI-style and Anthropic-style error response formats
|
|
4
|
+
so that upstream clients receive protocol-compatible error payloads.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from fastapi.responses import JSONResponse
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def openai_error_response(
    status_code: int,
    message: str,
    error_type: str = "invalid_request_error",
) -> JSONResponse:
    """Build a JSON error reply in the OpenAI error envelope.

    The body has the form
    ``{"error": {"message": ..., "type": ..., "code": ...}}``, where
    ``code`` is the HTTP status rendered as a string. The response carries
    ``status_code`` as its HTTP status.
    """
    error_body = {
        "message": message,
        "type": error_type,
        "code": str(status_code),
    }
    return JSONResponse(content={"error": error_body}, status_code=status_code)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def anthropic_error_response(
    status_code: int,
    message: str,
    error_type: str = "invalid_request_error",
) -> JSONResponse:
    """Build a JSON error reply in the Anthropic error envelope.

    The body has the form
    ``{"type": "error", "error": {"type": ..., "message": ...}}`` and the
    response carries ``status_code`` as its HTTP status.
    """
    error_body = {"type": error_type, "message": message}
    payload = {"type": "error", "error": error_body}
    return JSONResponse(content=payload, status_code=status_code)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
__all__ = [
|
|
49
|
+
"anthropic_error_response",
|
|
50
|
+
"openai_error_response",
|
|
51
|
+
]
|
eggpool/api/messages.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Anthropic-compatible ``/v1/messages`` endpoint."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
from fastapi import Request # noqa: TCH002 — FastAPI needs runtime access
|
|
8
|
+
|
|
9
|
+
from eggpool.api.errors import anthropic_error_response
|
|
10
|
+
from eggpool.api.proxy_request import (
|
|
11
|
+
ProxyEndpointConfig,
|
|
12
|
+
handle_proxy_request,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from fastapi.responses import Response
|
|
17
|
+
|
|
18
|
+
# Protocol settings handed to the shared proxy handler for this endpoint:
# Anthropic-style error rendering and Anthropic error type names.
_ENDPOINT = ProxyEndpointConfig(
    protocol="anthropic",
    request_label="messages request",
    error_response=anthropic_error_response,
    not_found_error_type="not_found_error",
    service_error_type="api_error",
)


async def handle_messages(request: Request) -> Response:
    """Serve POST /v1/messages through the shared proxy handler."""
    response = await handle_proxy_request(request, _ENDPOINT)
    return response
|
eggpool/api/models.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Serialization helpers for /v1/models responses."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from collections.abc import Mapping
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def serialize_openai_model(
    model: Mapping[str, Any],
    *,
    routing_priority: int | None = None,
    routing_priority_max: int | None = None,
    providers: list[str] | None = None,
) -> dict[str, Any]:
    """Serialize a catalog model entry to OpenAI-compatible model dict.

    Includes the namespaced ``eggpool`` extension with base model ID,
    provider ID, routing priority (when supplied), and effective limits
    when available.

    For collapsed entries (no per-provider ``provider_id``), pass the
    ``routing_priority_max`` (highest priority across contributing
    providers) and ``providers`` list so clients can see the routing
    topology.

    Optional keys of ``model`` (``provider_id``, ``first_seen_at``,
    ``display_name``, ``effective_limits``) may be absent or explicitly
    ``None``; both are treated the same way.

    Args:
        model: Catalog entry; ``model_id`` is required.
        routing_priority: Priority of the entry's own provider, if known.
        routing_priority_max: Highest priority across contributing
            providers; only emitted for collapsed entries.
        providers: Contributing provider IDs; only emitted for collapsed
            entries.

    Returns:
        A dict with ``id``, ``object``, ``created``, ``owned_by`` and
        ``name``, plus an ``eggpool`` dict when any extension metadata is
        present.

    Raises:
        KeyError: If ``model`` has no ``model_id``.
    """
    model_id = model["model_id"]
    provider_id = model.get("provider_id")
    first_seen_at = model.get("first_seen_at")

    result: dict[str, Any] = {
        "id": model_id,
        "object": "model",
        # ``.get(key, default)`` ignores the default when the key is present
        # with a None value, so None is handled explicitly here.
        "created": 0 if first_seen_at is None else int(first_seen_at),
        "owned_by": "opencode" if provider_id is None else provider_id,
        "name": model.get("display_name") or model_id,
    }

    # Add namespaced EggPool metadata
    eggpool_meta: dict[str, Any] = {}
    base_model_id = model.get("base_model_id")
    if base_model_id is not None:
        eggpool_meta["base_model_id"] = base_model_id
    if provider_id is not None:
        eggpool_meta["provider_id"] = provider_id
    if routing_priority is not None:
        eggpool_meta["routing_priority"] = routing_priority

    # Collapsed-entry metadata: contributing providers and the highest
    # routing priority across them. Both are omitted when the entry is
    # already provider-scoped (the singular `routing_priority` above
    # covers that case).
    if provider_id is None:
        if providers is not None:
            eggpool_meta["providers"] = list(providers)
        if routing_priority_max is not None:
            eggpool_meta["routing_priority_max"] = routing_priority_max

    # Copy over only the limits that are actually set, under shorter names.
    effective = model.get("effective_limits") or {}
    limits: dict[str, Any] = {}
    for source_key, public_key in (
        ("context_tokens", "context"),
        ("input_tokens", "input"),
        ("output_tokens", "output"),
    ):
        value = effective.get(source_key)
        if value is not None:
            limits[public_key] = value
    if limits:
        eggpool_meta["limits"] = limits

    if eggpool_meta:
        result["eggpool"] = eggpool_meta

    return result
|