codex-lb 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/__init__.py +5 -0
- app/cli.py +24 -0
- app/core/__init__.py +0 -0
- app/core/auth/__init__.py +96 -0
- app/core/auth/models.py +49 -0
- app/core/auth/refresh.py +144 -0
- app/core/balancer/__init__.py +19 -0
- app/core/balancer/logic.py +140 -0
- app/core/balancer/types.py +9 -0
- app/core/clients/__init__.py +0 -0
- app/core/clients/http.py +39 -0
- app/core/clients/oauth.py +340 -0
- app/core/clients/proxy.py +265 -0
- app/core/clients/usage.py +143 -0
- app/core/config/__init__.py +0 -0
- app/core/config/settings.py +69 -0
- app/core/crypto.py +37 -0
- app/core/errors.py +73 -0
- app/core/openai/__init__.py +0 -0
- app/core/openai/models.py +122 -0
- app/core/openai/parsing.py +55 -0
- app/core/openai/requests.py +59 -0
- app/core/types.py +4 -0
- app/core/usage/__init__.py +185 -0
- app/core/usage/logs.py +57 -0
- app/core/usage/models.py +35 -0
- app/core/usage/pricing.py +172 -0
- app/core/usage/types.py +95 -0
- app/core/utils/__init__.py +0 -0
- app/core/utils/request_id.py +30 -0
- app/core/utils/retry.py +16 -0
- app/core/utils/sse.py +13 -0
- app/core/utils/time.py +19 -0
- app/db/__init__.py +0 -0
- app/db/models.py +82 -0
- app/db/session.py +44 -0
- app/dependencies.py +123 -0
- app/main.py +124 -0
- app/modules/__init__.py +0 -0
- app/modules/accounts/__init__.py +0 -0
- app/modules/accounts/api.py +81 -0
- app/modules/accounts/repository.py +80 -0
- app/modules/accounts/schemas.py +66 -0
- app/modules/accounts/service.py +211 -0
- app/modules/health/__init__.py +0 -0
- app/modules/health/api.py +10 -0
- app/modules/oauth/__init__.py +0 -0
- app/modules/oauth/api.py +57 -0
- app/modules/oauth/schemas.py +32 -0
- app/modules/oauth/service.py +356 -0
- app/modules/oauth/templates/oauth_success.html +122 -0
- app/modules/proxy/__init__.py +0 -0
- app/modules/proxy/api.py +76 -0
- app/modules/proxy/auth_manager.py +51 -0
- app/modules/proxy/load_balancer.py +208 -0
- app/modules/proxy/schemas.py +85 -0
- app/modules/proxy/service.py +707 -0
- app/modules/proxy/types.py +37 -0
- app/modules/proxy/usage_updater.py +147 -0
- app/modules/request_logs/__init__.py +0 -0
- app/modules/request_logs/api.py +31 -0
- app/modules/request_logs/repository.py +86 -0
- app/modules/request_logs/schemas.py +25 -0
- app/modules/request_logs/service.py +77 -0
- app/modules/shared/__init__.py +0 -0
- app/modules/shared/schemas.py +8 -0
- app/modules/usage/__init__.py +0 -0
- app/modules/usage/api.py +31 -0
- app/modules/usage/repository.py +113 -0
- app/modules/usage/schemas.py +62 -0
- app/modules/usage/service.py +246 -0
- app/static/7.css +1336 -0
- app/static/index.css +543 -0
- app/static/index.html +457 -0
- app/static/index.js +1898 -0
- codex_lb-0.1.2.dist-info/METADATA +108 -0
- codex_lb-0.1.2.dist-info/RECORD +80 -0
- codex_lb-0.1.2.dist-info/WHEEL +4 -0
- codex_lb-0.1.2.dist-info/entry_points.txt +2 -0
- codex_lb-0.1.2.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,707 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from datetime import timedelta
|
|
5
|
+
from typing import AsyncIterator, Iterable, Mapping
|
|
6
|
+
|
|
7
|
+
from pydantic import ValidationError
|
|
8
|
+
|
|
9
|
+
from app.core import usage as usage_core
|
|
10
|
+
from app.core.auth.refresh import RefreshError
|
|
11
|
+
from app.core.balancer import PERMANENT_FAILURE_CODES
|
|
12
|
+
from app.core.balancer.types import UpstreamError
|
|
13
|
+
from app.core.clients.proxy import ProxyResponseError, filter_inbound_headers
|
|
14
|
+
from app.core.clients.proxy import compact_responses as core_compact_responses
|
|
15
|
+
from app.core.clients.proxy import stream_responses as core_stream_responses
|
|
16
|
+
from app.core.crypto import TokenEncryptor
|
|
17
|
+
from app.core.errors import OpenAIErrorDetail, OpenAIErrorEnvelope, openai_error, response_failed_event
|
|
18
|
+
from app.core.openai.models import OpenAIError, OpenAIResponsePayload
|
|
19
|
+
from app.core.openai.parsing import parse_sse_event
|
|
20
|
+
from app.core.openai.requests import ResponsesCompactRequest, ResponsesRequest
|
|
21
|
+
from app.core.usage.types import UsageWindowRow, UsageWindowSummary
|
|
22
|
+
from app.core.utils.request_id import ensure_request_id
|
|
23
|
+
from app.core.utils.sse import format_sse_event
|
|
24
|
+
from app.core.utils.time import utcnow
|
|
25
|
+
from app.db.models import Account, AccountStatus, UsageHistory
|
|
26
|
+
from app.modules.accounts.repository import AccountsRepository
|
|
27
|
+
from app.modules.proxy.auth_manager import AuthManager
|
|
28
|
+
from app.modules.proxy.load_balancer import LoadBalancer
|
|
29
|
+
from app.modules.proxy.types import (
|
|
30
|
+
CreditStatusDetailsData,
|
|
31
|
+
RateLimitStatusDetailsData,
|
|
32
|
+
RateLimitStatusPayloadData,
|
|
33
|
+
RateLimitWindowSnapshotData,
|
|
34
|
+
)
|
|
35
|
+
from app.modules.proxy.usage_updater import UsageUpdater
|
|
36
|
+
from app.modules.request_logs.repository import RequestLogsRepository
|
|
37
|
+
from app.modules.usage.repository import UsageRepository
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ProxyService:
    """Fan /responses requests out to upstream OpenAI accounts.

    Responsibilities visible in this module: pick an account via the load
    balancer, keep its OAuth access token fresh, forward streaming and
    compact requests, write a request log for every attempt, and translate
    upstream failures into load-balancer state changes (rate limit, quota
    exceeded, permanent failure) plus client-facing SSE error events.
    """

    def __init__(
        self,
        accounts_repo: AccountsRepository,
        usage_repo: UsageRepository,
        logs_repo: RequestLogsRepository,
    ) -> None:
        self._accounts_repo = accounts_repo
        self._usage_repo = usage_repo
        self._logs_repo = logs_repo
        # Decrypts stored access tokens before they are sent upstream.
        self._encryptor = TokenEncryptor()
        # Collaborators layered over the repositories.
        self._auth_manager = AuthManager(accounts_repo)
        self._load_balancer = LoadBalancer(accounts_repo, usage_repo)
        self._usage_updater = UsageUpdater(usage_repo, accounts_repo)

    def stream_responses(
        self,
        payload: ResponsesRequest,
        headers: Mapping[str, str],
        *,
        propagate_http_errors: bool = False,
    ) -> AsyncIterator[str]:
        """Return an async iterator of SSE lines for a streaming request.

        Inbound headers are filtered eagerly; the retrying generator does no
        further work until iterated. With ``propagate_http_errors`` the
        caller receives ``ProxyResponseError`` instead of an SSE
        "response.failed" event for HTTP-level upstream failures.
        """
        filtered = filter_inbound_headers(headers)
        return self._stream_with_retry(
            payload,
            filtered,
            propagate_http_errors=propagate_http_errors,
        )

    async def compact_responses(
        self,
        payload: ResponsesCompactRequest,
        headers: Mapping[str, str],
    ) -> OpenAIResponsePayload:
        """Forward a non-streaming (compact) request to one upstream account.

        Raises ``ProxyResponseError(503, ...)`` when no account is available.
        A 401 from upstream triggers one forced token refresh followed by a
        single retry against the same account.
        """
        filtered = filter_inbound_headers(headers)
        selection = await self._load_balancer.select_account()
        account = selection.account
        if not account:
            raise ProxyResponseError(
                503,
                openai_error("no_accounts", selection.error_message or "No active accounts available"),
            )
        account = await self._ensure_fresh(account)
        account_id = _header_account_id(account.id)

        async def _call_compact(target: Account) -> OpenAIResponsePayload:
            # Decrypt at call time so a refreshed account re-reads its token.
            access_token = self._encryptor.decrypt(target.access_token_encrypted)
            return await core_compact_responses(payload, filtered, access_token, account_id)

        try:
            return await _call_compact(account)
        except ProxyResponseError as exc:
            if exc.status_code != 401:
                # Non-auth failure: update balancer state, then bubble up.
                await self._handle_proxy_error(account, exc)
                raise
            try:
                account = await self._ensure_fresh(account, force=True)
            except RefreshError as refresh_exc:
                if refresh_exc.is_permanent:
                    await self._load_balancer.mark_permanent_failure(account, refresh_exc.code)
                # Refresh failed: surface the original 401, not the refresh error.
                raise exc
            try:
                return await _call_compact(account)
            except ProxyResponseError as exc:
                await self._handle_proxy_error(account, exc)
                raise

    async def rate_limit_headers(self) -> dict[str, str]:
        """Build the ``x-codex-*`` usage and credit response headers.

        Aggregates recent usage over the "primary" and "secondary" windows
        and appends the latest per-account credit figures.
        """
        now = utcnow()
        accounts = await self._accounts_repo.list_accounts()
        account_map = {account.id: account for account in accounts}

        headers: dict[str, str] = {}
        primary_minutes = await self._usage_repo.latest_window_minutes("primary")
        if primary_minutes is None:
            # No recorded window length yet: fall back to the built-in default.
            primary_minutes = usage_core.default_window_minutes("primary")
        if primary_minutes:
            primary_rows = await self._usage_repo.aggregate_since(
                now - timedelta(minutes=primary_minutes),
                window="primary",
            )
            if primary_rows:
                summary = usage_core.summarize_usage_window(
                    [row.to_window_row() for row in primary_rows],
                    account_map,
                    "primary",
                )
                headers.update(_rate_limit_headers("primary", summary))

        # Same computation, repeated for the secondary window.
        secondary_minutes = await self._usage_repo.latest_window_minutes("secondary")
        if secondary_minutes is None:
            secondary_minutes = usage_core.default_window_minutes("secondary")
        if secondary_minutes:
            secondary_rows = await self._usage_repo.aggregate_since(
                now - timedelta(minutes=secondary_minutes),
                window="secondary",
            )
            if secondary_rows:
                summary = usage_core.summarize_usage_window(
                    [row.to_window_row() for row in secondary_rows],
                    account_map,
                    "secondary",
                )
                headers.update(_rate_limit_headers("secondary", summary))

        latest_usage = await self._usage_repo.latest_by_account()
        headers.update(_credits_headers(latest_usage.values()))
        return headers

    async def get_rate_limit_payload(self) -> RateLimitStatusPayloadData:
        """Return the structured rate-limit/credits status payload.

        Refreshes usage first, then summarizes the latest usage row per
        account (for the accounts that count toward limits) in both windows.
        Returns a bare "guest" payload when no eligible accounts exist.
        """
        accounts = await self._accounts_repo.list_accounts()
        await self._refresh_usage(accounts)
        selected_accounts = _select_accounts_for_limits(accounts)
        if not selected_accounts:
            return RateLimitStatusPayloadData(plan_type="guest")

        account_map = {account.id: account for account in selected_accounts}
        primary_rows = await self._latest_usage_rows(account_map, "primary")
        secondary_rows = await self._latest_usage_rows(account_map, "secondary")

        primary_summary = _summarize_window(primary_rows, account_map, "primary")
        secondary_summary = _summarize_window(secondary_rows, account_map, "secondary")

        now_epoch = int(time.time())
        primary_window = _window_snapshot(primary_summary, primary_rows, "primary", now_epoch)
        secondary_window = _window_snapshot(secondary_summary, secondary_rows, "secondary", now_epoch)

        return RateLimitStatusPayloadData(
            plan_type=_plan_type_for_accounts(selected_accounts),
            rate_limit=_rate_limit_details(primary_window, secondary_window),
            credits=_credits_snapshot(await self._latest_usage_entries(account_map)),
        )

    async def _stream_with_retry(
        self,
        payload: ResponsesRequest,
        headers: Mapping[str, str],
        *,
        propagate_http_errors: bool,
    ) -> AsyncIterator[str]:
        """Stream SSE lines, retrying across accounts on recoverable errors.

        Up to ``max_attempts`` account selections are tried. Terminal
        failures are emitted to the client as a single "response.failed"
        SSE event, or re-raised when ``propagate_http_errors`` is set.
        """
        request_id = ensure_request_id()
        max_attempts = 3
        for attempt in range(max_attempts):
            selection = await self._load_balancer.select_account()
            account = selection.account
            if not account:
                event = response_failed_event(
                    "no_accounts",
                    selection.error_message or "No active accounts available",
                    response_id=request_id,
                )
                yield format_sse_event(event)
                return

            try:
                account = await self._ensure_fresh(account)
                # Retry is only allowed while later attempts remain.
                async for line in self._stream_once(
                    account,
                    payload,
                    headers,
                    request_id,
                    attempt < max_attempts - 1,
                ):
                    yield line
                return
            except _RetryableStreamError as exc:
                # Upstream reported a failure before any data reached the
                # client; record it against the account and try the next one.
                await self._handle_stream_error(account, exc.error, exc.code)
                continue
            except ProxyResponseError as exc:
                if exc.status_code == 401:
                    try:
                        account = await self._ensure_fresh(account, force=True)
                    except RefreshError as refresh_exc:
                        if refresh_exc.is_permanent:
                            await self._load_balancer.mark_permanent_failure(account, refresh_exc.code)
                        continue
                    # One retry with the refreshed token; no further retries.
                    async for line in self._stream_once(account, payload, headers, request_id, False):
                        yield line
                    return
                error = _parse_openai_error(exc.payload)
                error_code = _normalize_error_code(error.code if error else None, error.type if error else None)
                error_message = error.message if error else None
                error_type = error.type if error else None
                error_param = error.param if error else None
                await self._handle_stream_error(
                    account,
                    _upstream_error_from_openai(error),
                    error_code,
                )
                if propagate_http_errors:
                    raise
                event = response_failed_event(
                    error_code,
                    error_message or "Upstream error",
                    error_type=error_type or "server_error",
                    response_id=request_id,
                    error_param=error_param,
                )
                _apply_error_metadata(event["response"]["error"], error)
                yield format_sse_event(event)
                return
            except RefreshError as exc:
                if exc.is_permanent:
                    await self._load_balancer.mark_permanent_failure(account, exc.code)
                continue
            except Exception:
                # Unexpected failure (network, parsing, ...): count it and
                # either retry or give up with a generic event.
                await self._load_balancer.record_error(account)
                if attempt == max_attempts - 1:
                    event = response_failed_event(
                        "upstream_error",
                        "Proxy streaming failed",
                        response_id=request_id,
                    )
                    yield format_sse_event(event)
                    return
        event = response_failed_event(
            "no_accounts",
            "No available accounts after retries",
            response_id=request_id,
        )
        yield format_sse_event(event)

    async def _stream_once(
        self,
        account: Account,
        payload: ResponsesRequest,
        headers: Mapping[str, str],
        request_id: str,
        allow_retry: bool,
    ) -> AsyncIterator[str]:
        """Stream one upstream attempt, always writing a request log.

        The first SSE event is inspected before anything is forwarded: if it
        is already a failure and ``allow_retry`` is set, _RetryableStreamError
        is raised so the caller can move to another account without the
        client having seen any data. Token usage from "response.completed"
        events is captured for the log entry written in ``finally``.
        """
        access_token = self._encryptor.decrypt(account.access_token_encrypted)
        account_id = _header_account_id(account.id)
        model = payload.model
        start = time.monotonic()
        status = "success"
        error_code = None
        error_message = None
        usage = None

        try:
            stream = core_stream_responses(
                payload,
                headers,
                access_token,
                account_id,
                raise_for_status=True,
            )
            iterator = stream.__aiter__()
            try:
                first = await iterator.__anext__()
            except StopAsyncIteration:
                # Empty upstream stream: nothing to forward, just log.
                return
            event = parse_sse_event(first)
            if event and event.type in ("response.failed", "error"):
                error = event.response.error if event.type == "response.failed" else event.error
                code = _normalize_error_code(
                    error.code if error else None,
                    error.type if error else None,
                )
                status = "error"
                error_code = code
                error_message = error.message if error else None
                if allow_retry:
                    error_payload = _upstream_error_from_openai(error)
                    raise _RetryableStreamError(code, error_payload)

            if event and event.type == "response.completed":
                usage = event.response.usage if event.response else None
            # Forward the first line (including a failure event when no
            # retry is allowed) and then the remainder of the stream.
            yield first

            async for line in iterator:
                event = parse_sse_event(line)
                if event:
                    event_type = event.type
                    if event_type in ("response.failed", "error"):
                        status = "error"
                        error = event.response.error if event_type == "response.failed" else event.error
                        error_code = _normalize_error_code(
                            error.code if error else None,
                            error.type if error else None,
                        )
                        error_message = error.message if error else None
                    if event_type == "response.completed":
                        usage = event.response.usage if event.response else None
                yield line
        except ProxyResponseError as exc:
            # HTTP-level rejection: capture details for the log entry, then
            # let the caller decide how to surface it.
            error = _parse_openai_error(exc.payload)
            status = "error"
            error_code = _normalize_error_code(
                error.code if error else None,
                error.type if error else None,
            )
            error_message = error.message if error else None
            raise
        finally:
            # Always write a request log, even on failure or early exit.
            latency_ms = int((time.monotonic() - start) * 1000)
            input_tokens = usage.input_tokens if usage else None
            output_tokens = usage.output_tokens if usage else None
            cached_input_tokens = (
                usage.input_tokens_details.cached_tokens if usage and usage.input_tokens_details else None
            )
            reasoning_tokens = (
                usage.output_tokens_details.reasoning_tokens if usage and usage.output_tokens_details else None
            )
            await self._logs_repo.add_log(
                account_id=account.id,
                request_id=request_id,
                model=model,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cached_input_tokens=cached_input_tokens,
                reasoning_tokens=reasoning_tokens,
                latency_ms=latency_ms,
                status=status,
                error_code=error_code,
                error_message=error_message,
            )

    async def _refresh_usage(self, accounts: list[Account]) -> None:
        """Push the latest primary-window usage into the usage updater."""
        latest_usage = await self._usage_repo.latest_by_account(window="primary")
        await self._usage_updater.refresh_accounts(accounts, latest_usage)

    async def _latest_usage_rows(
        self,
        account_map: dict[str, Account],
        window: str,
    ) -> list[UsageWindowRow]:
        """Return the latest usage row per account in *account_map* for *window*."""
        if not account_map:
            return []
        latest = await self._usage_repo.latest_by_account(window=window)
        return [
            UsageWindowRow(
                account_id=entry.account_id,
                used_percent=entry.used_percent,
                reset_at=entry.reset_at,
                window_minutes=entry.window_minutes,
            )
            for entry in latest.values()
            if entry.account_id in account_map
        ]

    async def _latest_usage_entries(
        self,
        account_map: dict[str, Account],
    ) -> list[UsageHistory]:
        """Return the latest UsageHistory entry for each account in *account_map*."""
        if not account_map:
            return []
        latest = await self._usage_repo.latest_by_account()
        return [entry for entry in latest.values() if entry.account_id in account_map]

    async def _ensure_fresh(self, account: Account, *, force: bool = False) -> Account:
        """Return *account* with a usable access token via the auth manager.

        NOTE(review): the exact meaning of ``force`` lives in AuthManager —
        presumably an unconditional refresh; confirm there.
        """
        return await self._auth_manager.ensure_fresh(account, force=force)

    async def _handle_proxy_error(self, account: Account, exc: ProxyResponseError) -> None:
        """Translate an HTTP-level upstream error into load-balancer state."""
        error = _parse_openai_error(exc.payload)
        code = _normalize_error_code(
            error.code if error else None,
            error.type if error else None,
        )
        await self._handle_stream_error(account, _upstream_error_from_openai(error), code)

    async def _handle_stream_error(self, account: Account, error: UpstreamError, code: str) -> None:
        """Route a normalized upstream error code to the matching balancer action."""
        if code in {"rate_limit_exceeded", "usage_limit_reached"}:
            await self._load_balancer.mark_rate_limit(account, error)
            return
        if code in {"insufficient_quota", "usage_not_included", "quota_exceeded"}:
            await self._load_balancer.mark_quota_exceeded(account, error)
            return
        if code in PERMANENT_FAILURE_CODES:
            await self._load_balancer.mark_permanent_failure(account, code)
            return
        # Anything unrecognized counts as a generic transient error.
        await self._load_balancer.record_error(account)
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def _header_account_id(account_id: str | None) -> str | None:
|
|
415
|
+
if not account_id:
|
|
416
|
+
return None
|
|
417
|
+
if account_id.startswith(("email_", "local_")):
|
|
418
|
+
return None
|
|
419
|
+
return account_id
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
# Plan-type strings this module recognizes on upstream accounts; any other
# value is normalized away to None by _normalize_plan_type.
KNOWN_PLAN_TYPES = {
    "guest",
    "free",
    "go",
    "plus",
    "pro",
    "free_workspace",
    "team",
    "business",
    "education",
    "quorum",
    "k12",
    "enterprise",
    "edu",
}

# Priority order used when active accounts carry different plan types:
# _plan_type_for_accounts reports the first entry of this tuple that any
# account matches.
PLAN_TYPE_PRIORITY = (
    "enterprise",
    "business",
    "team",
    "pro",
    "plus",
    "education",
    "edu",
    "free_workspace",
    "free",
    "go",
    "guest",
    "quorum",
    "k12",
)
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def _select_accounts_for_limits(accounts: Iterable[Account]) -> list[Account]:
    """Keep only accounts that should count toward rate-limit reporting."""
    excluded = (AccountStatus.DEACTIVATED, AccountStatus.PAUSED)
    selected: list[Account] = []
    for account in accounts:
        if account.status not in excluded:
            selected.append(account)
    return selected
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def _summarize_window(
|
|
460
|
+
rows: list[UsageWindowRow],
|
|
461
|
+
account_map: dict[str, Account],
|
|
462
|
+
window: str,
|
|
463
|
+
) -> UsageWindowSummary | None:
|
|
464
|
+
if not rows:
|
|
465
|
+
return None
|
|
466
|
+
return usage_core.summarize_usage_window(rows, account_map, window)
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def _window_snapshot(
|
|
470
|
+
summary: UsageWindowSummary | None,
|
|
471
|
+
rows: list[UsageWindowRow],
|
|
472
|
+
window: str,
|
|
473
|
+
now_epoch: int,
|
|
474
|
+
) -> RateLimitWindowSnapshotData | None:
|
|
475
|
+
if summary is None:
|
|
476
|
+
return None
|
|
477
|
+
|
|
478
|
+
used_percent = _normalize_used_percent(summary.used_percent, rows)
|
|
479
|
+
if used_percent is None:
|
|
480
|
+
return None
|
|
481
|
+
|
|
482
|
+
reset_at = summary.reset_at
|
|
483
|
+
if reset_at is None:
|
|
484
|
+
return None
|
|
485
|
+
|
|
486
|
+
window_minutes = summary.window_minutes or usage_core.default_window_minutes(window)
|
|
487
|
+
if not window_minutes:
|
|
488
|
+
return None
|
|
489
|
+
|
|
490
|
+
limit_window_seconds = int(window_minutes * 60)
|
|
491
|
+
reset_after_seconds = max(0, int(reset_at) - now_epoch)
|
|
492
|
+
|
|
493
|
+
return RateLimitWindowSnapshotData(
|
|
494
|
+
used_percent=_percent_to_int(used_percent),
|
|
495
|
+
limit_window_seconds=limit_window_seconds,
|
|
496
|
+
reset_after_seconds=reset_after_seconds,
|
|
497
|
+
reset_at=int(reset_at),
|
|
498
|
+
)
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def _normalize_used_percent(
|
|
502
|
+
value: float | None,
|
|
503
|
+
rows: Iterable[UsageWindowRow],
|
|
504
|
+
) -> float | None:
|
|
505
|
+
if value is not None:
|
|
506
|
+
return value
|
|
507
|
+
values = [row.used_percent for row in rows if row.used_percent is not None]
|
|
508
|
+
if not values:
|
|
509
|
+
return None
|
|
510
|
+
return sum(values) / len(values)
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
def _percent_to_int(value: float) -> int:
|
|
514
|
+
bounded = max(0.0, min(100.0, value))
|
|
515
|
+
return int(bounded)
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def _rate_limit_details(
|
|
519
|
+
primary: RateLimitWindowSnapshotData | None,
|
|
520
|
+
secondary: RateLimitWindowSnapshotData | None,
|
|
521
|
+
) -> RateLimitStatusDetailsData | None:
|
|
522
|
+
if not primary and not secondary:
|
|
523
|
+
return None
|
|
524
|
+
used_percents = [window.used_percent for window in (primary, secondary) if window]
|
|
525
|
+
limit_reached = any(used >= 100 for used in used_percents)
|
|
526
|
+
return RateLimitStatusDetailsData(
|
|
527
|
+
allowed=not limit_reached,
|
|
528
|
+
limit_reached=limit_reached,
|
|
529
|
+
primary_window=primary,
|
|
530
|
+
secondary_window=secondary,
|
|
531
|
+
)
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def _aggregate_credits(entries: Iterable[UsageHistory]) -> tuple[bool, bool, float] | None:
|
|
535
|
+
has_data = False
|
|
536
|
+
has_credits = False
|
|
537
|
+
unlimited = False
|
|
538
|
+
balance_total = 0.0
|
|
539
|
+
|
|
540
|
+
for entry in entries:
|
|
541
|
+
credits_has = entry.credits_has
|
|
542
|
+
credits_unlimited = entry.credits_unlimited
|
|
543
|
+
credits_balance = entry.credits_balance
|
|
544
|
+
if credits_has is None and credits_unlimited is None and credits_balance is None:
|
|
545
|
+
continue
|
|
546
|
+
has_data = True
|
|
547
|
+
if credits_has is True:
|
|
548
|
+
has_credits = True
|
|
549
|
+
if credits_unlimited is True:
|
|
550
|
+
unlimited = True
|
|
551
|
+
if credits_balance is not None and not credits_unlimited:
|
|
552
|
+
try:
|
|
553
|
+
balance_total += float(credits_balance)
|
|
554
|
+
except (TypeError, ValueError):
|
|
555
|
+
continue
|
|
556
|
+
|
|
557
|
+
if not has_data:
|
|
558
|
+
return None
|
|
559
|
+
if unlimited:
|
|
560
|
+
has_credits = True
|
|
561
|
+
return has_credits, unlimited, balance_total
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def _credits_snapshot(entries: Iterable[UsageHistory]) -> CreditStatusDetailsData | None:
    """Build the structured credits block from per-account usage entries."""
    aggregate = _aggregate_credits(entries)
    if aggregate is None:
        return None
    has_credits, unlimited, total = aggregate
    return CreditStatusDetailsData(
        has_credits=has_credits,
        unlimited=unlimited,
        # Balance is serialized as a string, rounded to cents.
        balance=str(round(total, 2)),
        approx_local_messages=None,
        approx_cloud_messages=None,
    )
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
def _plan_type_for_accounts(accounts: Iterable[Account]) -> str:
    """Pick one plan type representing *accounts*.

    Unknown plans are dropped; when the remaining plans disagree, the
    highest-priority one from PLAN_TYPE_PRIORITY wins; "guest" is the
    fallback when nothing usable remains.
    """
    plans = [
        plan
        for plan in (_normalize_plan_type(account.plan_type) for account in accounts)
        if plan is not None
    ]
    if not plans:
        return "guest"
    distinct = set(plans)
    if len(distinct) == 1:
        return plans[0]
    return next((plan for plan in PLAN_TYPE_PRIORITY if plan in distinct), "guest")
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def _normalize_plan_type(value: str | None) -> str | None:
    """Lower-case and trim *value*; unknown or empty plans become None."""
    if not value:
        return None
    candidate = value.strip().lower()
    return candidate if candidate in KNOWN_PLAN_TYPES else None
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
def _rate_limit_headers(
|
|
603
|
+
window_label: str,
|
|
604
|
+
summary: UsageWindowSummary,
|
|
605
|
+
) -> dict[str, str]:
|
|
606
|
+
used_percent = summary.used_percent
|
|
607
|
+
window_minutes = summary.window_minutes
|
|
608
|
+
if used_percent is None or window_minutes is None:
|
|
609
|
+
return {}
|
|
610
|
+
headers = {
|
|
611
|
+
f"x-codex-{window_label}-used-percent": str(float(used_percent)),
|
|
612
|
+
f"x-codex-{window_label}-window-minutes": str(int(window_minutes)),
|
|
613
|
+
}
|
|
614
|
+
reset_at = summary.reset_at
|
|
615
|
+
if reset_at is not None:
|
|
616
|
+
headers[f"x-codex-{window_label}-reset-at"] = str(int(reset_at))
|
|
617
|
+
return headers
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
def _credits_headers(entries: Iterable[UsageHistory]) -> dict[str, str]:
    """Render the aggregated credit state as x-codex-credits-* headers."""
    aggregate = _aggregate_credits(entries)
    if aggregate is None:
        return {}
    has_credits, unlimited, total = aggregate
    return {
        "x-codex-credits-has-credits": "true" if has_credits else "false",
        "x-codex-credits-unlimited": "true" if unlimited else "false",
        # Two-decimal fixed-point balance, e.g. "4.00".
        "x-codex-credits-balance": f"{total:.2f}",
    }
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
def _normalize_error_code(code: str | None, error_type: str | None) -> str:
|
|
634
|
+
value = code or error_type
|
|
635
|
+
if not value:
|
|
636
|
+
return "upstream_error"
|
|
637
|
+
return value.lower()
|
|
638
|
+
|
|
639
|
+
|
|
640
|
+
def _parse_openai_error(payload: OpenAIErrorEnvelope) -> OpenAIError | None:
    """Extract an OpenAIError from an upstream error envelope.

    Tries strict model validation first; on validation failure, falls back
    to field-by-field coercion of a plain dict. Returns None when the
    envelope carries no usable error.
    """
    raw = payload.get("error")
    if not raw:
        return None
    try:
        return OpenAIError.model_validate(raw)
    except ValidationError:
        pass
    if not isinstance(raw, dict):
        return None
    # Lenient fallback: keep whatever fields coerce cleanly, drop the rest.
    return OpenAIError(
        message=_coerce_str(raw.get("message")),
        type=_coerce_str(raw.get("type")),
        code=_coerce_str(raw.get("code")),
        param=_coerce_str(raw.get("param")),
        plan_type=_coerce_str(raw.get("plan_type")),
        resets_at=_coerce_number(raw.get("resets_at")),
        resets_in_seconds=_coerce_number(raw.get("resets_in_seconds")),
    )
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
def _coerce_str(value: object) -> str | None:
|
|
661
|
+
return value if isinstance(value, str) else None
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
def _coerce_number(value: object) -> int | float | None:
|
|
665
|
+
if isinstance(value, (int, float)):
|
|
666
|
+
return value
|
|
667
|
+
if isinstance(value, str):
|
|
668
|
+
try:
|
|
669
|
+
return float(value.strip())
|
|
670
|
+
except ValueError:
|
|
671
|
+
return None
|
|
672
|
+
return None
|
|
673
|
+
|
|
674
|
+
|
|
675
|
+
def _apply_error_metadata(target: OpenAIErrorDetail, error: OpenAIError | None) -> None:
|
|
676
|
+
if not error:
|
|
677
|
+
return
|
|
678
|
+
if error.plan_type is not None:
|
|
679
|
+
target["plan_type"] = error.plan_type
|
|
680
|
+
if error.resets_at is not None:
|
|
681
|
+
target["resets_at"] = error.resets_at
|
|
682
|
+
if error.resets_in_seconds is not None:
|
|
683
|
+
target["resets_in_seconds"] = error.resets_in_seconds
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
class _RetryableStreamError(Exception):
|
|
687
|
+
def __init__(self, code: str, error: UpstreamError) -> None:
|
|
688
|
+
super().__init__(code)
|
|
689
|
+
self.code = code
|
|
690
|
+
self.error = error
|
|
691
|
+
|
|
692
|
+
|
|
693
|
+
def _upstream_error_from_openai(error: OpenAIError | None) -> UpstreamError:
|
|
694
|
+
if not error:
|
|
695
|
+
return {}
|
|
696
|
+
data = error.model_dump(exclude_none=True)
|
|
697
|
+
payload: UpstreamError = {}
|
|
698
|
+
message = data.get("message")
|
|
699
|
+
if isinstance(message, str):
|
|
700
|
+
payload["message"] = message
|
|
701
|
+
resets_at = data.get("resets_at")
|
|
702
|
+
if isinstance(resets_at, (int, float)):
|
|
703
|
+
payload["resets_at"] = resets_at
|
|
704
|
+
resets_in_seconds = data.get("resets_in_seconds")
|
|
705
|
+
if isinstance(resets_in_seconds, (int, float)):
|
|
706
|
+
payload["resets_in_seconds"] = resets_in_seconds
|
|
707
|
+
return payload
|