codex-lb 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/core/clients/proxy.py +33 -3
- app/core/config/settings.py +9 -8
- app/core/handlers/__init__.py +3 -0
- app/core/handlers/exceptions.py +39 -0
- app/core/middleware/__init__.py +9 -0
- app/core/middleware/api_errors.py +33 -0
- app/core/middleware/request_decompression.py +101 -0
- app/core/middleware/request_id.py +27 -0
- app/core/openai/chat_requests.py +172 -0
- app/core/openai/chat_responses.py +534 -0
- app/core/openai/message_coercion.py +60 -0
- app/core/openai/models_catalog.py +72 -0
- app/core/openai/requests.py +23 -5
- app/core/openai/v1_requests.py +92 -0
- app/db/models.py +3 -3
- app/db/session.py +25 -8
- app/dependencies.py +43 -16
- app/main.py +13 -67
- app/modules/accounts/repository.py +25 -10
- app/modules/proxy/api.py +94 -0
- app/modules/proxy/load_balancer.py +75 -58
- app/modules/proxy/repo_bundle.py +23 -0
- app/modules/proxy/service.py +127 -102
- app/modules/request_logs/api.py +61 -7
- app/modules/request_logs/repository.py +131 -16
- app/modules/request_logs/schemas.py +11 -2
- app/modules/request_logs/service.py +97 -20
- app/modules/usage/service.py +65 -4
- app/modules/usage/updater.py +58 -26
- app/static/index.css +378 -1
- app/static/index.html +183 -8
- app/static/index.js +308 -13
- {codex_lb-0.3.1.dist-info → codex_lb-0.5.0.dist-info}/METADATA +42 -3
- {codex_lb-0.3.1.dist-info → codex_lb-0.5.0.dist-info}/RECORD +37 -25
- {codex_lb-0.3.1.dist-info → codex_lb-0.5.0.dist-info}/WHEEL +0 -0
- {codex_lb-0.3.1.dist-info → codex_lb-0.5.0.dist-info}/entry_points.txt +0 -0
- {codex_lb-0.3.1.dist-info → codex_lb-0.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -16,8 +16,8 @@ from app.core.balancer.types import UpstreamError
|
|
|
16
16
|
from app.core.usage.quota import apply_usage_quota
|
|
17
17
|
from app.db.models import Account, UsageHistory
|
|
18
18
|
from app.modules.accounts.repository import AccountsRepository
|
|
19
|
+
from app.modules.proxy.repo_bundle import ProxyRepoFactory
|
|
19
20
|
from app.modules.proxy.sticky_repository import StickySessionsRepository
|
|
20
|
-
from app.modules.usage.repository import UsageRepository
|
|
21
21
|
from app.modules.usage.updater import UsageUpdater
|
|
22
22
|
|
|
23
23
|
|
|
@@ -37,16 +37,8 @@ class AccountSelection:
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
class LoadBalancer:
|
|
40
|
-
def __init__(
|
|
41
|
-
self
|
|
42
|
-
accounts_repo: AccountsRepository,
|
|
43
|
-
usage_repo: UsageRepository,
|
|
44
|
-
sticky_repo: StickySessionsRepository | None = None,
|
|
45
|
-
) -> None:
|
|
46
|
-
self._accounts_repo = accounts_repo
|
|
47
|
-
self._usage_repo = usage_repo
|
|
48
|
-
self._usage_updater = UsageUpdater(usage_repo, accounts_repo)
|
|
49
|
-
self._sticky_repo = sticky_repo
|
|
40
|
+
def __init__(self, repo_factory: ProxyRepoFactory) -> None:
|
|
41
|
+
self._repo_factory = repo_factory
|
|
50
42
|
self._runtime: dict[str, RuntimeState] = {}
|
|
51
43
|
|
|
52
44
|
async def select_account(
|
|
@@ -56,43 +48,53 @@ class LoadBalancer:
|
|
|
56
48
|
reallocate_sticky: bool = False,
|
|
57
49
|
prefer_earlier_reset_accounts: bool = False,
|
|
58
50
|
) -> AccountSelection:
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
51
|
+
selected_snapshot: Account | None = None
|
|
52
|
+
error_message: str | None = None
|
|
53
|
+
async with self._repo_factory() as repos:
|
|
54
|
+
accounts = await repos.accounts.list_accounts()
|
|
55
|
+
latest_primary = await repos.usage.latest_by_account()
|
|
56
|
+
updater = UsageUpdater(repos.usage, repos.accounts)
|
|
57
|
+
await updater.refresh_accounts(accounts, latest_primary)
|
|
58
|
+
latest_primary = await repos.usage.latest_by_account()
|
|
59
|
+
latest_secondary = await repos.usage.latest_by_account(window="secondary")
|
|
60
|
+
|
|
61
|
+
states, account_map = _build_states(
|
|
62
|
+
accounts=accounts,
|
|
63
|
+
latest_primary=latest_primary,
|
|
64
|
+
latest_secondary=latest_secondary,
|
|
65
|
+
runtime=self._runtime,
|
|
66
|
+
)
|
|
71
67
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
68
|
+
result = await self._select_with_stickiness(
|
|
69
|
+
states=states,
|
|
70
|
+
account_map=account_map,
|
|
71
|
+
sticky_key=sticky_key,
|
|
72
|
+
reallocate_sticky=reallocate_sticky,
|
|
73
|
+
prefer_earlier_reset_accounts=prefer_earlier_reset_accounts,
|
|
74
|
+
sticky_repo=repos.sticky_sessions,
|
|
75
|
+
)
|
|
76
|
+
for state in states:
|
|
77
|
+
account = account_map.get(state.account_id)
|
|
78
|
+
if account:
|
|
79
|
+
await self._sync_state(repos.accounts, account, state)
|
|
80
|
+
|
|
81
|
+
if result.account is None:
|
|
82
|
+
error_message = result.error_message
|
|
83
|
+
else:
|
|
84
|
+
selected = account_map.get(result.account.account_id)
|
|
85
|
+
if selected is None:
|
|
86
|
+
error_message = result.error_message
|
|
87
|
+
else:
|
|
88
|
+
selected.status = result.account.status
|
|
89
|
+
selected.deactivation_reason = result.account.deactivation_reason
|
|
90
|
+
selected_snapshot = _clone_account(selected)
|
|
91
|
+
|
|
92
|
+
if selected_snapshot is None:
|
|
93
|
+
return AccountSelection(account=None, error_message=error_message)
|
|
94
|
+
|
|
95
|
+
runtime = self._runtime.setdefault(selected_snapshot.id, RuntimeState())
|
|
96
|
+
runtime.last_selected_at = time.time()
|
|
97
|
+
return AccountSelection(account=selected_snapshot, error_message=None)
|
|
96
98
|
|
|
97
99
|
async def _select_with_stickiness(
|
|
98
100
|
self,
|
|
@@ -102,21 +104,22 @@ class LoadBalancer:
|
|
|
102
104
|
sticky_key: str | None,
|
|
103
105
|
reallocate_sticky: bool,
|
|
104
106
|
prefer_earlier_reset_accounts: bool,
|
|
107
|
+
sticky_repo: StickySessionsRepository | None,
|
|
105
108
|
) -> SelectionResult:
|
|
106
|
-
if not sticky_key or not
|
|
109
|
+
if not sticky_key or not sticky_repo:
|
|
107
110
|
return select_account(states, prefer_earlier_reset=prefer_earlier_reset_accounts)
|
|
108
111
|
|
|
109
112
|
if reallocate_sticky:
|
|
110
113
|
chosen = select_account(states, prefer_earlier_reset=prefer_earlier_reset_accounts)
|
|
111
114
|
if chosen.account is not None and chosen.account.account_id in account_map:
|
|
112
|
-
await
|
|
115
|
+
await sticky_repo.upsert(sticky_key, chosen.account.account_id)
|
|
113
116
|
return chosen
|
|
114
117
|
|
|
115
|
-
existing = await
|
|
118
|
+
existing = await sticky_repo.get_account_id(sticky_key)
|
|
116
119
|
if existing:
|
|
117
120
|
pinned = next((state for state in states if state.account_id == existing), None)
|
|
118
121
|
if pinned is None:
|
|
119
|
-
await
|
|
122
|
+
await sticky_repo.delete(sticky_key)
|
|
120
123
|
else:
|
|
121
124
|
pinned_result = select_account([pinned], prefer_earlier_reset=prefer_earlier_reset_accounts)
|
|
122
125
|
if pinned_result.account is not None:
|
|
@@ -124,29 +127,33 @@ class LoadBalancer:
|
|
|
124
127
|
|
|
125
128
|
chosen = select_account(states, prefer_earlier_reset=prefer_earlier_reset_accounts)
|
|
126
129
|
if chosen.account is not None and chosen.account.account_id in account_map:
|
|
127
|
-
await
|
|
130
|
+
await sticky_repo.upsert(sticky_key, chosen.account.account_id)
|
|
128
131
|
return chosen
|
|
129
132
|
|
|
130
133
|
async def mark_rate_limit(self, account: Account, error: UpstreamError) -> None:
|
|
131
134
|
state = self._state_for(account)
|
|
132
135
|
handle_rate_limit(state, error)
|
|
133
|
-
|
|
136
|
+
async with self._repo_factory() as repos:
|
|
137
|
+
await self._sync_state(repos.accounts, account, state)
|
|
134
138
|
|
|
135
139
|
async def mark_quota_exceeded(self, account: Account, error: UpstreamError) -> None:
|
|
136
140
|
state = self._state_for(account)
|
|
137
141
|
handle_quota_exceeded(state, error)
|
|
138
|
-
|
|
142
|
+
async with self._repo_factory() as repos:
|
|
143
|
+
await self._sync_state(repos.accounts, account, state)
|
|
139
144
|
|
|
140
145
|
async def mark_permanent_failure(self, account: Account, error_code: str) -> None:
|
|
141
146
|
state = self._state_for(account)
|
|
142
147
|
handle_permanent_failure(state, error_code)
|
|
143
|
-
|
|
148
|
+
async with self._repo_factory() as repos:
|
|
149
|
+
await self._sync_state(repos.accounts, account, state)
|
|
144
150
|
|
|
145
151
|
async def record_error(self, account: Account) -> None:
|
|
146
152
|
state = self._state_for(account)
|
|
147
153
|
state.error_count += 1
|
|
148
154
|
state.last_error_at = time.time()
|
|
149
|
-
|
|
155
|
+
async with self._repo_factory() as repos:
|
|
156
|
+
await self._sync_state(repos.accounts, account, state)
|
|
150
157
|
|
|
151
158
|
def _state_for(self, account: Account) -> AccountState:
|
|
152
159
|
runtime = self._runtime.setdefault(account.id, RuntimeState())
|
|
@@ -164,7 +171,12 @@ class LoadBalancer:
|
|
|
164
171
|
deactivation_reason=account.deactivation_reason,
|
|
165
172
|
)
|
|
166
173
|
|
|
167
|
-
async def _sync_state(
|
|
174
|
+
async def _sync_state(
|
|
175
|
+
self,
|
|
176
|
+
accounts_repo: AccountsRepository,
|
|
177
|
+
account: Account,
|
|
178
|
+
state: AccountState,
|
|
179
|
+
) -> None:
|
|
168
180
|
runtime = self._runtime.setdefault(account.id, RuntimeState())
|
|
169
181
|
runtime.reset_at = state.reset_at
|
|
170
182
|
runtime.cooldown_until = state.cooldown_until
|
|
@@ -177,7 +189,7 @@ class LoadBalancer:
|
|
|
177
189
|
reset_changed = account.reset_at != reset_at_int
|
|
178
190
|
|
|
179
191
|
if status_changed or reason_changed or reset_changed:
|
|
180
|
-
await
|
|
192
|
+
await accounts_repo.update_status(
|
|
181
193
|
account.id,
|
|
182
194
|
state.status,
|
|
183
195
|
state.deactivation_reason,
|
|
@@ -251,3 +263,8 @@ def _state_from_account(
|
|
|
251
263
|
error_count=runtime.error_count,
|
|
252
264
|
deactivation_reason=account.deactivation_reason,
|
|
253
265
|
)
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def _clone_account(account: Account) -> Account:
|
|
269
|
+
data = {column.name: getattr(account, column.name) for column in Account.__table__.columns}
|
|
270
|
+
return Account(**data)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import AsyncContextManager
|
|
6
|
+
|
|
7
|
+
from app.modules.accounts.repository import AccountsRepository
|
|
8
|
+
from app.modules.proxy.sticky_repository import StickySessionsRepository
|
|
9
|
+
from app.modules.request_logs.repository import RequestLogsRepository
|
|
10
|
+
from app.modules.settings.repository import SettingsRepository
|
|
11
|
+
from app.modules.usage.repository import UsageRepository
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(slots=True)
|
|
15
|
+
class ProxyRepositories:
|
|
16
|
+
accounts: AccountsRepository
|
|
17
|
+
usage: UsageRepository
|
|
18
|
+
request_logs: RequestLogsRepository
|
|
19
|
+
sticky_sessions: StickySessionsRepository
|
|
20
|
+
settings: SettingsRepository
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
ProxyRepoFactory = Callable[[], AsyncContextManager[ProxyRepositories]]
|
app/modules/proxy/service.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
import logging
|
|
4
5
|
import time
|
|
5
6
|
from collections.abc import Sequence
|
|
@@ -28,7 +29,6 @@ from app.core.utils.sse import format_sse_event
|
|
|
28
29
|
from app.core.utils.time import utcnow
|
|
29
30
|
from app.db.models import Account, UsageHistory
|
|
30
31
|
from app.modules.accounts.auth_manager import AuthManager
|
|
31
|
-
from app.modules.accounts.repository import AccountsRepository
|
|
32
32
|
from app.modules.proxy.helpers import (
|
|
33
33
|
_apply_error_metadata,
|
|
34
34
|
_credits_headers,
|
|
@@ -45,33 +45,18 @@ from app.modules.proxy.helpers import (
|
|
|
45
45
|
_window_snapshot,
|
|
46
46
|
)
|
|
47
47
|
from app.modules.proxy.load_balancer import LoadBalancer
|
|
48
|
-
from app.modules.proxy.
|
|
48
|
+
from app.modules.proxy.repo_bundle import ProxyRepoFactory, ProxyRepositories
|
|
49
49
|
from app.modules.proxy.types import RateLimitStatusPayloadData
|
|
50
|
-
from app.modules.request_logs.repository import RequestLogsRepository
|
|
51
|
-
from app.modules.settings.repository import SettingsRepository
|
|
52
|
-
from app.modules.usage.repository import UsageRepository
|
|
53
50
|
from app.modules.usage.updater import UsageUpdater
|
|
54
51
|
|
|
55
52
|
logger = logging.getLogger(__name__)
|
|
56
53
|
|
|
57
54
|
|
|
58
55
|
class ProxyService:
|
|
59
|
-
def __init__(
|
|
60
|
-
self
|
|
61
|
-
accounts_repo: AccountsRepository,
|
|
62
|
-
usage_repo: UsageRepository,
|
|
63
|
-
logs_repo: RequestLogsRepository,
|
|
64
|
-
sticky_repo: StickySessionsRepository,
|
|
65
|
-
settings_repo: SettingsRepository,
|
|
66
|
-
) -> None:
|
|
67
|
-
self._accounts_repo = accounts_repo
|
|
68
|
-
self._usage_repo = usage_repo
|
|
69
|
-
self._logs_repo = logs_repo
|
|
70
|
-
self._settings_repo = settings_repo
|
|
56
|
+
def __init__(self, repo_factory: ProxyRepoFactory) -> None:
|
|
57
|
+
self._repo_factory = repo_factory
|
|
71
58
|
self._encryptor = TokenEncryptor()
|
|
72
|
-
self.
|
|
73
|
-
self._load_balancer = LoadBalancer(accounts_repo, usage_repo, sticky_repo)
|
|
74
|
-
self._usage_updater = UsageUpdater(usage_repo, accounts_repo)
|
|
59
|
+
self._load_balancer = LoadBalancer(repo_factory)
|
|
75
60
|
|
|
76
61
|
def stream_responses(
|
|
77
62
|
self,
|
|
@@ -80,6 +65,7 @@ class ProxyService:
|
|
|
80
65
|
*,
|
|
81
66
|
propagate_http_errors: bool = False,
|
|
82
67
|
) -> AsyncIterator[str]:
|
|
68
|
+
_maybe_log_proxy_request_payload("stream", payload, headers)
|
|
83
69
|
_maybe_log_proxy_request_shape("stream", payload, headers)
|
|
84
70
|
filtered = filter_inbound_headers(headers)
|
|
85
71
|
return self._stream_with_retry(
|
|
@@ -93,11 +79,14 @@ class ProxyService:
|
|
|
93
79
|
payload: ResponsesCompactRequest,
|
|
94
80
|
headers: Mapping[str, str],
|
|
95
81
|
) -> OpenAIResponsePayload:
|
|
82
|
+
_maybe_log_proxy_request_payload("compact", payload, headers)
|
|
96
83
|
_maybe_log_proxy_request_shape("compact", payload, headers)
|
|
97
84
|
filtered = filter_inbound_headers(headers)
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
85
|
+
async with self._repo_factory() as repos:
|
|
86
|
+
settings = await repos.settings.get_or_create()
|
|
87
|
+
prefer_earlier_reset = settings.prefer_earlier_reset_accounts
|
|
88
|
+
sticky_threads_enabled = settings.sticky_threads_enabled
|
|
89
|
+
sticky_key = _sticky_key_from_compact_payload(payload) if sticky_threads_enabled else None
|
|
101
90
|
selection = await self._load_balancer.select_account(
|
|
102
91
|
sticky_key=sticky_key,
|
|
103
92
|
reallocate_sticky=sticky_key is not None,
|
|
@@ -136,69 +125,71 @@ class ProxyService:
|
|
|
136
125
|
|
|
137
126
|
async def rate_limit_headers(self) -> dict[str, str]:
|
|
138
127
|
now = utcnow()
|
|
139
|
-
accounts = await self._accounts_repo.list_accounts()
|
|
140
|
-
account_map = {account.id: account for account in accounts}
|
|
141
|
-
|
|
142
128
|
headers: dict[str, str] = {}
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
account_map,
|
|
155
|
-
"primary",
|
|
129
|
+
async with self._repo_factory() as repos:
|
|
130
|
+
accounts = await repos.accounts.list_accounts()
|
|
131
|
+
account_map = {account.id: account for account in accounts}
|
|
132
|
+
|
|
133
|
+
primary_minutes = await repos.usage.latest_window_minutes("primary")
|
|
134
|
+
if primary_minutes is None:
|
|
135
|
+
primary_minutes = usage_core.default_window_minutes("primary")
|
|
136
|
+
if primary_minutes:
|
|
137
|
+
primary_rows = await repos.usage.aggregate_since(
|
|
138
|
+
now - timedelta(minutes=primary_minutes),
|
|
139
|
+
window="primary",
|
|
156
140
|
)
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
"secondary",
|
|
141
|
+
if primary_rows:
|
|
142
|
+
summary = usage_core.summarize_usage_window(
|
|
143
|
+
[row.to_window_row() for row in primary_rows],
|
|
144
|
+
account_map,
|
|
145
|
+
"primary",
|
|
146
|
+
)
|
|
147
|
+
headers.update(_rate_limit_headers("primary", summary))
|
|
148
|
+
|
|
149
|
+
secondary_minutes = await repos.usage.latest_window_minutes("secondary")
|
|
150
|
+
if secondary_minutes is None:
|
|
151
|
+
secondary_minutes = usage_core.default_window_minutes("secondary")
|
|
152
|
+
if secondary_minutes:
|
|
153
|
+
secondary_rows = await repos.usage.aggregate_since(
|
|
154
|
+
now - timedelta(minutes=secondary_minutes),
|
|
155
|
+
window="secondary",
|
|
172
156
|
)
|
|
173
|
-
|
|
157
|
+
if secondary_rows:
|
|
158
|
+
summary = usage_core.summarize_usage_window(
|
|
159
|
+
[row.to_window_row() for row in secondary_rows],
|
|
160
|
+
account_map,
|
|
161
|
+
"secondary",
|
|
162
|
+
)
|
|
163
|
+
headers.update(_rate_limit_headers("secondary", summary))
|
|
174
164
|
|
|
175
|
-
|
|
176
|
-
|
|
165
|
+
latest_usage = await repos.usage.latest_by_account()
|
|
166
|
+
headers.update(_credits_headers(latest_usage.values()))
|
|
177
167
|
return headers
|
|
178
168
|
|
|
179
169
|
async def get_rate_limit_payload(self) -> RateLimitStatusPayloadData:
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
170
|
+
async with self._repo_factory() as repos:
|
|
171
|
+
accounts = await repos.accounts.list_accounts()
|
|
172
|
+
await self._refresh_usage(repos, accounts)
|
|
173
|
+
selected_accounts = _select_accounts_for_limits(accounts)
|
|
174
|
+
if not selected_accounts:
|
|
175
|
+
return RateLimitStatusPayloadData(plan_type="guest")
|
|
176
|
+
|
|
177
|
+
account_map = {account.id: account for account in selected_accounts}
|
|
178
|
+
primary_rows = await self._latest_usage_rows(repos, account_map, "primary")
|
|
179
|
+
secondary_rows = await self._latest_usage_rows(repos, account_map, "secondary")
|
|
180
|
+
|
|
181
|
+
primary_summary = _summarize_window(primary_rows, account_map, "primary")
|
|
182
|
+
secondary_summary = _summarize_window(secondary_rows, account_map, "secondary")
|
|
183
|
+
|
|
184
|
+
now_epoch = int(time.time())
|
|
185
|
+
primary_window = _window_snapshot(primary_summary, primary_rows, "primary", now_epoch)
|
|
186
|
+
secondary_window = _window_snapshot(secondary_summary, secondary_rows, "secondary", now_epoch)
|
|
187
|
+
|
|
188
|
+
return RateLimitStatusPayloadData(
|
|
189
|
+
plan_type=_plan_type_for_accounts(selected_accounts),
|
|
190
|
+
rate_limit=_rate_limit_details(primary_window, secondary_window),
|
|
191
|
+
credits=_credits_snapshot(await self._latest_usage_entries(repos, account_map)),
|
|
192
|
+
)
|
|
202
193
|
|
|
203
194
|
async def _stream_with_retry(
|
|
204
195
|
self,
|
|
@@ -208,9 +199,11 @@ class ProxyService:
|
|
|
208
199
|
propagate_http_errors: bool,
|
|
209
200
|
) -> AsyncIterator[str]:
|
|
210
201
|
request_id = ensure_request_id()
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
202
|
+
async with self._repo_factory() as repos:
|
|
203
|
+
settings = await repos.settings.get_or_create()
|
|
204
|
+
prefer_earlier_reset = settings.prefer_earlier_reset_accounts
|
|
205
|
+
sticky_threads_enabled = settings.sticky_threads_enabled
|
|
206
|
+
sticky_key = _sticky_key_from_payload(payload) if sticky_threads_enabled else None
|
|
214
207
|
max_attempts = 3
|
|
215
208
|
for attempt in range(max_attempts):
|
|
216
209
|
selection = await self._load_balancer.select_account(
|
|
@@ -398,20 +391,21 @@ class ProxyService:
|
|
|
398
391
|
)
|
|
399
392
|
with anyio.CancelScope(shield=True):
|
|
400
393
|
try:
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
394
|
+
async with self._repo_factory() as repos:
|
|
395
|
+
await repos.request_logs.add_log(
|
|
396
|
+
account_id=account_id_value,
|
|
397
|
+
request_id=request_id,
|
|
398
|
+
model=model,
|
|
399
|
+
input_tokens=input_tokens,
|
|
400
|
+
output_tokens=output_tokens,
|
|
401
|
+
cached_input_tokens=cached_input_tokens,
|
|
402
|
+
reasoning_tokens=reasoning_tokens,
|
|
403
|
+
reasoning_effort=reasoning_effort,
|
|
404
|
+
latency_ms=latency_ms,
|
|
405
|
+
status=status,
|
|
406
|
+
error_code=error_code,
|
|
407
|
+
error_message=error_message,
|
|
408
|
+
)
|
|
415
409
|
except Exception:
|
|
416
410
|
logger.warning(
|
|
417
411
|
"Failed to persist request log account_id=%s request_id=%s",
|
|
@@ -420,18 +414,20 @@ class ProxyService:
|
|
|
420
414
|
exc_info=True,
|
|
421
415
|
)
|
|
422
416
|
|
|
423
|
-
async def _refresh_usage(self, accounts: list[Account]) -> None:
|
|
424
|
-
latest_usage = await
|
|
425
|
-
|
|
417
|
+
async def _refresh_usage(self, repos: ProxyRepositories, accounts: list[Account]) -> None:
|
|
418
|
+
latest_usage = await repos.usage.latest_by_account(window="primary")
|
|
419
|
+
updater = UsageUpdater(repos.usage, repos.accounts)
|
|
420
|
+
await updater.refresh_accounts(accounts, latest_usage)
|
|
426
421
|
|
|
427
422
|
async def _latest_usage_rows(
|
|
428
423
|
self,
|
|
424
|
+
repos: ProxyRepositories,
|
|
429
425
|
account_map: dict[str, Account],
|
|
430
426
|
window: str,
|
|
431
427
|
) -> list[UsageWindowRow]:
|
|
432
428
|
if not account_map:
|
|
433
429
|
return []
|
|
434
|
-
latest = await
|
|
430
|
+
latest = await repos.usage.latest_by_account(window=window)
|
|
435
431
|
return [
|
|
436
432
|
UsageWindowRow(
|
|
437
433
|
account_id=entry.account_id,
|
|
@@ -445,15 +441,18 @@ class ProxyService:
|
|
|
445
441
|
|
|
446
442
|
async def _latest_usage_entries(
|
|
447
443
|
self,
|
|
444
|
+
repos: ProxyRepositories,
|
|
448
445
|
account_map: dict[str, Account],
|
|
449
446
|
) -> list[UsageHistory]:
|
|
450
447
|
if not account_map:
|
|
451
448
|
return []
|
|
452
|
-
latest = await
|
|
449
|
+
latest = await repos.usage.latest_by_account()
|
|
453
450
|
return [entry for entry in latest.values() if entry.account_id in account_map]
|
|
454
451
|
|
|
455
452
|
async def _ensure_fresh(self, account: Account, *, force: bool = False) -> Account:
|
|
456
|
-
|
|
453
|
+
async with self._repo_factory() as repos:
|
|
454
|
+
auth_manager = AuthManager(repos.accounts)
|
|
455
|
+
return await auth_manager.ensure_fresh(account, force=force)
|
|
457
456
|
|
|
458
457
|
async def _handle_proxy_error(self, account: Account, exc: ProxyResponseError) -> None:
|
|
459
458
|
error = _parse_openai_error(exc.payload)
|
|
@@ -526,6 +525,32 @@ def _maybe_log_proxy_request_shape(
|
|
|
526
525
|
)
|
|
527
526
|
|
|
528
527
|
|
|
528
|
+
def _maybe_log_proxy_request_payload(
|
|
529
|
+
kind: str,
|
|
530
|
+
payload: ResponsesRequest | ResponsesCompactRequest,
|
|
531
|
+
headers: Mapping[str, str],
|
|
532
|
+
) -> None:
|
|
533
|
+
settings = get_settings()
|
|
534
|
+
if not settings.log_proxy_request_payload:
|
|
535
|
+
return
|
|
536
|
+
|
|
537
|
+
request_id = get_request_id()
|
|
538
|
+
payload_dict = payload.model_dump(mode="json", exclude_none=True)
|
|
539
|
+
extra = payload.model_extra or {}
|
|
540
|
+
if extra:
|
|
541
|
+
payload_dict = {**payload_dict, "_extra": extra}
|
|
542
|
+
header_keys = _interesting_header_keys(headers)
|
|
543
|
+
payload_json = json.dumps(payload_dict, ensure_ascii=True, separators=(",", ":"))
|
|
544
|
+
|
|
545
|
+
logger.warning(
|
|
546
|
+
"proxy_request_payload request_id=%s kind=%s payload=%s headers=%s",
|
|
547
|
+
request_id,
|
|
548
|
+
kind,
|
|
549
|
+
payload_json,
|
|
550
|
+
header_keys,
|
|
551
|
+
)
|
|
552
|
+
|
|
553
|
+
|
|
529
554
|
def _hash_identifier(value: str) -> str:
|
|
530
555
|
digest = sha256(value.encode("utf-8")).hexdigest()
|
|
531
556
|
return f"sha256:{digest[:12]}"
|