codex-lb 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/core/auth/__init__.py +10 -0
- app/core/balancer/logic.py +33 -6
- app/core/config/settings.py +2 -0
- app/core/usage/__init__.py +2 -0
- app/core/usage/logs.py +12 -2
- app/core/usage/quota.py +10 -4
- app/core/usage/types.py +3 -2
- app/db/migrations/__init__.py +14 -3
- app/db/migrations/versions/add_accounts_chatgpt_account_id.py +29 -0
- app/db/migrations/versions/add_accounts_reset_at.py +29 -0
- app/db/migrations/versions/add_dashboard_settings.py +31 -0
- app/db/migrations/versions/add_request_logs_reasoning_effort.py +21 -0
- app/db/models.py +33 -0
- app/db/session.py +71 -11
- app/dependencies.py +27 -1
- app/main.py +11 -2
- app/modules/accounts/auth_manager.py +44 -3
- app/modules/accounts/repository.py +14 -6
- app/modules/accounts/service.py +4 -2
- app/modules/oauth/service.py +4 -3
- app/modules/proxy/load_balancer.py +74 -5
- app/modules/proxy/service.py +155 -31
- app/modules/proxy/sticky_repository.py +56 -0
- app/modules/request_logs/repository.py +6 -3
- app/modules/request_logs/schemas.py +2 -0
- app/modules/request_logs/service.py +8 -1
- app/modules/settings/__init__.py +1 -0
- app/modules/settings/api.py +37 -0
- app/modules/settings/repository.py +40 -0
- app/modules/settings/schemas.py +13 -0
- app/modules/settings/service.py +33 -0
- app/modules/shared/schemas.py +16 -2
- app/modules/usage/schemas.py +1 -0
- app/modules/usage/service.py +17 -1
- app/modules/usage/updater.py +36 -7
- app/static/index.css +1024 -319
- app/static/index.html +461 -377
- app/static/index.js +327 -49
- {codex_lb-0.2.0.dist-info → codex_lb-0.3.1.dist-info}/METADATA +33 -7
- {codex_lb-0.2.0.dist-info → codex_lb-0.3.1.dist-info}/RECORD +43 -34
- app/static/7.css +0 -1336
- {codex_lb-0.2.0.dist-info → codex_lb-0.3.1.dist-info}/WHEEL +0 -0
- {codex_lb-0.2.0.dist-info → codex_lb-0.3.1.dist-info}/entry_points.txt +0 -0
- {codex_lb-0.2.0.dist-info → codex_lb-0.3.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import logging
|
|
3
4
|
from datetime import datetime
|
|
4
5
|
from typing import Protocol
|
|
5
6
|
|
|
6
|
-
from app.core.auth import DEFAULT_PLAN
|
|
7
|
+
from app.core.auth import DEFAULT_PLAN, OpenAIAuthClaims, extract_id_token_claims
|
|
7
8
|
from app.core.auth.refresh import RefreshError, refresh_access_token, should_refresh
|
|
8
9
|
from app.core.balancer import PERMANENT_FAILURE_CODES
|
|
9
10
|
from app.core.crypto import TokenEncryptor
|
|
@@ -29,9 +30,13 @@ class AccountsRepositoryPort(Protocol):
|
|
|
29
30
|
last_refresh: datetime,
|
|
30
31
|
plan_type: str | None = None,
|
|
31
32
|
email: str | None = None,
|
|
33
|
+
chatgpt_account_id: str | None = None,
|
|
32
34
|
) -> bool: ...
|
|
33
35
|
|
|
34
36
|
|
|
37
|
+
logger = logging.getLogger(__name__)
|
|
38
|
+
|
|
39
|
+
|
|
35
40
|
class AuthManager:
|
|
36
41
|
def __init__(self, repo: AccountsRepositoryPort) -> None:
|
|
37
42
|
self._repo = repo
|
|
@@ -39,8 +44,8 @@ class AuthManager:
|
|
|
39
44
|
|
|
40
45
|
async def ensure_fresh(self, account: Account, *, force: bool = False) -> Account:
|
|
41
46
|
if force or should_refresh(account.last_refresh):
|
|
42
|
-
|
|
43
|
-
return account
|
|
47
|
+
account = await self.refresh_account(account)
|
|
48
|
+
return await self._ensure_chatgpt_account_id(account)
|
|
44
49
|
|
|
45
50
|
async def refresh_account(self, account: Account) -> Account:
|
|
46
51
|
refresh_token = self._encryptor.decrypt(account.refresh_token_encrypted)
|
|
@@ -58,6 +63,8 @@ class AuthManager:
|
|
|
58
63
|
account.refresh_token_encrypted = self._encryptor.encrypt(result.refresh_token)
|
|
59
64
|
account.id_token_encrypted = self._encryptor.encrypt(result.id_token)
|
|
60
65
|
account.last_refresh = utcnow()
|
|
66
|
+
if result.account_id:
|
|
67
|
+
account.chatgpt_account_id = result.account_id
|
|
61
68
|
if result.plan_type is not None:
|
|
62
69
|
account.plan_type = coerce_account_plan_type(
|
|
63
70
|
result.plan_type,
|
|
@@ -76,5 +83,39 @@ class AuthManager:
|
|
|
76
83
|
last_refresh=account.last_refresh,
|
|
77
84
|
plan_type=account.plan_type,
|
|
78
85
|
email=account.email,
|
|
86
|
+
chatgpt_account_id=account.chatgpt_account_id,
|
|
79
87
|
)
|
|
80
88
|
return account
|
|
89
|
+
|
|
90
|
+
async def _ensure_chatgpt_account_id(self, account: Account) -> Account:
    """Backfill ``account.chatgpt_account_id`` from the stored ID token.

    This is a best-effort step: every failure path returns the account
    instead of raising, so a missing id never blocks the caller.

    Args:
        account: The account to inspect; it is mutated in place when an id
            can be derived from its ID token.

    Returns:
        The same ``account`` object, with ``chatgpt_account_id`` populated
        when it was missing and the ID token carried one.
    """
    if account.chatgpt_account_id:
        return account
    try:
        id_token = self._encryptor.decrypt(account.id_token_encrypted)
    except Exception:
        # Still best-effort, but leave a trace: a silent return here made a
        # permanently failing backfill impossible to diagnose.
        logger.warning(
            "Failed to decrypt id_token for chatgpt_account_id backfill account_id=%s",
            account.id,
            exc_info=True,
        )
        return account
    raw_account_id = _chatgpt_account_id_from_id_token(id_token)
    if not raw_account_id:
        return account

    # Set the in-memory value before persisting so the caller can use it
    # even when the write below fails.
    account.chatgpt_account_id = raw_account_id
    try:
        await self._repo.update_tokens(
            account.id,
            access_token_encrypted=account.access_token_encrypted,
            refresh_token_encrypted=account.refresh_token_encrypted,
            id_token_encrypted=account.id_token_encrypted,
            last_refresh=account.last_refresh,
            plan_type=account.plan_type,
            email=account.email,
            chatgpt_account_id=raw_account_id,
        )
    except Exception:
        logger.warning("Failed to persist chatgpt_account_id account_id=%s", account.id, exc_info=True)
    return account
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _chatgpt_account_id_from_id_token(id_token: str) -> str | None:
    """Extract the ChatGPT account id carried by an ID token, if any.

    The id on the nested ``auth`` claims is preferred; the top-level claim
    is used when the nested value is missing or empty.
    """
    parsed = extract_id_token_claims(id_token)
    nested = parsed.auth
    if not nested:
        # No nested auth claims: fall back to a default-constructed instance
        # so the attribute lookup below is uniform.
        nested = OpenAIAuthClaims()
    nested_id = nested.chatgpt_account_id
    if nested_id:
        return nested_id
    return parsed.chatgpt_account_id
|
|
@@ -19,6 +19,7 @@ class AccountsRepository:
|
|
|
19
19
|
async def upsert(self, account: Account) -> Account:
|
|
20
20
|
existing = await self._session.get(Account, account.id)
|
|
21
21
|
if existing:
|
|
22
|
+
existing.chatgpt_account_id = account.chatgpt_account_id
|
|
22
23
|
existing.email = account.email
|
|
23
24
|
existing.plan_type = account.plan_type
|
|
24
25
|
existing.access_token_encrypted = account.access_token_encrypted
|
|
@@ -41,19 +42,21 @@ class AccountsRepository:
|
|
|
41
42
|
account_id: str,
|
|
42
43
|
status: AccountStatus,
|
|
43
44
|
deactivation_reason: str | None = None,
|
|
45
|
+
reset_at: int | None = None,
|
|
44
46
|
) -> bool:
|
|
45
47
|
result = await self._session.execute(
|
|
46
48
|
update(Account)
|
|
47
49
|
.where(Account.id == account_id)
|
|
48
|
-
.values(status=status, deactivation_reason=deactivation_reason)
|
|
50
|
+
.values(status=status, deactivation_reason=deactivation_reason, reset_at=reset_at)
|
|
51
|
+
.returning(Account.id)
|
|
49
52
|
)
|
|
50
53
|
await self._session.commit()
|
|
51
|
-
return
|
|
54
|
+
return result.scalar_one_or_none() is not None
|
|
52
55
|
|
|
53
56
|
async def delete(self, account_id: str) -> bool:
|
|
54
|
-
result = await self._session.execute(delete(Account).where(Account.id == account_id))
|
|
57
|
+
result = await self._session.execute(delete(Account).where(Account.id == account_id).returning(Account.id))
|
|
55
58
|
await self._session.commit()
|
|
56
|
-
return
|
|
59
|
+
return result.scalar_one_or_none() is not None
|
|
57
60
|
|
|
58
61
|
async def update_tokens(
|
|
59
62
|
self,
|
|
@@ -64,6 +67,7 @@ class AccountsRepository:
|
|
|
64
67
|
last_refresh: datetime,
|
|
65
68
|
plan_type: str | None = None,
|
|
66
69
|
email: str | None = None,
|
|
70
|
+
chatgpt_account_id: str | None = None,
|
|
67
71
|
) -> bool:
|
|
68
72
|
values = {
|
|
69
73
|
"access_token_encrypted": access_token_encrypted,
|
|
@@ -75,6 +79,10 @@ class AccountsRepository:
|
|
|
75
79
|
values["plan_type"] = plan_type
|
|
76
80
|
if email is not None:
|
|
77
81
|
values["email"] = email
|
|
78
|
-
|
|
82
|
+
if chatgpt_account_id is not None:
|
|
83
|
+
values["chatgpt_account_id"] = chatgpt_account_id
|
|
84
|
+
result = await self._session.execute(
|
|
85
|
+
update(Account).where(Account.id == account_id).values(**values).returning(Account.id)
|
|
86
|
+
)
|
|
79
87
|
await self._session.commit()
|
|
80
|
-
return
|
|
88
|
+
return result.scalar_one_or_none() is not None
|
app/modules/accounts/service.py
CHANGED
|
@@ -9,7 +9,7 @@ from app.core.auth import (
|
|
|
9
9
|
DEFAULT_PLAN,
|
|
10
10
|
claims_from_auth,
|
|
11
11
|
extract_id_token_claims,
|
|
12
|
-
|
|
12
|
+
generate_unique_account_id,
|
|
13
13
|
parse_auth_json,
|
|
14
14
|
)
|
|
15
15
|
from app.core.crypto import TokenEncryptor
|
|
@@ -66,12 +66,14 @@ class AccountsService:
|
|
|
66
66
|
claims = claims_from_auth(auth)
|
|
67
67
|
|
|
68
68
|
email = claims.email or DEFAULT_EMAIL
|
|
69
|
+
raw_account_id = claims.account_id
|
|
70
|
+
account_id = generate_unique_account_id(raw_account_id, email)
|
|
69
71
|
plan_type = coerce_account_plan_type(claims.plan_type, DEFAULT_PLAN)
|
|
70
|
-
account_id = claims.account_id or fallback_account_id(email)
|
|
71
72
|
last_refresh = to_utc_naive(auth.last_refresh_at) if auth.last_refresh_at else utcnow()
|
|
72
73
|
|
|
73
74
|
account = Account(
|
|
74
75
|
id=account_id,
|
|
76
|
+
chatgpt_account_id=raw_account_id,
|
|
75
77
|
email=email,
|
|
76
78
|
plan_type=plan_type,
|
|
77
79
|
access_token_encrypted=self._encryptor.encrypt(auth.tokens.access_token),
|
app/modules/oauth/service.py
CHANGED
|
@@ -15,7 +15,7 @@ from app.core.auth import (
|
|
|
15
15
|
DEFAULT_PLAN,
|
|
16
16
|
OpenAIAuthClaims,
|
|
17
17
|
extract_id_token_claims,
|
|
18
|
-
|
|
18
|
+
generate_unique_account_id,
|
|
19
19
|
)
|
|
20
20
|
from app.core.clients.oauth import (
|
|
21
21
|
OAuthError,
|
|
@@ -294,16 +294,17 @@ class OauthService:
|
|
|
294
294
|
async def _persist_tokens(self, tokens: OAuthTokens) -> None:
|
|
295
295
|
claims = extract_id_token_claims(tokens.id_token)
|
|
296
296
|
auth_claims = claims.auth or OpenAIAuthClaims()
|
|
297
|
-
|
|
297
|
+
raw_account_id = auth_claims.chatgpt_account_id or claims.chatgpt_account_id
|
|
298
298
|
email = claims.email or DEFAULT_EMAIL
|
|
299
|
+
account_id = generate_unique_account_id(raw_account_id, email)
|
|
299
300
|
plan_type = coerce_account_plan_type(
|
|
300
301
|
auth_claims.chatgpt_plan_type or claims.chatgpt_plan_type,
|
|
301
302
|
DEFAULT_PLAN,
|
|
302
303
|
)
|
|
303
|
-
account_id = account_id or fallback_account_id(email)
|
|
304
304
|
|
|
305
305
|
account = Account(
|
|
306
306
|
id=account_id,
|
|
307
|
+
chatgpt_account_id=raw_account_id,
|
|
307
308
|
email=email,
|
|
308
309
|
plan_type=plan_type,
|
|
309
310
|
access_token_encrypted=self._encryptor.encrypt(tokens.access_token),
|
|
@@ -6,6 +6,7 @@ from typing import Iterable
|
|
|
6
6
|
|
|
7
7
|
from app.core.balancer import (
|
|
8
8
|
AccountState,
|
|
9
|
+
SelectionResult,
|
|
9
10
|
handle_permanent_failure,
|
|
10
11
|
handle_quota_exceeded,
|
|
11
12
|
handle_rate_limit,
|
|
@@ -15,6 +16,7 @@ from app.core.balancer.types import UpstreamError
|
|
|
15
16
|
from app.core.usage.quota import apply_usage_quota
|
|
16
17
|
from app.db.models import Account, UsageHistory
|
|
17
18
|
from app.modules.accounts.repository import AccountsRepository
|
|
19
|
+
from app.modules.proxy.sticky_repository import StickySessionsRepository
|
|
18
20
|
from app.modules.usage.repository import UsageRepository
|
|
19
21
|
from app.modules.usage.updater import UsageUpdater
|
|
20
22
|
|
|
@@ -35,13 +37,25 @@ class AccountSelection:
|
|
|
35
37
|
|
|
36
38
|
|
|
37
39
|
class LoadBalancer:
|
|
38
|
-
def __init__(
|
|
40
|
+
def __init__(
|
|
41
|
+
self,
|
|
42
|
+
accounts_repo: AccountsRepository,
|
|
43
|
+
usage_repo: UsageRepository,
|
|
44
|
+
sticky_repo: StickySessionsRepository | None = None,
|
|
45
|
+
) -> None:
|
|
39
46
|
self._accounts_repo = accounts_repo
|
|
40
47
|
self._usage_repo = usage_repo
|
|
41
48
|
self._usage_updater = UsageUpdater(usage_repo, accounts_repo)
|
|
49
|
+
self._sticky_repo = sticky_repo
|
|
42
50
|
self._runtime: dict[str, RuntimeState] = {}
|
|
43
51
|
|
|
44
|
-
async def select_account(
|
|
52
|
+
async def select_account(
|
|
53
|
+
self,
|
|
54
|
+
sticky_key: str | None = None,
|
|
55
|
+
*,
|
|
56
|
+
reallocate_sticky: bool = False,
|
|
57
|
+
prefer_earlier_reset_accounts: bool = False,
|
|
58
|
+
) -> AccountSelection:
|
|
45
59
|
accounts = await self._accounts_repo.list_accounts()
|
|
46
60
|
latest_primary = await self._usage_repo.latest_by_account()
|
|
47
61
|
await self._usage_updater.refresh_accounts(accounts, latest_primary)
|
|
@@ -55,7 +69,13 @@ class LoadBalancer:
|
|
|
55
69
|
runtime=self._runtime,
|
|
56
70
|
)
|
|
57
71
|
|
|
58
|
-
result =
|
|
72
|
+
result = await self._select_with_stickiness(
|
|
73
|
+
states=states,
|
|
74
|
+
account_map=account_map,
|
|
75
|
+
sticky_key=sticky_key,
|
|
76
|
+
reallocate_sticky=reallocate_sticky,
|
|
77
|
+
prefer_earlier_reset_accounts=prefer_earlier_reset_accounts,
|
|
78
|
+
)
|
|
59
79
|
for state in states:
|
|
60
80
|
account = account_map.get(state.account_id)
|
|
61
81
|
if account:
|
|
@@ -74,6 +94,39 @@ class LoadBalancer:
|
|
|
74
94
|
return AccountSelection(account=None, error_message=result.error_message)
|
|
75
95
|
return AccountSelection(account=selected, error_message=None)
|
|
76
96
|
|
|
97
|
+
async def _select_with_stickiness(
    self,
    *,
    states: list[AccountState],
    account_map: dict[str, Account],
    sticky_key: str | None,
    reallocate_sticky: bool,
    prefer_earlier_reset_accounts: bool,
) -> SelectionResult:
    """Select an account, honoring an existing sticky mapping when possible.

    Without a sticky key or sticky repository this is a plain
    ``select_account`` over ``states``. Otherwise:

    * ``reallocate_sticky`` skips the lookup and always re-selects.
    * An existing mapping is reused when its account is among ``states``
      and selecting from it alone yields an account.
    * A mapping whose account is not among ``states`` is deleted.
    * In every remaining case a fresh selection is made over all states
      and, if the chosen account is in ``account_map``, stored for
      ``sticky_key``.
    """
    repo = self._sticky_repo
    if not sticky_key or not repo:
        return select_account(states, prefer_earlier_reset=prefer_earlier_reset_accounts)

    if not reallocate_sticky:
        pinned_id = await repo.get_account_id(sticky_key)
        if pinned_id:
            pinned_state = None
            for candidate in states:
                if candidate.account_id == pinned_id:
                    pinned_state = candidate
                    break
            if pinned_state is None:
                # The mapped account is no longer among the candidates.
                await repo.delete(sticky_key)
            else:
                outcome = select_account(
                    [pinned_state],
                    prefer_earlier_reset=prefer_earlier_reset_accounts,
                )
                if outcome.account is not None:
                    return outcome

    # Shared tail: fresh selection, then remember it for this sticky key.
    fresh = select_account(states, prefer_earlier_reset=prefer_earlier_reset_accounts)
    picked = fresh.account
    if picked is not None and picked.account_id in account_map:
        await repo.upsert(sticky_key, picked.account_id)
    return fresh
|
|
129
|
+
|
|
77
130
|
async def mark_rate_limit(self, account: Account, error: UpstreamError) -> None:
|
|
78
131
|
state = self._state_for(account)
|
|
79
132
|
handle_rate_limit(state, error)
|
|
@@ -103,6 +156,8 @@ class LoadBalancer:
|
|
|
103
156
|
used_percent=None,
|
|
104
157
|
reset_at=runtime.reset_at,
|
|
105
158
|
cooldown_until=runtime.cooldown_until,
|
|
159
|
+
secondary_used_percent=None,
|
|
160
|
+
secondary_reset_at=None,
|
|
106
161
|
last_error_at=runtime.last_error_at,
|
|
107
162
|
last_selected_at=runtime.last_selected_at,
|
|
108
163
|
error_count=runtime.error_count,
|
|
@@ -116,14 +171,21 @@ class LoadBalancer:
|
|
|
116
171
|
runtime.last_error_at = state.last_error_at
|
|
117
172
|
runtime.error_count = state.error_count
|
|
118
173
|
|
|
119
|
-
|
|
174
|
+
reset_at_int = int(state.reset_at) if state.reset_at else None
|
|
175
|
+
status_changed = account.status != state.status
|
|
176
|
+
reason_changed = account.deactivation_reason != state.deactivation_reason
|
|
177
|
+
reset_changed = account.reset_at != reset_at_int
|
|
178
|
+
|
|
179
|
+
if status_changed or reason_changed or reset_changed:
|
|
120
180
|
await self._accounts_repo.update_status(
|
|
121
181
|
account.id,
|
|
122
182
|
state.status,
|
|
123
183
|
state.deactivation_reason,
|
|
184
|
+
reset_at_int,
|
|
124
185
|
)
|
|
125
186
|
account.status = state.status
|
|
126
187
|
account.deactivation_reason = state.deactivation_reason
|
|
188
|
+
account.reset_at = reset_at_int
|
|
127
189
|
|
|
128
190
|
|
|
129
191
|
def _build_states(
|
|
@@ -161,12 +223,17 @@ def _state_from_account(
|
|
|
161
223
|
secondary_used = secondary_entry.used_percent if secondary_entry else None
|
|
162
224
|
secondary_reset = secondary_entry.reset_at if secondary_entry else None
|
|
163
225
|
|
|
226
|
+
# Use account.reset_at from DB as the authoritative source for runtime reset
|
|
227
|
+
# This survives across requests since LoadBalancer is instantiated per-request
|
|
228
|
+
db_reset_at = float(account.reset_at) if account.reset_at else None
|
|
229
|
+
effective_runtime_reset = db_reset_at or runtime.reset_at
|
|
230
|
+
|
|
164
231
|
status, used_percent, reset_at = apply_usage_quota(
|
|
165
232
|
status=account.status,
|
|
166
233
|
primary_used=primary_used,
|
|
167
234
|
primary_reset=primary_reset,
|
|
168
235
|
primary_window_minutes=primary_window_minutes,
|
|
169
|
-
runtime_reset=
|
|
236
|
+
runtime_reset=effective_runtime_reset,
|
|
170
237
|
secondary_used=secondary_used,
|
|
171
238
|
secondary_reset=secondary_reset,
|
|
172
239
|
)
|
|
@@ -177,6 +244,8 @@ def _state_from_account(
|
|
|
177
244
|
used_percent=used_percent,
|
|
178
245
|
reset_at=reset_at,
|
|
179
246
|
cooldown_until=runtime.cooldown_until,
|
|
247
|
+
secondary_used_percent=secondary_used,
|
|
248
|
+
secondary_reset_at=secondary_reset,
|
|
180
249
|
last_error_at=runtime.last_error_at,
|
|
181
250
|
last_selected_at=runtime.last_selected_at,
|
|
182
251
|
error_count=runtime.error_count,
|
app/modules/proxy/service.py
CHANGED
|
@@ -2,9 +2,13 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import time
|
|
5
|
+
from collections.abc import Sequence
|
|
5
6
|
from datetime import timedelta
|
|
7
|
+
from hashlib import sha256
|
|
6
8
|
from typing import AsyncIterator, Mapping
|
|
7
9
|
|
|
10
|
+
import anyio
|
|
11
|
+
|
|
8
12
|
from app.core import usage as usage_core
|
|
9
13
|
from app.core.auth.refresh import RefreshError
|
|
10
14
|
from app.core.balancer import PERMANENT_FAILURE_CODES
|
|
@@ -12,13 +16,14 @@ from app.core.balancer.types import UpstreamError
|
|
|
12
16
|
from app.core.clients.proxy import ProxyResponseError, filter_inbound_headers
|
|
13
17
|
from app.core.clients.proxy import compact_responses as core_compact_responses
|
|
14
18
|
from app.core.clients.proxy import stream_responses as core_stream_responses
|
|
19
|
+
from app.core.config.settings import get_settings
|
|
15
20
|
from app.core.crypto import TokenEncryptor
|
|
16
21
|
from app.core.errors import openai_error, response_failed_event
|
|
17
22
|
from app.core.openai.models import OpenAIResponsePayload
|
|
18
23
|
from app.core.openai.parsing import parse_sse_event
|
|
19
24
|
from app.core.openai.requests import ResponsesCompactRequest, ResponsesRequest
|
|
20
25
|
from app.core.usage.types import UsageWindowRow
|
|
21
|
-
from app.core.utils.request_id import ensure_request_id
|
|
26
|
+
from app.core.utils.request_id import ensure_request_id, get_request_id
|
|
22
27
|
from app.core.utils.sse import format_sse_event
|
|
23
28
|
from app.core.utils.time import utcnow
|
|
24
29
|
from app.db.models import Account, UsageHistory
|
|
@@ -40,8 +45,10 @@ from app.modules.proxy.helpers import (
|
|
|
40
45
|
_window_snapshot,
|
|
41
46
|
)
|
|
42
47
|
from app.modules.proxy.load_balancer import LoadBalancer
|
|
48
|
+
from app.modules.proxy.sticky_repository import StickySessionsRepository
|
|
43
49
|
from app.modules.proxy.types import RateLimitStatusPayloadData
|
|
44
50
|
from app.modules.request_logs.repository import RequestLogsRepository
|
|
51
|
+
from app.modules.settings.repository import SettingsRepository
|
|
45
52
|
from app.modules.usage.repository import UsageRepository
|
|
46
53
|
from app.modules.usage.updater import UsageUpdater
|
|
47
54
|
|
|
@@ -54,13 +61,16 @@ class ProxyService:
|
|
|
54
61
|
accounts_repo: AccountsRepository,
|
|
55
62
|
usage_repo: UsageRepository,
|
|
56
63
|
logs_repo: RequestLogsRepository,
|
|
64
|
+
sticky_repo: StickySessionsRepository,
|
|
65
|
+
settings_repo: SettingsRepository,
|
|
57
66
|
) -> None:
|
|
58
67
|
self._accounts_repo = accounts_repo
|
|
59
68
|
self._usage_repo = usage_repo
|
|
60
69
|
self._logs_repo = logs_repo
|
|
70
|
+
self._settings_repo = settings_repo
|
|
61
71
|
self._encryptor = TokenEncryptor()
|
|
62
72
|
self._auth_manager = AuthManager(accounts_repo)
|
|
63
|
-
self._load_balancer = LoadBalancer(accounts_repo, usage_repo)
|
|
73
|
+
self._load_balancer = LoadBalancer(accounts_repo, usage_repo, sticky_repo)
|
|
64
74
|
self._usage_updater = UsageUpdater(usage_repo, accounts_repo)
|
|
65
75
|
|
|
66
76
|
def stream_responses(
|
|
@@ -70,6 +80,7 @@ class ProxyService:
|
|
|
70
80
|
*,
|
|
71
81
|
propagate_http_errors: bool = False,
|
|
72
82
|
) -> AsyncIterator[str]:
|
|
83
|
+
_maybe_log_proxy_request_shape("stream", payload, headers)
|
|
73
84
|
filtered = filter_inbound_headers(headers)
|
|
74
85
|
return self._stream_with_retry(
|
|
75
86
|
payload,
|
|
@@ -82,8 +93,16 @@ class ProxyService:
|
|
|
82
93
|
payload: ResponsesCompactRequest,
|
|
83
94
|
headers: Mapping[str, str],
|
|
84
95
|
) -> OpenAIResponsePayload:
|
|
96
|
+
_maybe_log_proxy_request_shape("compact", payload, headers)
|
|
85
97
|
filtered = filter_inbound_headers(headers)
|
|
86
|
-
|
|
98
|
+
settings = await self._settings_repo.get_or_create()
|
|
99
|
+
prefer_earlier_reset = settings.prefer_earlier_reset_accounts
|
|
100
|
+
sticky_key = _sticky_key_from_compact_payload(payload) if settings.sticky_threads_enabled else None
|
|
101
|
+
selection = await self._load_balancer.select_account(
|
|
102
|
+
sticky_key=sticky_key,
|
|
103
|
+
reallocate_sticky=sticky_key is not None,
|
|
104
|
+
prefer_earlier_reset_accounts=prefer_earlier_reset,
|
|
105
|
+
)
|
|
87
106
|
account = selection.account
|
|
88
107
|
if not account:
|
|
89
108
|
raise ProxyResponseError(
|
|
@@ -91,7 +110,7 @@ class ProxyService:
|
|
|
91
110
|
openai_error("no_accounts", selection.error_message or "No active accounts available"),
|
|
92
111
|
)
|
|
93
112
|
account = await self._ensure_fresh(account)
|
|
94
|
-
account_id = _header_account_id(account.
|
|
113
|
+
account_id = _header_account_id(account.chatgpt_account_id)
|
|
95
114
|
|
|
96
115
|
async def _call_compact(target: Account) -> OpenAIResponsePayload:
|
|
97
116
|
access_token = self._encryptor.decrypt(target.access_token_encrypted)
|
|
@@ -189,9 +208,15 @@ class ProxyService:
|
|
|
189
208
|
propagate_http_errors: bool,
|
|
190
209
|
) -> AsyncIterator[str]:
|
|
191
210
|
request_id = ensure_request_id()
|
|
211
|
+
settings = await self._settings_repo.get_or_create()
|
|
212
|
+
prefer_earlier_reset = settings.prefer_earlier_reset_accounts
|
|
213
|
+
sticky_key = _sticky_key_from_payload(payload) if settings.sticky_threads_enabled else None
|
|
192
214
|
max_attempts = 3
|
|
193
215
|
for attempt in range(max_attempts):
|
|
194
|
-
selection = await self._load_balancer.select_account(
|
|
216
|
+
selection = await self._load_balancer.select_account(
|
|
217
|
+
sticky_key=sticky_key,
|
|
218
|
+
prefer_earlier_reset_accounts=prefer_earlier_reset,
|
|
219
|
+
)
|
|
195
220
|
account = selection.account
|
|
196
221
|
if not account:
|
|
197
222
|
event = response_failed_event(
|
|
@@ -289,8 +314,9 @@ class ProxyService:
|
|
|
289
314
|
) -> AsyncIterator[str]:
|
|
290
315
|
account_id_value = account.id
|
|
291
316
|
access_token = self._encryptor.decrypt(account.access_token_encrypted)
|
|
292
|
-
account_id = _header_account_id(
|
|
317
|
+
account_id = _header_account_id(account.chatgpt_account_id)
|
|
293
318
|
model = payload.model
|
|
319
|
+
reasoning_effort = payload.reasoning.effort if payload.reasoning else None
|
|
294
320
|
start = time.monotonic()
|
|
295
321
|
status = "success"
|
|
296
322
|
error_code = None
|
|
@@ -370,27 +396,29 @@ class ProxyService:
|
|
|
370
396
|
reasoning_tokens = (
|
|
371
397
|
usage.output_tokens_details.reasoning_tokens if usage and usage.output_tokens_details else None
|
|
372
398
|
)
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
399
|
+
with anyio.CancelScope(shield=True):
|
|
400
|
+
try:
|
|
401
|
+
await self._logs_repo.add_log(
|
|
402
|
+
account_id=account_id_value,
|
|
403
|
+
request_id=request_id,
|
|
404
|
+
model=model,
|
|
405
|
+
input_tokens=input_tokens,
|
|
406
|
+
output_tokens=output_tokens,
|
|
407
|
+
cached_input_tokens=cached_input_tokens,
|
|
408
|
+
reasoning_tokens=reasoning_tokens,
|
|
409
|
+
reasoning_effort=reasoning_effort,
|
|
410
|
+
latency_ms=latency_ms,
|
|
411
|
+
status=status,
|
|
412
|
+
error_code=error_code,
|
|
413
|
+
error_message=error_message,
|
|
414
|
+
)
|
|
415
|
+
except Exception:
|
|
416
|
+
logger.warning(
|
|
417
|
+
"Failed to persist request log account_id=%s request_id=%s",
|
|
418
|
+
account_id_value,
|
|
419
|
+
request_id,
|
|
420
|
+
exc_info=True,
|
|
421
|
+
)
|
|
394
422
|
|
|
395
423
|
async def _refresh_usage(self, accounts: list[Account]) -> None:
|
|
396
424
|
latest_usage = await self._usage_repo.latest_by_account(window="primary")
|
|
@@ -436,12 +464,9 @@ class ProxyService:
|
|
|
436
464
|
await self._handle_stream_error(account, _upstream_error_from_openai(error), code)
|
|
437
465
|
|
|
438
466
|
async def _handle_stream_error(self, account: Account, error: UpstreamError, code: str) -> None:
|
|
439
|
-
if code
|
|
467
|
+
if code in {"rate_limit_exceeded", "usage_limit_reached"}:
|
|
440
468
|
await self._load_balancer.mark_rate_limit(account, error)
|
|
441
469
|
return
|
|
442
|
-
if code == "usage_limit_reached":
|
|
443
|
-
await self._load_balancer.mark_quota_exceeded(account, error)
|
|
444
|
-
return
|
|
445
470
|
if code in {"insufficient_quota", "usage_not_included", "quota_exceeded"}:
|
|
446
471
|
await self._load_balancer.mark_quota_exceeded(account, error)
|
|
447
472
|
return
|
|
@@ -456,3 +481,102 @@ class _RetryableStreamError(Exception):
|
|
|
456
481
|
super().__init__(code)
|
|
457
482
|
self.code = code
|
|
458
483
|
self.error = error
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def _maybe_log_proxy_request_shape(
    kind: str,
    payload: ResponsesRequest | ResponsesCompactRequest,
    headers: Mapping[str, str],
) -> None:
    """Log a structural summary of a proxied request when enabled in settings.

    Does nothing unless ``settings.log_proxy_request_shape`` is truthy. The
    log line carries the request id, ``kind``, model, stream flag, an input
    type summary, a hash of the prompt cache key (plus a truncated raw key
    when ``settings.log_proxy_request_shape_raw_cache_key`` is set), the
    explicitly set field names, the extra field names and the allowlisted
    header names.
    """
    settings = get_settings()
    if not settings.log_proxy_request_shape:
        return

    request_id = get_request_id()

    # Prefer the declared field; fall back to a string value in model_extra.
    extras = payload.model_extra
    cache_key = getattr(payload, "prompt_cache_key", None)
    if cache_key is None and extras:
        candidate = extras.get("prompt_cache_key")
        if isinstance(candidate, str):
            cache_key = candidate

    cache_key_hash = None
    cache_key_raw = None
    if isinstance(cache_key, str):
        cache_key_hash = _hash_identifier(cache_key)
        if settings.log_proxy_request_shape_raw_cache_key:
            cache_key_raw = _truncate_identifier(cache_key)

    if extras:
        extra_names = sorted(extras.keys())
    else:
        extra_names = []
    set_fields = sorted(payload.model_fields_set)
    input_shape = _summarize_input(payload.input)
    header_names = _interesting_header_keys(headers)

    logger.warning(
        "proxy_request_shape request_id=%s kind=%s model=%s stream=%s input=%s "
        "prompt_cache_key=%s prompt_cache_key_raw=%s fields=%s extra=%s headers=%s",
        request_id,
        kind,
        payload.model,
        getattr(payload, "stream", None),
        input_shape,
        cache_key_hash,
        cache_key_raw,
        set_fields,
        extra_names,
        header_names,
    )
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
def _hash_identifier(value: str) -> str:
|
|
530
|
+
digest = sha256(value.encode("utf-8")).hexdigest()
|
|
531
|
+
return f"sha256:{digest[:12]}"
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def _summarize_input(items: Sequence[object]) -> str:
|
|
535
|
+
if not items:
|
|
536
|
+
return "0"
|
|
537
|
+
type_counts: dict[str, int] = {}
|
|
538
|
+
for item in items:
|
|
539
|
+
type_name = type(item).__name__
|
|
540
|
+
type_counts[type_name] = type_counts.get(type_name, 0) + 1
|
|
541
|
+
summary = ",".join(f"{key}={type_counts[key]}" for key in sorted(type_counts))
|
|
542
|
+
return f"{len(items)}({summary})"
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
def _truncate_identifier(value: str, *, max_length: int = 96) -> str:
|
|
546
|
+
if len(value) <= max_length:
|
|
547
|
+
return value
|
|
548
|
+
return f"{value[:48]}...{value[-16:]}"
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def _interesting_header_keys(headers: Mapping[str, str]) -> list[str]:
|
|
552
|
+
allowlist = {
|
|
553
|
+
"user-agent",
|
|
554
|
+
"x-request-id",
|
|
555
|
+
"request-id",
|
|
556
|
+
"x-openai-client-id",
|
|
557
|
+
"x-openai-client-version",
|
|
558
|
+
"x-openai-client-arch",
|
|
559
|
+
"x-openai-client-os",
|
|
560
|
+
"x-openai-client-user-agent",
|
|
561
|
+
"x-codex-session-id",
|
|
562
|
+
"x-codex-conversation-id",
|
|
563
|
+
}
|
|
564
|
+
return sorted({key.lower() for key in headers.keys() if key.lower() in allowlist})
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
def _sticky_key_from_payload(payload: ResponsesRequest) -> str | None:
|
|
568
|
+
value = payload.prompt_cache_key
|
|
569
|
+
if not value:
|
|
570
|
+
return None
|
|
571
|
+
stripped = value.strip()
|
|
572
|
+
return stripped or None
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
def _sticky_key_from_compact_payload(payload: ResponsesCompactRequest) -> str | None:
|
|
576
|
+
if not payload.model_extra:
|
|
577
|
+
return None
|
|
578
|
+
value = payload.model_extra.get("prompt_cache_key")
|
|
579
|
+
if not isinstance(value, str):
|
|
580
|
+
return None
|
|
581
|
+
stripped = value.strip()
|
|
582
|
+
return stripped or None
|