codex-lb 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44):
  1. app/core/auth/__init__.py +10 -0
  2. app/core/balancer/logic.py +33 -6
  3. app/core/config/settings.py +2 -0
  4. app/core/usage/__init__.py +2 -0
  5. app/core/usage/logs.py +12 -2
  6. app/core/usage/quota.py +10 -4
  7. app/core/usage/types.py +3 -2
  8. app/db/migrations/__init__.py +14 -3
  9. app/db/migrations/versions/add_accounts_chatgpt_account_id.py +29 -0
  10. app/db/migrations/versions/add_accounts_reset_at.py +29 -0
  11. app/db/migrations/versions/add_dashboard_settings.py +31 -0
  12. app/db/migrations/versions/add_request_logs_reasoning_effort.py +21 -0
  13. app/db/models.py +33 -0
  14. app/db/session.py +71 -11
  15. app/dependencies.py +27 -1
  16. app/main.py +11 -2
  17. app/modules/accounts/auth_manager.py +44 -3
  18. app/modules/accounts/repository.py +14 -6
  19. app/modules/accounts/service.py +4 -2
  20. app/modules/oauth/service.py +4 -3
  21. app/modules/proxy/load_balancer.py +74 -5
  22. app/modules/proxy/service.py +155 -31
  23. app/modules/proxy/sticky_repository.py +56 -0
  24. app/modules/request_logs/repository.py +6 -3
  25. app/modules/request_logs/schemas.py +2 -0
  26. app/modules/request_logs/service.py +8 -1
  27. app/modules/settings/__init__.py +1 -0
  28. app/modules/settings/api.py +37 -0
  29. app/modules/settings/repository.py +40 -0
  30. app/modules/settings/schemas.py +13 -0
  31. app/modules/settings/service.py +33 -0
  32. app/modules/shared/schemas.py +16 -2
  33. app/modules/usage/schemas.py +1 -0
  34. app/modules/usage/service.py +17 -1
  35. app/modules/usage/updater.py +36 -7
  36. app/static/index.css +1024 -319
  37. app/static/index.html +461 -377
  38. app/static/index.js +327 -49
  39. {codex_lb-0.2.0.dist-info → codex_lb-0.3.1.dist-info}/METADATA +33 -7
  40. {codex_lb-0.2.0.dist-info → codex_lb-0.3.1.dist-info}/RECORD +43 -34
  41. app/static/7.css +0 -1336
  42. {codex_lb-0.2.0.dist-info → codex_lb-0.3.1.dist-info}/WHEEL +0 -0
  43. {codex_lb-0.2.0.dist-info → codex_lb-0.3.1.dist-info}/entry_points.txt +0 -0
  44. {codex_lb-0.2.0.dist-info → codex_lb-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,9 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import logging
3
4
  from datetime import datetime
4
5
  from typing import Protocol
5
6
 
6
- from app.core.auth import DEFAULT_PLAN
7
+ from app.core.auth import DEFAULT_PLAN, OpenAIAuthClaims, extract_id_token_claims
7
8
  from app.core.auth.refresh import RefreshError, refresh_access_token, should_refresh
8
9
  from app.core.balancer import PERMANENT_FAILURE_CODES
9
10
  from app.core.crypto import TokenEncryptor
@@ -29,9 +30,13 @@ class AccountsRepositoryPort(Protocol):
29
30
  last_refresh: datetime,
30
31
  plan_type: str | None = None,
31
32
  email: str | None = None,
33
+ chatgpt_account_id: str | None = None,
32
34
  ) -> bool: ...
33
35
 
34
36
 
37
+ logger = logging.getLogger(__name__)
38
+
39
+
35
40
  class AuthManager:
36
41
  def __init__(self, repo: AccountsRepositoryPort) -> None:
37
42
  self._repo = repo
@@ -39,8 +44,8 @@ class AuthManager:
39
44
 
40
45
  async def ensure_fresh(self, account: Account, *, force: bool = False) -> Account:
41
46
  if force or should_refresh(account.last_refresh):
42
- return await self.refresh_account(account)
43
- return account
47
+ account = await self.refresh_account(account)
48
+ return await self._ensure_chatgpt_account_id(account)
44
49
 
45
50
  async def refresh_account(self, account: Account) -> Account:
46
51
  refresh_token = self._encryptor.decrypt(account.refresh_token_encrypted)
@@ -58,6 +63,8 @@ class AuthManager:
58
63
  account.refresh_token_encrypted = self._encryptor.encrypt(result.refresh_token)
59
64
  account.id_token_encrypted = self._encryptor.encrypt(result.id_token)
60
65
  account.last_refresh = utcnow()
66
+ if result.account_id:
67
+ account.chatgpt_account_id = result.account_id
61
68
  if result.plan_type is not None:
62
69
  account.plan_type = coerce_account_plan_type(
63
70
  result.plan_type,
@@ -76,5 +83,39 @@ class AuthManager:
76
83
  last_refresh=account.last_refresh,
77
84
  plan_type=account.plan_type,
78
85
  email=account.email,
86
+ chatgpt_account_id=account.chatgpt_account_id,
79
87
  )
80
88
  return account
89
+
90
async def _ensure_chatgpt_account_id(self, account: Account) -> Account:
    """Backfill ``account.chatgpt_account_id`` from the stored ID token.

    Does nothing when the account already carries an id. Otherwise the
    encrypted ID token is decrypted, the account id is read from its claims,
    set on ``account`` and written back through the repository.

    The whole operation is best-effort: a token that cannot be decrypted or
    a failed repository write is logged and the account is still returned.

    Args:
        account: The account to inspect; it is mutated in place when an id
            is found.

    Returns:
        The same ``account`` object that was passed in.
    """
    if account.chatgpt_account_id:
        return account
    try:
        id_token = self._encryptor.decrypt(account.id_token_encrypted)
    except Exception:
        # Still best-effort, but leave a trace instead of failing silently so
        # an unreadable token can be diagnosed.
        logger.debug(
            "Failed to decrypt id_token for chatgpt_account_id backfill account_id=%s",
            account.id,
            exc_info=True,
        )
        return account
    raw_account_id = _chatgpt_account_id_from_id_token(id_token)
    if not raw_account_id:
        return account

    # Set the id on the in-memory object first so the caller benefits even
    # when persisting it below fails.
    account.chatgpt_account_id = raw_account_id
    try:
        await self._repo.update_tokens(
            account.id,
            access_token_encrypted=account.access_token_encrypted,
            refresh_token_encrypted=account.refresh_token_encrypted,
            id_token_encrypted=account.id_token_encrypted,
            last_refresh=account.last_refresh,
            plan_type=account.plan_type,
            email=account.email,
            chatgpt_account_id=raw_account_id,
        )
    except Exception:
        logger.warning("Failed to persist chatgpt_account_id account_id=%s", account.id, exc_info=True)
    return account
116
+
117
+
118
def _chatgpt_account_id_from_id_token(id_token: str) -> str | None:
    """Read the ChatGPT account id from an ID token's claims.

    The id inside the nested ``auth`` claims wins when it is truthy;
    otherwise the top-level ``chatgpt_account_id`` claim is returned as-is.
    """
    token_claims = extract_id_token_claims(id_token)
    # Fall back to an empty claims object so the attribute access below is safe.
    nested_claims = token_claims.auth or OpenAIAuthClaims()
    nested_id = nested_claims.chatgpt_account_id
    if nested_id:
        return nested_id
    return token_claims.chatgpt_account_id
@@ -19,6 +19,7 @@ class AccountsRepository:
19
19
  async def upsert(self, account: Account) -> Account:
20
20
  existing = await self._session.get(Account, account.id)
21
21
  if existing:
22
+ existing.chatgpt_account_id = account.chatgpt_account_id
22
23
  existing.email = account.email
23
24
  existing.plan_type = account.plan_type
24
25
  existing.access_token_encrypted = account.access_token_encrypted
@@ -41,19 +42,21 @@ class AccountsRepository:
41
42
  account_id: str,
42
43
  status: AccountStatus,
43
44
  deactivation_reason: str | None = None,
45
+ reset_at: int | None = None,
44
46
  ) -> bool:
45
47
  result = await self._session.execute(
46
48
  update(Account)
47
49
  .where(Account.id == account_id)
48
- .values(status=status, deactivation_reason=deactivation_reason)
50
+ .values(status=status, deactivation_reason=deactivation_reason, reset_at=reset_at)
51
+ .returning(Account.id)
49
52
  )
50
53
  await self._session.commit()
51
- return bool(getattr(result, "rowcount", 0) or 0)
54
+ return result.scalar_one_or_none() is not None
52
55
 
53
56
  async def delete(self, account_id: str) -> bool:
54
- result = await self._session.execute(delete(Account).where(Account.id == account_id))
57
+ result = await self._session.execute(delete(Account).where(Account.id == account_id).returning(Account.id))
55
58
  await self._session.commit()
56
- return bool(getattr(result, "rowcount", 0) or 0)
59
+ return result.scalar_one_or_none() is not None
57
60
 
58
61
  async def update_tokens(
59
62
  self,
@@ -64,6 +67,7 @@ class AccountsRepository:
64
67
  last_refresh: datetime,
65
68
  plan_type: str | None = None,
66
69
  email: str | None = None,
70
+ chatgpt_account_id: str | None = None,
67
71
  ) -> bool:
68
72
  values = {
69
73
  "access_token_encrypted": access_token_encrypted,
@@ -75,6 +79,10 @@ class AccountsRepository:
75
79
  values["plan_type"] = plan_type
76
80
  if email is not None:
77
81
  values["email"] = email
78
- result = await self._session.execute(update(Account).where(Account.id == account_id).values(**values))
82
+ if chatgpt_account_id is not None:
83
+ values["chatgpt_account_id"] = chatgpt_account_id
84
+ result = await self._session.execute(
85
+ update(Account).where(Account.id == account_id).values(**values).returning(Account.id)
86
+ )
79
87
  await self._session.commit()
80
- return bool(getattr(result, "rowcount", 0) or 0)
88
+ return result.scalar_one_or_none() is not None
@@ -9,7 +9,7 @@ from app.core.auth import (
9
9
  DEFAULT_PLAN,
10
10
  claims_from_auth,
11
11
  extract_id_token_claims,
12
- fallback_account_id,
12
+ generate_unique_account_id,
13
13
  parse_auth_json,
14
14
  )
15
15
  from app.core.crypto import TokenEncryptor
@@ -66,12 +66,14 @@ class AccountsService:
66
66
  claims = claims_from_auth(auth)
67
67
 
68
68
  email = claims.email or DEFAULT_EMAIL
69
+ raw_account_id = claims.account_id
70
+ account_id = generate_unique_account_id(raw_account_id, email)
69
71
  plan_type = coerce_account_plan_type(claims.plan_type, DEFAULT_PLAN)
70
- account_id = claims.account_id or fallback_account_id(email)
71
72
  last_refresh = to_utc_naive(auth.last_refresh_at) if auth.last_refresh_at else utcnow()
72
73
 
73
74
  account = Account(
74
75
  id=account_id,
76
+ chatgpt_account_id=raw_account_id,
75
77
  email=email,
76
78
  plan_type=plan_type,
77
79
  access_token_encrypted=self._encryptor.encrypt(auth.tokens.access_token),
@@ -15,7 +15,7 @@ from app.core.auth import (
15
15
  DEFAULT_PLAN,
16
16
  OpenAIAuthClaims,
17
17
  extract_id_token_claims,
18
- fallback_account_id,
18
+ generate_unique_account_id,
19
19
  )
20
20
  from app.core.clients.oauth import (
21
21
  OAuthError,
@@ -294,16 +294,17 @@ class OauthService:
294
294
  async def _persist_tokens(self, tokens: OAuthTokens) -> None:
295
295
  claims = extract_id_token_claims(tokens.id_token)
296
296
  auth_claims = claims.auth or OpenAIAuthClaims()
297
- account_id = auth_claims.chatgpt_account_id or claims.chatgpt_account_id
297
+ raw_account_id = auth_claims.chatgpt_account_id or claims.chatgpt_account_id
298
298
  email = claims.email or DEFAULT_EMAIL
299
+ account_id = generate_unique_account_id(raw_account_id, email)
299
300
  plan_type = coerce_account_plan_type(
300
301
  auth_claims.chatgpt_plan_type or claims.chatgpt_plan_type,
301
302
  DEFAULT_PLAN,
302
303
  )
303
- account_id = account_id or fallback_account_id(email)
304
304
 
305
305
  account = Account(
306
306
  id=account_id,
307
+ chatgpt_account_id=raw_account_id,
307
308
  email=email,
308
309
  plan_type=plan_type,
309
310
  access_token_encrypted=self._encryptor.encrypt(tokens.access_token),
@@ -6,6 +6,7 @@ from typing import Iterable
6
6
 
7
7
  from app.core.balancer import (
8
8
  AccountState,
9
+ SelectionResult,
9
10
  handle_permanent_failure,
10
11
  handle_quota_exceeded,
11
12
  handle_rate_limit,
@@ -15,6 +16,7 @@ from app.core.balancer.types import UpstreamError
15
16
  from app.core.usage.quota import apply_usage_quota
16
17
  from app.db.models import Account, UsageHistory
17
18
  from app.modules.accounts.repository import AccountsRepository
19
+ from app.modules.proxy.sticky_repository import StickySessionsRepository
18
20
  from app.modules.usage.repository import UsageRepository
19
21
  from app.modules.usage.updater import UsageUpdater
20
22
 
@@ -35,13 +37,25 @@ class AccountSelection:
35
37
 
36
38
 
37
39
  class LoadBalancer:
38
- def __init__(self, accounts_repo: AccountsRepository, usage_repo: UsageRepository) -> None:
40
+ def __init__(
41
+ self,
42
+ accounts_repo: AccountsRepository,
43
+ usage_repo: UsageRepository,
44
+ sticky_repo: StickySessionsRepository | None = None,
45
+ ) -> None:
39
46
  self._accounts_repo = accounts_repo
40
47
  self._usage_repo = usage_repo
41
48
  self._usage_updater = UsageUpdater(usage_repo, accounts_repo)
49
+ self._sticky_repo = sticky_repo
42
50
  self._runtime: dict[str, RuntimeState] = {}
43
51
 
44
- async def select_account(self) -> AccountSelection:
52
+ async def select_account(
53
+ self,
54
+ sticky_key: str | None = None,
55
+ *,
56
+ reallocate_sticky: bool = False,
57
+ prefer_earlier_reset_accounts: bool = False,
58
+ ) -> AccountSelection:
45
59
  accounts = await self._accounts_repo.list_accounts()
46
60
  latest_primary = await self._usage_repo.latest_by_account()
47
61
  await self._usage_updater.refresh_accounts(accounts, latest_primary)
@@ -55,7 +69,13 @@ class LoadBalancer:
55
69
  runtime=self._runtime,
56
70
  )
57
71
 
58
- result = select_account(states)
72
+ result = await self._select_with_stickiness(
73
+ states=states,
74
+ account_map=account_map,
75
+ sticky_key=sticky_key,
76
+ reallocate_sticky=reallocate_sticky,
77
+ prefer_earlier_reset_accounts=prefer_earlier_reset_accounts,
78
+ )
59
79
  for state in states:
60
80
  account = account_map.get(state.account_id)
61
81
  if account:
@@ -74,6 +94,39 @@ class LoadBalancer:
74
94
  return AccountSelection(account=None, error_message=result.error_message)
75
95
  return AccountSelection(account=selected, error_message=None)
76
96
 
97
async def _select_with_stickiness(
    self,
    *,
    states: list[AccountState],
    account_map: dict[str, Account],
    sticky_key: str | None,
    reallocate_sticky: bool,
    prefer_earlier_reset_accounts: bool,
) -> SelectionResult:
    """Select an account, honouring a sticky-key pin when one applies.

    Without a sticky key or a sticky repository this is a plain
    ``select_account`` call. With ``reallocate_sticky`` the existing pin is
    ignored and the freshly selected account is pinned. Otherwise the pinned
    account is tried on its own first; a pin pointing at an account that is
    not in ``states`` is deleted, and when the pinned account cannot be
    selected a new account is chosen and pinned instead.
    """
    sticky_repo = self._sticky_repo
    if not sticky_key or not sticky_repo:
        return select_account(states, prefer_earlier_reset=prefer_earlier_reset_accounts)

    if not reallocate_sticky:
        pinned_id = await sticky_repo.get_account_id(sticky_key)
        if pinned_id:
            pinned_state = None
            for candidate in states:
                if candidate.account_id == pinned_id:
                    pinned_state = candidate
                    break
            if pinned_state is None:
                # The pin refers to an account that is no longer a candidate.
                await sticky_repo.delete(sticky_key)
            else:
                outcome = select_account(
                    [pinned_state],
                    prefer_earlier_reset=prefer_earlier_reset_accounts,
                )
                if outcome.account is not None:
                    return outcome

    # Either a re-allocation was requested or no usable pin exists: select
    # from all candidates and remember the winner for this key.
    fresh = select_account(states, prefer_earlier_reset=prefer_earlier_reset_accounts)
    picked = fresh.account
    if picked is not None and picked.account_id in account_map:
        await sticky_repo.upsert(sticky_key, picked.account_id)
    return fresh
129
+
77
130
  async def mark_rate_limit(self, account: Account, error: UpstreamError) -> None:
78
131
  state = self._state_for(account)
79
132
  handle_rate_limit(state, error)
@@ -103,6 +156,8 @@ class LoadBalancer:
103
156
  used_percent=None,
104
157
  reset_at=runtime.reset_at,
105
158
  cooldown_until=runtime.cooldown_until,
159
+ secondary_used_percent=None,
160
+ secondary_reset_at=None,
106
161
  last_error_at=runtime.last_error_at,
107
162
  last_selected_at=runtime.last_selected_at,
108
163
  error_count=runtime.error_count,
@@ -116,14 +171,21 @@ class LoadBalancer:
116
171
  runtime.last_error_at = state.last_error_at
117
172
  runtime.error_count = state.error_count
118
173
 
119
- if account.status != state.status or account.deactivation_reason != state.deactivation_reason:
174
+ reset_at_int = int(state.reset_at) if state.reset_at else None
175
+ status_changed = account.status != state.status
176
+ reason_changed = account.deactivation_reason != state.deactivation_reason
177
+ reset_changed = account.reset_at != reset_at_int
178
+
179
+ if status_changed or reason_changed or reset_changed:
120
180
  await self._accounts_repo.update_status(
121
181
  account.id,
122
182
  state.status,
123
183
  state.deactivation_reason,
184
+ reset_at_int,
124
185
  )
125
186
  account.status = state.status
126
187
  account.deactivation_reason = state.deactivation_reason
188
+ account.reset_at = reset_at_int
127
189
 
128
190
 
129
191
  def _build_states(
@@ -161,12 +223,17 @@ def _state_from_account(
161
223
  secondary_used = secondary_entry.used_percent if secondary_entry else None
162
224
  secondary_reset = secondary_entry.reset_at if secondary_entry else None
163
225
 
226
+ # Use account.reset_at from DB as the authoritative source for runtime reset
227
+ # This survives across requests since LoadBalancer is instantiated per-request
228
+ db_reset_at = float(account.reset_at) if account.reset_at else None
229
+ effective_runtime_reset = db_reset_at or runtime.reset_at
230
+
164
231
  status, used_percent, reset_at = apply_usage_quota(
165
232
  status=account.status,
166
233
  primary_used=primary_used,
167
234
  primary_reset=primary_reset,
168
235
  primary_window_minutes=primary_window_minutes,
169
- runtime_reset=runtime.reset_at,
236
+ runtime_reset=effective_runtime_reset,
170
237
  secondary_used=secondary_used,
171
238
  secondary_reset=secondary_reset,
172
239
  )
@@ -177,6 +244,8 @@ def _state_from_account(
177
244
  used_percent=used_percent,
178
245
  reset_at=reset_at,
179
246
  cooldown_until=runtime.cooldown_until,
247
+ secondary_used_percent=secondary_used,
248
+ secondary_reset_at=secondary_reset,
180
249
  last_error_at=runtime.last_error_at,
181
250
  last_selected_at=runtime.last_selected_at,
182
251
  error_count=runtime.error_count,
@@ -2,9 +2,13 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  import time
5
+ from collections.abc import Sequence
5
6
  from datetime import timedelta
7
+ from hashlib import sha256
6
8
  from typing import AsyncIterator, Mapping
7
9
 
10
+ import anyio
11
+
8
12
  from app.core import usage as usage_core
9
13
  from app.core.auth.refresh import RefreshError
10
14
  from app.core.balancer import PERMANENT_FAILURE_CODES
@@ -12,13 +16,14 @@ from app.core.balancer.types import UpstreamError
12
16
  from app.core.clients.proxy import ProxyResponseError, filter_inbound_headers
13
17
  from app.core.clients.proxy import compact_responses as core_compact_responses
14
18
  from app.core.clients.proxy import stream_responses as core_stream_responses
19
+ from app.core.config.settings import get_settings
15
20
  from app.core.crypto import TokenEncryptor
16
21
  from app.core.errors import openai_error, response_failed_event
17
22
  from app.core.openai.models import OpenAIResponsePayload
18
23
  from app.core.openai.parsing import parse_sse_event
19
24
  from app.core.openai.requests import ResponsesCompactRequest, ResponsesRequest
20
25
  from app.core.usage.types import UsageWindowRow
21
- from app.core.utils.request_id import ensure_request_id
26
+ from app.core.utils.request_id import ensure_request_id, get_request_id
22
27
  from app.core.utils.sse import format_sse_event
23
28
  from app.core.utils.time import utcnow
24
29
  from app.db.models import Account, UsageHistory
@@ -40,8 +45,10 @@ from app.modules.proxy.helpers import (
40
45
  _window_snapshot,
41
46
  )
42
47
  from app.modules.proxy.load_balancer import LoadBalancer
48
+ from app.modules.proxy.sticky_repository import StickySessionsRepository
43
49
  from app.modules.proxy.types import RateLimitStatusPayloadData
44
50
  from app.modules.request_logs.repository import RequestLogsRepository
51
+ from app.modules.settings.repository import SettingsRepository
45
52
  from app.modules.usage.repository import UsageRepository
46
53
  from app.modules.usage.updater import UsageUpdater
47
54
 
@@ -54,13 +61,16 @@ class ProxyService:
54
61
  accounts_repo: AccountsRepository,
55
62
  usage_repo: UsageRepository,
56
63
  logs_repo: RequestLogsRepository,
64
+ sticky_repo: StickySessionsRepository,
65
+ settings_repo: SettingsRepository,
57
66
  ) -> None:
58
67
  self._accounts_repo = accounts_repo
59
68
  self._usage_repo = usage_repo
60
69
  self._logs_repo = logs_repo
70
+ self._settings_repo = settings_repo
61
71
  self._encryptor = TokenEncryptor()
62
72
  self._auth_manager = AuthManager(accounts_repo)
63
- self._load_balancer = LoadBalancer(accounts_repo, usage_repo)
73
+ self._load_balancer = LoadBalancer(accounts_repo, usage_repo, sticky_repo)
64
74
  self._usage_updater = UsageUpdater(usage_repo, accounts_repo)
65
75
 
66
76
  def stream_responses(
@@ -70,6 +80,7 @@ class ProxyService:
70
80
  *,
71
81
  propagate_http_errors: bool = False,
72
82
  ) -> AsyncIterator[str]:
83
+ _maybe_log_proxy_request_shape("stream", payload, headers)
73
84
  filtered = filter_inbound_headers(headers)
74
85
  return self._stream_with_retry(
75
86
  payload,
@@ -82,8 +93,16 @@ class ProxyService:
82
93
  payload: ResponsesCompactRequest,
83
94
  headers: Mapping[str, str],
84
95
  ) -> OpenAIResponsePayload:
96
+ _maybe_log_proxy_request_shape("compact", payload, headers)
85
97
  filtered = filter_inbound_headers(headers)
86
- selection = await self._load_balancer.select_account()
98
+ settings = await self._settings_repo.get_or_create()
99
+ prefer_earlier_reset = settings.prefer_earlier_reset_accounts
100
+ sticky_key = _sticky_key_from_compact_payload(payload) if settings.sticky_threads_enabled else None
101
+ selection = await self._load_balancer.select_account(
102
+ sticky_key=sticky_key,
103
+ reallocate_sticky=sticky_key is not None,
104
+ prefer_earlier_reset_accounts=prefer_earlier_reset,
105
+ )
87
106
  account = selection.account
88
107
  if not account:
89
108
  raise ProxyResponseError(
@@ -91,7 +110,7 @@ class ProxyService:
91
110
  openai_error("no_accounts", selection.error_message or "No active accounts available"),
92
111
  )
93
112
  account = await self._ensure_fresh(account)
94
- account_id = _header_account_id(account.id)
113
+ account_id = _header_account_id(account.chatgpt_account_id)
95
114
 
96
115
  async def _call_compact(target: Account) -> OpenAIResponsePayload:
97
116
  access_token = self._encryptor.decrypt(target.access_token_encrypted)
@@ -189,9 +208,15 @@ class ProxyService:
189
208
  propagate_http_errors: bool,
190
209
  ) -> AsyncIterator[str]:
191
210
  request_id = ensure_request_id()
211
+ settings = await self._settings_repo.get_or_create()
212
+ prefer_earlier_reset = settings.prefer_earlier_reset_accounts
213
+ sticky_key = _sticky_key_from_payload(payload) if settings.sticky_threads_enabled else None
192
214
  max_attempts = 3
193
215
  for attempt in range(max_attempts):
194
- selection = await self._load_balancer.select_account()
216
+ selection = await self._load_balancer.select_account(
217
+ sticky_key=sticky_key,
218
+ prefer_earlier_reset_accounts=prefer_earlier_reset,
219
+ )
195
220
  account = selection.account
196
221
  if not account:
197
222
  event = response_failed_event(
@@ -289,8 +314,9 @@ class ProxyService:
289
314
  ) -> AsyncIterator[str]:
290
315
  account_id_value = account.id
291
316
  access_token = self._encryptor.decrypt(account.access_token_encrypted)
292
- account_id = _header_account_id(account_id_value)
317
+ account_id = _header_account_id(account.chatgpt_account_id)
293
318
  model = payload.model
319
+ reasoning_effort = payload.reasoning.effort if payload.reasoning else None
294
320
  start = time.monotonic()
295
321
  status = "success"
296
322
  error_code = None
@@ -370,27 +396,29 @@ class ProxyService:
370
396
  reasoning_tokens = (
371
397
  usage.output_tokens_details.reasoning_tokens if usage and usage.output_tokens_details else None
372
398
  )
373
- try:
374
- await self._logs_repo.add_log(
375
- account_id=account_id_value,
376
- request_id=request_id,
377
- model=model,
378
- input_tokens=input_tokens,
379
- output_tokens=output_tokens,
380
- cached_input_tokens=cached_input_tokens,
381
- reasoning_tokens=reasoning_tokens,
382
- latency_ms=latency_ms,
383
- status=status,
384
- error_code=error_code,
385
- error_message=error_message,
386
- )
387
- except Exception:
388
- logger.warning(
389
- "Failed to persist request log account_id=%s request_id=%s",
390
- account_id_value,
391
- request_id,
392
- exc_info=True,
393
- )
399
+ with anyio.CancelScope(shield=True):
400
+ try:
401
+ await self._logs_repo.add_log(
402
+ account_id=account_id_value,
403
+ request_id=request_id,
404
+ model=model,
405
+ input_tokens=input_tokens,
406
+ output_tokens=output_tokens,
407
+ cached_input_tokens=cached_input_tokens,
408
+ reasoning_tokens=reasoning_tokens,
409
+ reasoning_effort=reasoning_effort,
410
+ latency_ms=latency_ms,
411
+ status=status,
412
+ error_code=error_code,
413
+ error_message=error_message,
414
+ )
415
+ except Exception:
416
+ logger.warning(
417
+ "Failed to persist request log account_id=%s request_id=%s",
418
+ account_id_value,
419
+ request_id,
420
+ exc_info=True,
421
+ )
394
422
 
395
423
  async def _refresh_usage(self, accounts: list[Account]) -> None:
396
424
  latest_usage = await self._usage_repo.latest_by_account(window="primary")
@@ -436,12 +464,9 @@ class ProxyService:
436
464
  await self._handle_stream_error(account, _upstream_error_from_openai(error), code)
437
465
 
438
466
  async def _handle_stream_error(self, account: Account, error: UpstreamError, code: str) -> None:
439
- if code == "rate_limit_exceeded":
467
+ if code in {"rate_limit_exceeded", "usage_limit_reached"}:
440
468
  await self._load_balancer.mark_rate_limit(account, error)
441
469
  return
442
- if code == "usage_limit_reached":
443
- await self._load_balancer.mark_quota_exceeded(account, error)
444
- return
445
470
  if code in {"insufficient_quota", "usage_not_included", "quota_exceeded"}:
446
471
  await self._load_balancer.mark_quota_exceeded(account, error)
447
472
  return
@@ -456,3 +481,102 @@ class _RetryableStreamError(Exception):
456
481
  super().__init__(code)
457
482
  self.code = code
458
483
  self.error = error
484
+
485
+
486
def _maybe_log_proxy_request_shape(
    kind: str,
    payload: ResponsesRequest | ResponsesCompactRequest,
    headers: Mapping[str, str],
) -> None:
    """Log a summary of an incoming proxy request when enabled in settings.

    Nothing is logged unless ``settings.log_proxy_request_shape`` is truthy.
    The log line carries the request id, the request kind, the model, the
    ``stream`` flag, a per-type count of the input items, a shortened hash of
    the prompt cache key, the names of the explicitly set and extra payload
    fields, and which allowlisted headers were present. The truncated raw
    cache key is included only when
    ``settings.log_proxy_request_shape_raw_cache_key`` is also set.
    """
    cfg = get_settings()
    if not cfg.log_proxy_request_shape:
        return

    request_id = get_request_id()
    extras = payload.model_extra

    # Prefer a declared field; fall back to an extra field of the same name.
    cache_key = getattr(payload, "prompt_cache_key", None)
    if cache_key is None and extras:
        candidate = extras.get("prompt_cache_key")
        if isinstance(candidate, str):
            cache_key = candidate

    hashed_key = None
    raw_key = None
    if isinstance(cache_key, str):
        hashed_key = _hash_identifier(cache_key)
        if cfg.log_proxy_request_shape_raw_cache_key:
            raw_key = _truncate_identifier(cache_key)

    if extras:
        extra_names = sorted(extras.keys())
    else:
        extra_names = []
    set_fields = sorted(payload.model_fields_set)
    input_shape = _summarize_input(payload.input)
    seen_headers = _interesting_header_keys(headers)

    logger.warning(
        "proxy_request_shape request_id=%s kind=%s model=%s stream=%s input=%s "
        "prompt_cache_key=%s prompt_cache_key_raw=%s fields=%s extra=%s headers=%s",
        request_id,
        kind,
        payload.model,
        getattr(payload, "stream", None),
        input_shape,
        hashed_key,
        raw_key,
        set_fields,
        extra_names,
        seen_headers,
    )
527
+
528
+
529
+ def _hash_identifier(value: str) -> str:
530
+ digest = sha256(value.encode("utf-8")).hexdigest()
531
+ return f"sha256:{digest[:12]}"
532
+
533
+
534
+ def _summarize_input(items: Sequence[object]) -> str:
535
+ if not items:
536
+ return "0"
537
+ type_counts: dict[str, int] = {}
538
+ for item in items:
539
+ type_name = type(item).__name__
540
+ type_counts[type_name] = type_counts.get(type_name, 0) + 1
541
+ summary = ",".join(f"{key}={type_counts[key]}" for key in sorted(type_counts))
542
+ return f"{len(items)}({summary})"
543
+
544
+
545
+ def _truncate_identifier(value: str, *, max_length: int = 96) -> str:
546
+ if len(value) <= max_length:
547
+ return value
548
+ return f"{value[:48]}...{value[-16:]}"
549
+
550
+
551
+ def _interesting_header_keys(headers: Mapping[str, str]) -> list[str]:
552
+ allowlist = {
553
+ "user-agent",
554
+ "x-request-id",
555
+ "request-id",
556
+ "x-openai-client-id",
557
+ "x-openai-client-version",
558
+ "x-openai-client-arch",
559
+ "x-openai-client-os",
560
+ "x-openai-client-user-agent",
561
+ "x-codex-session-id",
562
+ "x-codex-conversation-id",
563
+ }
564
+ return sorted({key.lower() for key in headers.keys() if key.lower() in allowlist})
565
+
566
+
567
+ def _sticky_key_from_payload(payload: ResponsesRequest) -> str | None:
568
+ value = payload.prompt_cache_key
569
+ if not value:
570
+ return None
571
+ stripped = value.strip()
572
+ return stripped or None
573
+
574
+
575
+ def _sticky_key_from_compact_payload(payload: ResponsesCompactRequest) -> str | None:
576
+ if not payload.model_extra:
577
+ return None
578
+ value = payload.model_extra.get("prompt_cache_key")
579
+ if not isinstance(value, str):
580
+ return None
581
+ stripped = value.strip()
582
+ return stripped or None