codex-lb 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,9 +3,7 @@ from __future__ import annotations
3
3
  import logging
4
4
  import time
5
5
  from datetime import timedelta
6
- from typing import AsyncIterator, Iterable, Mapping
7
-
8
- from pydantic import ValidationError
6
+ from typing import AsyncIterator, Mapping
9
7
 
10
8
  from app.core import usage as usage_core
11
9
  from app.core.auth.refresh import RefreshError
@@ -15,27 +13,37 @@ from app.core.clients.proxy import ProxyResponseError, filter_inbound_headers
15
13
  from app.core.clients.proxy import compact_responses as core_compact_responses
16
14
  from app.core.clients.proxy import stream_responses as core_stream_responses
17
15
  from app.core.crypto import TokenEncryptor
18
- from app.core.errors import OpenAIErrorDetail, OpenAIErrorEnvelope, openai_error, response_failed_event
19
- from app.core.openai.models import OpenAIError, OpenAIResponsePayload
16
+ from app.core.errors import openai_error, response_failed_event
17
+ from app.core.openai.models import OpenAIResponsePayload
20
18
  from app.core.openai.parsing import parse_sse_event
21
19
  from app.core.openai.requests import ResponsesCompactRequest, ResponsesRequest
22
- from app.core.usage.types import UsageWindowRow, UsageWindowSummary
20
+ from app.core.usage.types import UsageWindowRow
23
21
  from app.core.utils.request_id import ensure_request_id
24
22
  from app.core.utils.sse import format_sse_event
25
23
  from app.core.utils.time import utcnow
26
- from app.db.models import Account, AccountStatus, UsageHistory
24
+ from app.db.models import Account, UsageHistory
25
+ from app.modules.accounts.auth_manager import AuthManager
27
26
  from app.modules.accounts.repository import AccountsRepository
28
- from app.modules.proxy.auth_manager import AuthManager
29
- from app.modules.proxy.load_balancer import LoadBalancer
30
- from app.modules.proxy.types import (
31
- CreditStatusDetailsData,
32
- RateLimitStatusDetailsData,
33
- RateLimitStatusPayloadData,
34
- RateLimitWindowSnapshotData,
27
+ from app.modules.proxy.helpers import (
28
+ _apply_error_metadata,
29
+ _credits_headers,
30
+ _credits_snapshot,
31
+ _header_account_id,
32
+ _normalize_error_code,
33
+ _parse_openai_error,
34
+ _plan_type_for_accounts,
35
+ _rate_limit_details,
36
+ _rate_limit_headers,
37
+ _select_accounts_for_limits,
38
+ _summarize_window,
39
+ _upstream_error_from_openai,
40
+ _window_snapshot,
35
41
  )
36
- from app.modules.proxy.usage_updater import UsageUpdater
42
+ from app.modules.proxy.load_balancer import LoadBalancer
43
+ from app.modules.proxy.types import RateLimitStatusPayloadData
37
44
  from app.modules.request_logs.repository import RequestLogsRepository
38
45
  from app.modules.usage.repository import UsageRepository
46
+ from app.modules.usage.updater import UsageUpdater
39
47
 
40
48
  logger = logging.getLogger(__name__)
41
49
 
@@ -304,7 +312,11 @@ class ProxyService:
304
312
  return
305
313
  event = parse_sse_event(first)
306
314
  if event and event.type in ("response.failed", "error"):
307
- error = event.response.error if event.type == "response.failed" else event.error
315
+ if event.type == "response.failed":
316
+ response = event.response
317
+ error = response.error if response else None
318
+ else:
319
+ error = event.error
308
320
  code = _normalize_error_code(
309
321
  error.code if error else None,
310
322
  error.type if error else None,
@@ -326,7 +338,11 @@ class ProxyService:
326
338
  event_type = event.type
327
339
  if event_type in ("response.failed", "error"):
328
340
  status = "error"
329
- error = event.response.error if event_type == "response.failed" else event.error
341
+ if event_type == "response.failed":
342
+ response = event.response
343
+ error = response.error if response else None
344
+ else:
345
+ error = event.error
330
346
  error_code = _normalize_error_code(
331
347
  error.code if error else None,
332
348
  error.type if error else None,
@@ -420,9 +436,12 @@ class ProxyService:
420
436
  await self._handle_stream_error(account, _upstream_error_from_openai(error), code)
421
437
 
422
438
  async def _handle_stream_error(self, account: Account, error: UpstreamError, code: str) -> None:
423
- if code in {"rate_limit_exceeded", "usage_limit_reached"}:
439
+ if code == "rate_limit_exceeded":
424
440
  await self._load_balancer.mark_rate_limit(account, error)
425
441
  return
442
+ if code == "usage_limit_reached":
443
+ await self._load_balancer.mark_quota_exceeded(account, error)
444
+ return
426
445
  if code in {"insufficient_quota", "usage_not_included", "quota_exceeded"}:
427
446
  await self._load_balancer.mark_quota_exceeded(account, error)
428
447
  return
@@ -432,297 +451,8 @@ class ProxyService:
432
451
  await self._load_balancer.record_error(account)
433
452
 
434
453
 
435
- def _header_account_id(account_id: str | None) -> str | None:
436
- if not account_id:
437
- return None
438
- if account_id.startswith(("email_", "local_")):
439
- return None
440
- return account_id
441
-
442
-
443
- KNOWN_PLAN_TYPES = {
444
- "guest",
445
- "free",
446
- "go",
447
- "plus",
448
- "pro",
449
- "free_workspace",
450
- "team",
451
- "business",
452
- "education",
453
- "quorum",
454
- "k12",
455
- "enterprise",
456
- "edu",
457
- }
458
-
459
- PLAN_TYPE_PRIORITY = (
460
- "enterprise",
461
- "business",
462
- "team",
463
- "pro",
464
- "plus",
465
- "education",
466
- "edu",
467
- "free_workspace",
468
- "free",
469
- "go",
470
- "guest",
471
- "quorum",
472
- "k12",
473
- )
474
-
475
-
476
- def _select_accounts_for_limits(accounts: Iterable[Account]) -> list[Account]:
477
- return [account for account in accounts if account.status not in (AccountStatus.DEACTIVATED, AccountStatus.PAUSED)]
478
-
479
-
480
- def _summarize_window(
481
- rows: list[UsageWindowRow],
482
- account_map: dict[str, Account],
483
- window: str,
484
- ) -> UsageWindowSummary | None:
485
- if not rows:
486
- return None
487
- return usage_core.summarize_usage_window(rows, account_map, window)
488
-
489
-
490
- def _window_snapshot(
491
- summary: UsageWindowSummary | None,
492
- rows: list[UsageWindowRow],
493
- window: str,
494
- now_epoch: int,
495
- ) -> RateLimitWindowSnapshotData | None:
496
- if summary is None:
497
- return None
498
-
499
- used_percent = _normalize_used_percent(summary.used_percent, rows)
500
- if used_percent is None:
501
- return None
502
-
503
- reset_at = summary.reset_at
504
- if reset_at is None:
505
- return None
506
-
507
- window_minutes = summary.window_minutes or usage_core.default_window_minutes(window)
508
- if not window_minutes:
509
- return None
510
-
511
- limit_window_seconds = int(window_minutes * 60)
512
- reset_after_seconds = max(0, int(reset_at) - now_epoch)
513
-
514
- return RateLimitWindowSnapshotData(
515
- used_percent=_percent_to_int(used_percent),
516
- limit_window_seconds=limit_window_seconds,
517
- reset_after_seconds=reset_after_seconds,
518
- reset_at=int(reset_at),
519
- )
520
-
521
-
522
- def _normalize_used_percent(
523
- value: float | None,
524
- rows: Iterable[UsageWindowRow],
525
- ) -> float | None:
526
- if value is not None:
527
- return value
528
- values = [row.used_percent for row in rows if row.used_percent is not None]
529
- if not values:
530
- return None
531
- return sum(values) / len(values)
532
-
533
-
534
- def _percent_to_int(value: float) -> int:
535
- bounded = max(0.0, min(100.0, value))
536
- return int(bounded)
537
-
538
-
539
- def _rate_limit_details(
540
- primary: RateLimitWindowSnapshotData | None,
541
- secondary: RateLimitWindowSnapshotData | None,
542
- ) -> RateLimitStatusDetailsData | None:
543
- if not primary and not secondary:
544
- return None
545
- used_percents = [window.used_percent for window in (primary, secondary) if window]
546
- limit_reached = any(used >= 100 for used in used_percents)
547
- return RateLimitStatusDetailsData(
548
- allowed=not limit_reached,
549
- limit_reached=limit_reached,
550
- primary_window=primary,
551
- secondary_window=secondary,
552
- )
553
-
554
-
555
- def _aggregate_credits(entries: Iterable[UsageHistory]) -> tuple[bool, bool, float] | None:
556
- has_data = False
557
- has_credits = False
558
- unlimited = False
559
- balance_total = 0.0
560
-
561
- for entry in entries:
562
- credits_has = entry.credits_has
563
- credits_unlimited = entry.credits_unlimited
564
- credits_balance = entry.credits_balance
565
- if credits_has is None and credits_unlimited is None and credits_balance is None:
566
- continue
567
- has_data = True
568
- if credits_has is True:
569
- has_credits = True
570
- if credits_unlimited is True:
571
- unlimited = True
572
- if credits_balance is not None and not credits_unlimited:
573
- try:
574
- balance_total += float(credits_balance)
575
- except (TypeError, ValueError):
576
- continue
577
-
578
- if not has_data:
579
- return None
580
- if unlimited:
581
- has_credits = True
582
- return has_credits, unlimited, balance_total
583
-
584
-
585
- def _credits_snapshot(entries: Iterable[UsageHistory]) -> CreditStatusDetailsData | None:
586
- aggregate = _aggregate_credits(entries)
587
- if aggregate is None:
588
- return None
589
- has_credits, unlimited, balance_total = aggregate
590
- balance_value = str(round(balance_total, 2))
591
- return CreditStatusDetailsData(
592
- has_credits=has_credits,
593
- unlimited=unlimited,
594
- balance=balance_value,
595
- approx_local_messages=None,
596
- approx_cloud_messages=None,
597
- )
598
-
599
-
600
- def _plan_type_for_accounts(accounts: Iterable[Account]) -> str:
601
- normalized = [_normalize_plan_type(account.plan_type) for account in accounts]
602
- filtered = [plan for plan in normalized if plan is not None]
603
- if not filtered:
604
- return "guest"
605
- unique = set(filtered)
606
- if len(unique) == 1:
607
- return filtered[0]
608
- for plan in PLAN_TYPE_PRIORITY:
609
- if plan in unique:
610
- return plan
611
- return "guest"
612
-
613
-
614
- def _normalize_plan_type(value: str | None) -> str | None:
615
- if not value:
616
- return None
617
- normalized = value.strip().lower()
618
- if normalized not in KNOWN_PLAN_TYPES:
619
- return None
620
- return normalized
621
-
622
-
623
- def _rate_limit_headers(
624
- window_label: str,
625
- summary: UsageWindowSummary,
626
- ) -> dict[str, str]:
627
- used_percent = summary.used_percent
628
- window_minutes = summary.window_minutes
629
- if used_percent is None or window_minutes is None:
630
- return {}
631
- headers = {
632
- f"x-codex-{window_label}-used-percent": str(float(used_percent)),
633
- f"x-codex-{window_label}-window-minutes": str(int(window_minutes)),
634
- }
635
- reset_at = summary.reset_at
636
- if reset_at is not None:
637
- headers[f"x-codex-{window_label}-reset-at"] = str(int(reset_at))
638
- return headers
639
-
640
-
641
- def _credits_headers(entries: Iterable[UsageHistory]) -> dict[str, str]:
642
- aggregate = _aggregate_credits(entries)
643
- if aggregate is None:
644
- return {}
645
- has_credits, unlimited, balance_total = aggregate
646
- balance_value = f"{balance_total:.2f}"
647
- return {
648
- "x-codex-credits-has-credits": "true" if has_credits else "false",
649
- "x-codex-credits-unlimited": "true" if unlimited else "false",
650
- "x-codex-credits-balance": balance_value,
651
- }
652
-
653
-
654
- def _normalize_error_code(code: str | None, error_type: str | None) -> str:
655
- value = code or error_type
656
- if not value:
657
- return "upstream_error"
658
- return value.lower()
659
-
660
-
661
- def _parse_openai_error(payload: OpenAIErrorEnvelope) -> OpenAIError | None:
662
- error = payload.get("error")
663
- if not error:
664
- return None
665
- try:
666
- return OpenAIError.model_validate(error)
667
- except ValidationError:
668
- if not isinstance(error, dict):
669
- return None
670
- return OpenAIError(
671
- message=_coerce_str(error.get("message")),
672
- type=_coerce_str(error.get("type")),
673
- code=_coerce_str(error.get("code")),
674
- param=_coerce_str(error.get("param")),
675
- plan_type=_coerce_str(error.get("plan_type")),
676
- resets_at=_coerce_number(error.get("resets_at")),
677
- resets_in_seconds=_coerce_number(error.get("resets_in_seconds")),
678
- )
679
-
680
-
681
- def _coerce_str(value: object) -> str | None:
682
- return value if isinstance(value, str) else None
683
-
684
-
685
- def _coerce_number(value: object) -> int | float | None:
686
- if isinstance(value, (int, float)):
687
- return value
688
- if isinstance(value, str):
689
- try:
690
- return float(value.strip())
691
- except ValueError:
692
- return None
693
- return None
694
-
695
-
696
- def _apply_error_metadata(target: OpenAIErrorDetail, error: OpenAIError | None) -> None:
697
- if not error:
698
- return
699
- if error.plan_type is not None:
700
- target["plan_type"] = error.plan_type
701
- if error.resets_at is not None:
702
- target["resets_at"] = error.resets_at
703
- if error.resets_in_seconds is not None:
704
- target["resets_in_seconds"] = error.resets_in_seconds
705
-
706
-
707
454
  class _RetryableStreamError(Exception):
708
455
  def __init__(self, code: str, error: UpstreamError) -> None:
709
456
  super().__init__(code)
710
457
  self.code = code
711
458
  self.error = error
712
-
713
-
714
- def _upstream_error_from_openai(error: OpenAIError | None) -> UpstreamError:
715
- if not error:
716
- return {}
717
- data = error.model_dump(exclude_none=True)
718
- payload: UpstreamError = {}
719
- message = data.get("message")
720
- if isinstance(message, str):
721
- payload["message"] = message
722
- resets_at = data.get("resets_at")
723
- if isinstance(resets_at, (int, float)):
724
- payload["resets_at"] = resets_at
725
- resets_in_seconds = data.get("resets_in_seconds")
726
- if isinstance(resets_in_seconds, (int, float)):
727
- payload["resets_in_seconds"] = resets_in_seconds
728
- return payload
@@ -1,8 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from datetime import datetime
4
+ from typing import cast
4
5
 
5
- from app.core.usage.logs import cost_from_log, total_tokens_from_log
6
+ from app.core.usage.logs import RequestLogLike, cost_from_log, total_tokens_from_log
6
7
  from app.db.models import RequestLog
7
8
  from app.modules.request_logs.repository import RequestLogsRepository
8
9
  from app.modules.request_logs.schemas import RequestLogEntry
@@ -63,6 +64,7 @@ def _log_status(log: RequestLog) -> str:
63
64
 
64
65
 
65
66
  def _to_entry(log: RequestLog) -> RequestLogEntry:
67
+ log_like = cast(RequestLogLike, log)
66
68
  return RequestLogEntry(
67
69
  requested_at=log.requested_at,
68
70
  account_id=log.account_id,
@@ -71,7 +73,7 @@ def _to_entry(log: RequestLog) -> RequestLogEntry:
71
73
  status=_log_status(log),
72
74
  error_code=log.error_code,
73
75
  error_message=log.error_message,
74
- tokens=total_tokens_from_log(log),
75
- cost_usd=cost_from_log(log, precision=6),
76
+ tokens=total_tokens_from_log(log_like),
77
+ cost_usd=cost_from_log(log_like, precision=6),
76
78
  latency_ms=log.latency_ms,
77
79
  )
@@ -1,9 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from datetime import timedelta
4
+ from typing import cast
4
5
 
5
6
  from app.core import usage as usage_core
6
- from app.core.usage.logs import cost_from_log, total_tokens_from_log, usage_tokens_from_log
7
+ from app.core.usage.logs import RequestLogLike, cost_from_log, total_tokens_from_log, usage_tokens_from_log
7
8
  from app.core.usage.pricing import CostItem, calculate_costs
8
9
  from app.core.usage.types import (
9
10
  UsageCostSummary,
@@ -15,7 +16,6 @@ from app.core.usage.types import (
15
16
  from app.core.utils.time import from_epoch_seconds, utcnow
16
17
  from app.db.models import Account, RequestLog
17
18
  from app.modules.accounts.repository import AccountsRepository
18
- from app.modules.proxy.usage_updater import UsageUpdater
19
19
  from app.modules.request_logs.repository import RequestLogsRepository
20
20
  from app.modules.usage.repository import UsageRepository
21
21
  from app.modules.usage.schemas import (
@@ -28,6 +28,7 @@ from app.modules.usage.schemas import (
28
28
  UsageWindow,
29
29
  UsageWindowResponse,
30
30
  )
31
+ from app.modules.usage.updater import UsageUpdater
31
32
 
32
33
 
33
34
  class UsageService:
@@ -137,7 +138,7 @@ def _build_account_history(
137
138
  for log in logs:
138
139
  account_id = log.account_id
139
140
  counts[account_id] = counts.get(account_id, 0) + 1
140
- cost = cost_from_log(log)
141
+ cost = cost_from_log(cast(RequestLogLike, log))
141
142
  if cost is None:
142
143
  continue
143
144
  costs[account_id] = costs.get(account_id, 0.0) + cost
@@ -166,7 +167,7 @@ def _build_account_history(
166
167
 
167
168
  def _log_to_cost_item(log: RequestLog) -> CostItem | None:
168
169
  model = log.model
169
- usage = usage_tokens_from_log(log)
170
+ usage = usage_tokens_from_log(cast(RequestLogLike, log))
170
171
  if not model or not usage:
171
172
  return None
172
173
  return CostItem(model=model, usage=usage)
@@ -191,7 +192,7 @@ def _usage_metrics(logs_secondary: list[RequestLog]) -> UsageMetricsSummary:
191
192
  def _sum_tokens(logs: list[RequestLog]) -> int:
192
193
  total = 0
193
194
  for log in logs:
194
- total += total_tokens_from_log(log) or 0
195
+ total += total_tokens_from_log(cast(RequestLogLike, log)) or 0
195
196
  return total
196
197
 
197
198
 
@@ -232,7 +233,7 @@ def _window_snapshot_to_model(snapshot: UsageWindowSnapshot) -> UsageWindow:
232
233
  def _cost_summary_to_model(cost: UsageCostSummary) -> UsageCost:
233
234
  return UsageCost(
234
235
  currency=cost.currency,
235
- total_usd_7d=cost.total_usd_7d,
236
+ totalUsd7d=cost.total_usd_7d,
236
237
  by_model=[UsageCostByModel(model=item.model, usd=item.usd) for item in cost.by_model],
237
238
  )
238
239
 
@@ -12,8 +12,8 @@ from app.core.usage.models import UsagePayload
12
12
  from app.core.utils.request_id import get_request_id
13
13
  from app.core.utils.time import utcnow
14
14
  from app.db.models import Account, AccountStatus, UsageHistory
15
+ from app.modules.accounts.auth_manager import AuthManager
15
16
  from app.modules.accounts.repository import AccountsRepository
16
- from app.modules.proxy.auth_manager import AuthManager
17
17
  from app.modules.usage.repository import UsageRepository
18
18
 
19
19
  logger = logging.getLogger(__name__)
app/static/index.js CHANGED
@@ -74,11 +74,15 @@
74
74
  error: "deactivated",
75
75
  };
76
76
 
77
- const PLAN_LABELS = {
78
- plus: "Plus",
79
- team: "Team",
80
- free: "Free",
81
- };
77
+ const KNOWN_PLAN_TYPES = new Set([
78
+ "free",
79
+ "plus",
80
+ "pro",
81
+ "team",
82
+ "business",
83
+ "enterprise",
84
+ "edu",
85
+ ]);
82
86
 
83
87
  const ROUTING_LABELS = {
84
88
  usage_weighted: "usage weighted",
@@ -92,7 +96,7 @@
92
96
  timeout: "timeout",
93
97
  upstream: "upstream",
94
98
  rate_limit_exceeded: "rate limit",
95
- usage_limit_reached: "rate limit",
99
+ usage_limit_reached: "quota",
96
100
  insufficient_quota: "quota",
97
101
  usage_not_included: "quota",
98
102
  quota_exceeded: "quota",
@@ -444,7 +448,19 @@
444
448
  REQUEST_STATUS_LABELS[status] || "Unknown";
445
449
  const requestStatusClass = (status) =>
446
450
  REQUEST_STATUS_CLASSES[status] || "deactivated";
447
- const planLabel = (plan) => PLAN_LABELS[plan] || "Unknown";
451
+ const normalizePlanType = (plan) => {
452
+ if (plan === null || plan === undefined) {
453
+ return null;
454
+ }
455
+ const value = String(plan).trim().toLowerCase();
456
+ return KNOWN_PLAN_TYPES.has(value) ? value : null;
457
+ };
458
+ const titleCase = (value) =>
459
+ value ? value.charAt(0).toUpperCase() + value.slice(1).toLowerCase() : "";
460
+ const planLabel = (plan) => {
461
+ const normalized = normalizePlanType(plan);
462
+ return normalized ? titleCase(normalized) : "Unknown";
463
+ };
448
464
  const routingLabel = (strategy) => ROUTING_LABELS[strategy] || "unknown";
449
465
  const errorLabel = (code) => ERROR_LABELS[code] || "--";
450
466
  const progressClass = (status) => PROGRESS_CLASS_BY_STATUS[status] || "";
@@ -569,16 +585,9 @@
569
585
  return acc;
570
586
  }, {});
571
587
 
572
- const mergeUsageIntoAccounts = (
573
- accounts,
574
- primaryUsage,
575
- secondaryUsage,
576
- summary,
577
- ) => {
588
+ const mergeUsageIntoAccounts = (accounts, primaryUsage, secondaryUsage) => {
578
589
  const primaryMap = buildUsageIndex(primaryUsage || []);
579
590
  const secondaryMap = buildUsageIndex(secondaryUsage || []);
580
- const resetAtPrimary = summary?.primaryWindow?.resetAt ?? null;
581
- const resetAtSecondary = summary?.secondaryWindow?.resetAt ?? null;
582
591
  return accounts.map((account) => {
583
592
  const primaryRow = primaryMap[account.id];
584
593
  const secondaryRow = secondaryMap[account.id];
@@ -598,8 +607,8 @@
598
607
  account.usage?.secondaryRemainingPercent ??
599
608
  0,
600
609
  },
601
- resetAtPrimary: resetAtPrimary ?? account.resetAtPrimary ?? null,
602
- resetAtSecondary: resetAtSecondary ?? account.resetAtSecondary ?? null,
610
+ resetAtPrimary: account.resetAtPrimary ?? null,
611
+ resetAtSecondary: account.resetAtSecondary ?? null,
603
612
  };
604
613
  });
605
614
  };
@@ -1191,7 +1200,6 @@
1191
1200
  accountsResult.value,
1192
1201
  primaryUsage,
1193
1202
  secondaryUsage,
1194
- summary,
1195
1203
  );
1196
1204
  this.applyData(
1197
1205
  {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codex-lb
3
- Version: 0.1.4
3
+ Version: 0.2.0
4
4
  Summary: Codex load balancer and proxy for ChatGPT accounts with usage dashboard
5
5
  Author-email: Soju06 <qlskssk@gmail.com>
6
6
  Maintainer-email: Soju06 <qlskssk@gmail.com>