codex-lb 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/__init__.py +5 -0
- app/cli.py +24 -0
- app/core/__init__.py +0 -0
- app/core/auth/__init__.py +96 -0
- app/core/auth/models.py +49 -0
- app/core/auth/refresh.py +144 -0
- app/core/balancer/__init__.py +19 -0
- app/core/balancer/logic.py +140 -0
- app/core/balancer/types.py +9 -0
- app/core/clients/__init__.py +0 -0
- app/core/clients/http.py +39 -0
- app/core/clients/oauth.py +340 -0
- app/core/clients/proxy.py +265 -0
- app/core/clients/usage.py +143 -0
- app/core/config/__init__.py +0 -0
- app/core/config/settings.py +69 -0
- app/core/crypto.py +37 -0
- app/core/errors.py +73 -0
- app/core/openai/__init__.py +0 -0
- app/core/openai/models.py +122 -0
- app/core/openai/parsing.py +55 -0
- app/core/openai/requests.py +59 -0
- app/core/types.py +4 -0
- app/core/usage/__init__.py +185 -0
- app/core/usage/logs.py +57 -0
- app/core/usage/models.py +35 -0
- app/core/usage/pricing.py +172 -0
- app/core/usage/types.py +95 -0
- app/core/utils/__init__.py +0 -0
- app/core/utils/request_id.py +30 -0
- app/core/utils/retry.py +16 -0
- app/core/utils/sse.py +13 -0
- app/core/utils/time.py +19 -0
- app/db/__init__.py +0 -0
- app/db/models.py +82 -0
- app/db/session.py +44 -0
- app/dependencies.py +123 -0
- app/main.py +124 -0
- app/modules/__init__.py +0 -0
- app/modules/accounts/__init__.py +0 -0
- app/modules/accounts/api.py +81 -0
- app/modules/accounts/repository.py +80 -0
- app/modules/accounts/schemas.py +66 -0
- app/modules/accounts/service.py +211 -0
- app/modules/health/__init__.py +0 -0
- app/modules/health/api.py +10 -0
- app/modules/oauth/__init__.py +0 -0
- app/modules/oauth/api.py +57 -0
- app/modules/oauth/schemas.py +32 -0
- app/modules/oauth/service.py +356 -0
- app/modules/oauth/templates/oauth_success.html +122 -0
- app/modules/proxy/__init__.py +0 -0
- app/modules/proxy/api.py +76 -0
- app/modules/proxy/auth_manager.py +51 -0
- app/modules/proxy/load_balancer.py +208 -0
- app/modules/proxy/schemas.py +85 -0
- app/modules/proxy/service.py +707 -0
- app/modules/proxy/types.py +37 -0
- app/modules/proxy/usage_updater.py +147 -0
- app/modules/request_logs/__init__.py +0 -0
- app/modules/request_logs/api.py +31 -0
- app/modules/request_logs/repository.py +86 -0
- app/modules/request_logs/schemas.py +25 -0
- app/modules/request_logs/service.py +77 -0
- app/modules/shared/__init__.py +0 -0
- app/modules/shared/schemas.py +8 -0
- app/modules/usage/__init__.py +0 -0
- app/modules/usage/api.py +31 -0
- app/modules/usage/repository.py +113 -0
- app/modules/usage/schemas.py +62 -0
- app/modules/usage/service.py +246 -0
- app/static/7.css +1336 -0
- app/static/index.css +543 -0
- app/static/index.html +457 -0
- app/static/index.js +1898 -0
- codex_lb-0.1.2.dist-info/METADATA +108 -0
- codex_lb-0.1.2.dist-info/RECORD +80 -0
- codex_lb-0.1.2.dist-info/WHEEL +4 -0
- codex_lb-0.1.2.dist-info/entry_points.txt +2 -0
- codex_lb-0.1.2.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Iterable, Mapping
|
|
4
|
+
|
|
5
|
+
from app.core.usage.types import (
|
|
6
|
+
UsageCostSummary,
|
|
7
|
+
UsageHistoryPayload,
|
|
8
|
+
UsageMetricsSummary,
|
|
9
|
+
UsageSummaryPayload,
|
|
10
|
+
UsageWindowRow,
|
|
11
|
+
UsageWindowSnapshot,
|
|
12
|
+
UsageWindowSummary,
|
|
13
|
+
)
|
|
14
|
+
from app.db.models import Account
|
|
15
|
+
|
|
16
|
+
# Credit capacity per plan for the "primary" window. capacity_for_plan() looks
# plans up here by lower-cased name; plans not listed yield None.
PLAN_CAPACITY_CREDITS_PRIMARY = {
    "plus": 225.0,
    "business": 225.0,
    "pro": 1500.0,
}

# Credit capacity per plan for the "secondary" window (same lookup rules).
PLAN_CAPACITY_CREDITS_SECONDARY = {
    "plus": 7560.0,
    "business": 7560.0,
    "pro": 50400.0,
}

# Fallback window lengths in minutes, used by default_window_minutes().
# 300 min = 5 h and 10080 min = 7 d, matching the "5h"/"7d" window aliases.
DEFAULT_WINDOW_MINUTES_PRIMARY = 300
DEFAULT_WINDOW_MINUTES_SECONDARY = 10080
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _normalize_window_key(window: str | None) -> str:
|
|
33
|
+
normalized = (window or "").lower()
|
|
34
|
+
if normalized in {"primary", "5h"}:
|
|
35
|
+
return "primary"
|
|
36
|
+
if normalized in {"secondary", "7d"}:
|
|
37
|
+
return "secondary"
|
|
38
|
+
return normalized
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _empty_cost() -> UsageCostSummary:
    """Return a USD cost summary with a zero total and no per-model entries."""
    return UsageCostSummary(currency="USD", total_usd_7d=0.0, by_model=[])
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _empty_window(
    reset_at: int | None = None,
    window_minutes: int | None = None,
) -> UsageWindowSnapshot:
    """Build a snapshot reporting zero usage and zero capacity.

    Args:
        reset_at: Reset marker to carry on the snapshot, if any.
        window_minutes: Window length to carry on the snapshot, if any.
    """
    nothing = 0.0
    return UsageWindowSnapshot(
        used_percent=nothing,
        capacity_credits=nothing,
        used_credits=nothing,
        reset_at=reset_at,
        window_minutes=window_minutes,
    )
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def used_credits_from_percent(used_percent: float | None, capacity_credits: float | None) -> float | None:
    """Convert a used percentage into used credits.

    Returns ``capacity_credits * used_percent / 100``, or ``None`` when either
    argument is ``None``.
    """
    has_inputs = used_percent is not None and capacity_credits is not None
    if not has_inputs:
        return None
    return (capacity_credits * used_percent) / 100.0
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def remaining_percent_from_used(used_percent: float | None) -> float | None:
    """Return the percentage still available, floored at zero.

    ``None`` in gives ``None`` out; usage above 100 percent reports ``0.0``.
    """
    if used_percent is not None:
        return max(0.0, 100.0 - float(used_percent))
    return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def remaining_credits_from_used(
    used_credits: float | None,
    capacity_credits: float | None,
) -> float | None:
    """Return the credits left after subtracting usage from capacity.

    The result never goes below ``0.0``. ``None`` is returned when either
    argument is ``None``.
    """
    if used_credits is not None and capacity_credits is not None:
        return max(0.0, float(capacity_credits) - float(used_credits))
    return None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def remaining_credits_from_percent(
    used_percent: float | None,
    capacity_credits: float | None,
) -> float | None:
    """Return the credits left for a given used percentage of a capacity.

    Composes ``used_credits_from_percent`` with
    ``remaining_credits_from_used``; ``None`` inputs propagate as ``None``.
    """
    return remaining_credits_from_used(
        used_credits_from_percent(used_percent, capacity_credits),
        capacity_credits,
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def normalize_usage_window(summary: UsageWindowSummary) -> UsageWindowSnapshot:
    """Turn a window summary into a snapshot with a concrete percentage.

    A missing ``used_percent`` becomes ``0.0``. The credit figures are coerced
    to ``float`` and the reset/window fields are copied unchanged.
    """
    percent = float(summary.used_percent or 0.0)
    capacity = float(summary.capacity_credits)
    consumed = float(summary.used_credits)
    return UsageWindowSnapshot(
        used_percent=percent,
        capacity_credits=capacity,
        used_credits=consumed,
        reset_at=summary.reset_at,
        window_minutes=summary.window_minutes,
    )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def summarize_usage_window(
    usage_rows: Iterable[UsageWindowRow],
    account_map: Mapping[str, Account],
    window: str,
) -> UsageWindowSummary:
    """Aggregate per-account usage rows into one capacity-weighted summary.

    Args:
        usage_rows: Rows to aggregate.
        account_map: Accounts keyed by account id; supplies each row's plan.
        window: Window label passed to the capacity/default-window lookups.

    Returns:
        A summary holding the summed capacity and used credits, the overall
        used percentage (``None`` when no capacity was counted), the smallest
        ``reset_at`` seen, and the largest positive ``window_minutes`` seen
        (falling back to the window's default).
    """
    capacity_sum = 0.0
    used_sum = 0.0
    earliest_reset: int | None = None
    longest_window: int | None = None

    for entry in usage_rows:
        # Reset and window metadata is collected from every row, including
        # rows that are excluded from the credit totals below.
        reset = entry.reset_at
        if reset is not None and (earliest_reset is None or reset < earliest_reset):
            earliest_reset = reset

        minutes = entry.window_minutes
        if minutes is not None and minutes > 0:
            if longest_window is None or minutes > longest_window:
                longest_window = minutes

        owner = account_map.get(entry.account_id)
        plan_capacity = capacity_for_plan(owner.plan_type if owner else None, window)
        if entry.used_percent is not None and plan_capacity is not None:
            capacity_sum += plan_capacity
            used_sum += (plan_capacity * float(entry.used_percent)) / 100.0

    if longest_window is None:
        longest_window = default_window_minutes(window)

    overall_percent = (used_sum / capacity_sum) * 100.0 if capacity_sum > 0 else None

    return UsageWindowSummary(
        used_percent=overall_percent,
        capacity_credits=capacity_sum,
        used_credits=used_sum,
        reset_at=earliest_reset,
        window_minutes=longest_window,
    )
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def capacity_for_plan(plan_type: str | None, window: str) -> float | None:
    """Look up the credit capacity of a plan for a usage window.

    Returns ``None`` when the plan is missing or unknown, or when the window
    is neither primary nor secondary. Plan names are matched
    case-insensitively.
    """
    if not plan_type:
        return None
    plan_key = plan_type.lower()
    tables = {
        "primary": PLAN_CAPACITY_CREDITS_PRIMARY,
        "secondary": PLAN_CAPACITY_CREDITS_SECONDARY,
    }
    table = tables.get(_normalize_window_key(window))
    if table is None:
        return None
    return table.get(plan_key)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def default_window_minutes(window: str) -> int | None:
    """Return the default length in minutes of the given window.

    Yields ``None`` for any window that is neither primary nor secondary.
    """
    defaults = {
        "primary": DEFAULT_WINDOW_MINUTES_PRIMARY,
        "secondary": DEFAULT_WINDOW_MINUTES_SECONDARY,
    }
    return defaults.get(_normalize_window_key(window))
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def parse_usage_summary(
    primary_window: UsageWindowSummary,
    secondary_window: UsageWindowSummary | None,
    cost: UsageCostSummary,
    metrics: UsageMetricsSummary | None = None,
) -> UsageSummaryPayload:
    """Assemble the summary payload from window summaries, cost and metrics.

    Both windows are passed through ``normalize_usage_window``; a missing
    secondary window stays ``None``. ``cost`` and ``metrics`` are attached
    unchanged.
    """
    primary_snapshot = normalize_usage_window(primary_window)
    secondary_snapshot = (
        normalize_usage_window(secondary_window) if secondary_window is not None else None
    )
    return UsageSummaryPayload(
        primary_window=primary_snapshot,
        secondary_window=secondary_snapshot,
        cost=cost,
        metrics=metrics,
    )
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
async def usage_summary() -> UsageSummaryPayload:
    """Return an empty summary payload.

    The primary window is a zeroed snapshot, the cost summary is empty, and
    the secondary window and metrics are ``None``.
    """
    blank_window = _empty_window(window_minutes=None)
    blank_cost = _empty_cost()
    return UsageSummaryPayload(
        primary_window=blank_window,
        secondary_window=None,
        cost=blank_cost,
        metrics=None,
    )
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
async def usage_history(hours: int) -> UsageHistoryPayload:
    """Return a history payload for ``hours`` with an empty account list."""
    return UsageHistoryPayload(window_hours=hours, accounts=[])
|
app/core/usage/logs.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Protocol
|
|
4
|
+
|
|
5
|
+
from app.core.usage.pricing import UsageTokens, calculate_cost_from_usage, get_pricing_for_model
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class RequestLogLike(Protocol):
    """Structural type for objects exposing a model name and token counts.

    Any object with these attributes can be passed to the helpers in this
    module. Every attribute may be ``None``.
    """

    model: str | None
    input_tokens: int | None
    output_tokens: int | None
    cached_input_tokens: int | None
    reasoning_tokens: int | None
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def usage_tokens_from_log(log: RequestLogLike) -> UsageTokens | None:
    """Extract billable token counts from a request log.

    Returns ``None`` when the input count is missing, or when neither an
    output count nor a reasoning count is available. The reasoning count is
    only used as a stand-in when the output count is ``None``. Cached input
    tokens are clamped to the range ``[0, input_tokens]``.
    """
    prompt = log.input_tokens
    if prompt is None:
        return None

    completion = log.output_tokens
    if completion is None:
        completion = log.reasoning_tokens
    if completion is None:
        return None

    cached = max(0, min(log.cached_input_tokens or 0, prompt))
    return UsageTokens(
        input_tokens=float(prompt),
        output_tokens=float(completion),
        cached_input_tokens=float(cached),
    )
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def cost_from_log(log: RequestLogLike, *, precision: int | None = None) -> float | None:
    """Compute the cost of one logged request using the default pricing.

    Args:
        log: Request log providing the model name and token counts.
        precision: When given, the result is rounded to this many decimals.

    Returns:
        The cost, or ``None`` when the model is missing, the token counts are
        incomplete, no price is known for the model, or the cost cannot be
        computed.
    """
    model_name = log.model
    if not model_name:
        return None

    tokens = usage_tokens_from_log(log)
    if not tokens:
        return None

    priced = get_pricing_for_model(model_name, None, None)
    if not priced:
        return None
    _, rate = priced

    amount = calculate_cost_from_usage(tokens, rate)
    if amount is None:
        return None
    return amount if precision is None else round(amount, precision)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def total_tokens_from_log(log: RequestLogLike) -> int | None:
    """Return input plus output tokens for a request log.

    When the output count is ``None`` the reasoning count is used in its
    place. ``None`` is returned only when both sides are unavailable; a
    single missing side counts as zero.
    """
    prompt = log.input_tokens
    completion = log.output_tokens
    if completion is None:
        completion = log.reasoning_tokens

    if prompt is None and completion is None:
        return None
    return (prompt or 0) + (completion or 0)
|
app/core/usage/models.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, ConfigDict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class UsageWindow(BaseModel):
    """Usage figures for a single rate-limit window; every field is optional.

    NOTE(review): field meanings are inferred from their names -- confirm
    against the payload this model is parsed from.
    """

    # Keys not declared below are ignored instead of failing validation.
    model_config = ConfigDict(extra="ignore")

    used_percent: float | None = None
    reset_at: int | None = None  # presumably an epoch timestamp -- TODO confirm
    limit_window_seconds: int | None = None
    reset_after_seconds: int | None = None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RateLimitPayload(BaseModel):
    """Container for the optional primary and secondary usage windows."""

    # Keys not declared below are ignored instead of failing validation.
    model_config = ConfigDict(extra="ignore")

    primary_window: UsageWindow | None = None
    secondary_window: UsageWindow | None = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class CreditsPayload(BaseModel):
    """Optional credit flags and balance; all fields default to ``None``."""

    # Keys not declared below are ignored instead of failing validation.
    model_config = ConfigDict(extra="ignore")

    has_credits: bool | None = None
    unlimited: bool | None = None
    balance: str | None = None  # kept as a string; no numeric parsing happens here
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class UsagePayload(BaseModel):
    """Top-level usage document: plan type, rate-limit windows and credits."""

    # Keys not declared below are ignored instead of failing validation.
    model_config = ConfigDict(extra="ignore")

    plan_type: str | None = None
    rate_limit: RateLimitPayload | None = None
    credits: CreditsPayload | None = None
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from fnmatch import fnmatchcase
|
|
6
|
+
from typing import Iterable, Mapping
|
|
7
|
+
|
|
8
|
+
from app.core.openai.models import ResponseUsage
|
|
9
|
+
from app.core.usage.types import UsageCostByModel, UsageCostSummary
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True)
class ModelPrice:
    """Immutable price of a model, expressed per one million tokens."""

    input_per_1m: float
    output_per_1m: float
    # None means cached input is billed at input_per_1m
    # (see calculate_cost_from_usage).
    cached_input_per_1m: float | None = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(frozen=True)
class UsageTokens:
    """Immutable token counts used as input to cost calculation."""

    input_tokens: float
    output_tokens: float
    # Part of input_tokens served from cache; billed at the cached rate.
    cached_input_tokens: float = 0.0
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class CostItem:
    """One model name paired with its token usage, as fed to calculate_costs."""

    model: str
    usage: UsageTokens
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _as_number(value: object) -> float | None:
|
|
33
|
+
if isinstance(value, (int, float)):
|
|
34
|
+
return float(value)
|
|
35
|
+
return None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _normalize_usage(usage: UsageTokens | ResponseUsage | None) -> UsageTokens | None:
    """Coerce a usage object into ``UsageTokens``.

    A ``UsageTokens`` instance is returned unchanged. For any other truthy
    object the input/output counts are read from its attributes, with the
    reasoning-token count standing in for a missing output count. Cached
    tokens are clamped to ``[0, input_tokens]``. Returns ``None`` when the
    usage is falsy or a required count is not numeric.
    """
    if isinstance(usage, UsageTokens):
        return usage
    if not usage:
        return None

    prompt = _as_number(usage.input_tokens)
    completion = _as_number(usage.output_tokens)
    if completion is None:
        output_details = usage.output_tokens_details
        if output_details is not None:
            completion = _as_number(output_details.reasoning_tokens)

    if prompt is None or completion is None:
        return None

    input_details = usage.input_tokens_details
    cached = 0.0 if input_details is None else (_as_number(input_details.cached_tokens) or 0.0)
    cached = max(0.0, min(cached, prompt))

    return UsageTokens(
        input_tokens=prompt,
        output_tokens=completion,
        cached_input_tokens=cached,
    )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Built-in price table keyed by canonical model name. Used whenever a caller
# does not supply its own pricing mapping.
DEFAULT_PRICING_MODELS: dict[str, ModelPrice] = {
    "gpt-5.2": ModelPrice(input_per_1m=1.75, cached_input_per_1m=0.175, output_per_1m=14.0),
    "gpt-5.1": ModelPrice(input_per_1m=1.25, cached_input_per_1m=0.125, output_per_1m=10.0),
    "gpt-5": ModelPrice(input_per_1m=1.25, cached_input_per_1m=0.125, output_per_1m=10.0),
    "gpt-5.1-codex-max": ModelPrice(
        input_per_1m=1.25,
        cached_input_per_1m=0.125,
        output_per_1m=10.0,
    ),
    "gpt-5.1-codex-mini": ModelPrice(
        input_per_1m=0.25,
        cached_input_per_1m=0.025,
        output_per_1m=2.0,
    ),
    "gpt-5.1-codex": ModelPrice(input_per_1m=1.25, cached_input_per_1m=0.125, output_per_1m=10.0),
    "gpt-5-codex": ModelPrice(input_per_1m=1.25, cached_input_per_1m=0.125, output_per_1m=10.0),
}

# fnmatch-style patterns mapped to canonical model names. When several
# patterns match, resolve_model_alias() picks the longest pattern, so the more
# specific entries win regardless of their order here.
DEFAULT_MODEL_ALIASES: dict[str, str] = {
    "gpt-5.2*": "gpt-5.2",
    "gpt-5.1*": "gpt-5.1",
    "gpt-5*": "gpt-5",
    "gpt-5.1-codex-max*": "gpt-5.1-codex-max",
    "gpt-5.1-codex-mini*": "gpt-5.1-codex-mini",
    "gpt-5.1-codex*": "gpt-5.1-codex",
    "gpt-5-codex*": "gpt-5-codex",
}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def resolve_model_alias(model: str, aliases: Mapping[str, str]) -> str | None:
    """Resolve a model name through fnmatch-style alias patterns.

    Matching is case-insensitive. Among all matching patterns the longest one
    wins; on equal length the pattern that comes first in ``aliases`` is
    kept. Returns ``None`` for an empty model or when nothing matches.
    """
    if not model:
        return None

    needle = model.lower()
    best_target: str | None = None
    best_length = -1
    for pattern, target in aliases.items():
        # Strictly-greater keeps the earliest pattern on ties.
        if fnmatchcase(needle, pattern.lower()) and len(pattern) > best_length:
            best_length = len(pattern)
            best_target = target
    return best_target if best_length >= 0 else None
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def get_pricing_for_model(
    model: str,
    pricing: Mapping[str, ModelPrice] | None = None,
    aliases: Mapping[str, str] | None = None,
) -> tuple[str, ModelPrice] | None:
    """Find the price entry for a model, directly or through an alias.

    Args:
        model: Model name; compared case-insensitively.
        pricing: Price table; the built-in table is used when this is
            ``None`` or empty.
        aliases: Alias patterns; the built-in aliases are used when this is
            ``None`` or empty.

    Returns:
        ``(key, price)`` where ``key`` is the pricing-table key as written in
        the table, or ``None`` when the model is empty or cannot be priced.
    """
    if not model:
        return None

    price_table = pricing or DEFAULT_PRICING_MODELS
    alias_table = aliases or DEFAULT_MODEL_ALIASES

    def _find(name: str) -> tuple[str, ModelPrice] | None:
        # First table entry whose lower-cased key equals ``name``.
        return next(
            ((key, value) for key, value in price_table.items() if key.lower() == name),
            None,
        )

    wanted = model.lower()
    exact = _find(wanted)
    if exact is not None:
        return exact

    canonical = resolve_model_alias(wanted, alias_table)
    if not canonical:
        return None
    return _find(canonical.lower())
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def calculate_cost_from_usage(usage: UsageTokens | ResponseUsage | None, price: ModelPrice) -> float | None:
    """Compute the cost of a usage record at the given price.

    Args:
        usage: Token usage; normalized through ``_normalize_usage``.
        price: Per-million-token rates for the model.

    Returns:
        The cost in the price's currency units, or ``None`` when the usage
        cannot be normalized.
    """
    normalized = _normalize_usage(usage)
    if not normalized:
        return None

    input_tokens = normalized.input_tokens
    # _normalize_usage() hands back a UsageTokens argument untouched, so a
    # caller-built instance may carry cached > input. Apply the same clamp the
    # other normalization paths use so billable input can never go negative.
    cached_tokens = max(0.0, min(normalized.cached_input_tokens, input_tokens))
    billable_input = input_tokens - cached_tokens

    input_rate = price.input_per_1m
    # Without a dedicated cached rate, cached input is billed like fresh input.
    cached_rate = price.cached_input_per_1m if price.cached_input_per_1m is not None else input_rate
    output_rate = price.output_per_1m

    return (
        (billable_input / 1_000_000) * input_rate
        + (cached_tokens / 1_000_000) * cached_rate
        + (normalized.output_tokens / 1_000_000) * output_rate
    )
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def calculate_costs(
    items: Iterable[CostItem],
    pricing: Mapping[str, ModelPrice] | None = None,
    aliases: Mapping[str, str] | None = None,
) -> UsageCostSummary:
    """Total the cost of many usage items, grouped by canonical model.

    Items whose model cannot be priced, or whose cost cannot be computed, are
    skipped. Per-model and overall totals are rounded to six decimals and the
    per-model list is ordered by model name.

    Args:
        items: Usage items to price.
        pricing: Price table; built-in defaults apply when ``None`` or empty.
        aliases: Alias patterns; built-in defaults apply when ``None`` or empty.
    """
    price_table = pricing or DEFAULT_PRICING_MODELS
    alias_table = aliases or DEFAULT_MODEL_ALIASES

    per_model: dict[str, float] = {}
    grand_total = 0.0

    for entry in items:
        priced = get_pricing_for_model(entry.model, price_table, alias_table)
        if not priced:
            continue
        name, rate = priced
        amount = calculate_cost_from_usage(entry.usage, rate)
        if amount is None:
            continue
        per_model[name] = per_model.get(name, 0.0) + amount
        grand_total += amount

    breakdown = [
        UsageCostByModel(model=name, usd=round(per_model[name], 6))
        for name in sorted(per_model)
    ]
    return UsageCostSummary(
        currency="USD",
        total_usd_7d=round(grand_total, 6),
        by_model=breakdown,
    )
|
app/core/usage/types.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(frozen=True)
class UsageWindowRow:
    """Immutable usage reading for one account."""

    account_id: str
    used_percent: float | None  # presumably on a 0-100 scale -- TODO confirm
    reset_at: int | None = None
    window_minutes: int | None = None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class UsageAggregateRow:
    """Immutable per-account aggregate of usage samples.

    NOTE(review): the ``_avg``/``_sum``/``_max`` suffixes suggest these are
    produced by an aggregating query -- confirm against the repository code.
    """

    account_id: str
    used_percent_avg: float | None
    input_tokens_sum: int | None
    output_tokens_sum: int | None
    samples: int
    last_recorded_at: datetime | None
    reset_at_max: int | None
    window_minutes_max: int | None

    def to_window_row(self) -> UsageWindowRow:
        """Project this aggregate onto a ``UsageWindowRow``.

        Uses the averaged percentage and the max reset/window values; token
        sums, sample count and timestamp are not carried over.
        """
        return UsageWindowRow(
            account_id=self.account_id,
            used_percent=self.used_percent_avg,
            reset_at=self.reset_at_max,
            window_minutes=self.window_minutes_max,
        )
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(frozen=True)
class UsageWindowSummary:
    """Immutable aggregated usage for one window; the percentage may be unknown."""

    used_percent: float | None
    capacity_credits: float
    used_credits: float
    reset_at: int | None
    window_minutes: int | None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True)
class UsageWindowSnapshot:
    """Immutable usage for one window with a non-optional percentage.

    Has the same fields as ``UsageWindowSummary``, except ``used_percent`` is
    always a float.
    """

    used_percent: float
    capacity_credits: float
    used_credits: float
    reset_at: int | None
    window_minutes: int | None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass(frozen=True)
class UsageCostByModel:
    """Immutable cost attributed to a single model."""

    model: str
    usd: float
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass(frozen=True)
class UsageCostSummary:
    """Immutable cost total together with its per-model breakdown."""

    currency: str
    total_usd_7d: float  # presumably covers a 7-day period, per the name -- TODO confirm
    by_model: list[UsageCostByModel]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass(frozen=True)
class UsageMetricsSummary:
    """Immutable request/error metrics; every field may be ``None``.

    NOTE(review): periods and units are implied only by the field names.
    """

    requests_7d: int | None
    tokens_secondary_window: int | None
    error_rate_7d: float | None
    top_error: str | None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass(frozen=True)
class UsageSummaryPayload:
    """Immutable usage summary: windows, cost and optional metrics."""

    primary_window: UsageWindowSnapshot
    secondary_window: UsageWindowSnapshot | None
    cost: UsageCostSummary
    metrics: UsageMetricsSummary | None = None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@dataclass(frozen=True)
class UsageHistoryEntry:
    """Immutable per-account entry of a usage history payload."""

    account_id: str
    email: str
    used_percent_avg: float
    used_credits: float
    request_count: int
    cost_usd: float
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@dataclass(frozen=True)
class UsageHistoryPayload:
    """Immutable usage history: the requested hour span plus account entries."""

    window_hours: int
    accounts: list[UsageHistoryEntry]
|
|
File without changes
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from contextvars import ContextVar, Token
|
|
4
|
+
from uuid import uuid4
|
|
5
|
+
|
|
6
|
+
# Context-local holder for the current request id; None until one is set.
_REQUEST_ID: ContextVar[str | None] = ContextVar("request_id", default=None)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_request_id() -> str | None:
    """Return the request id stored in the current context, or ``None``."""
    return _REQUEST_ID.get()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def set_request_id(value: str | None) -> Token[str | None]:
    """Store ``value`` as the current request id.

    Returns the ``Token`` from ``ContextVar.set``, which can be passed to
    ``reset_request_id`` to restore the previous value.
    """
    return _REQUEST_ID.set(value)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def reset_request_id(token: Token[str | None]) -> None:
    """Restore the request id to what it was before ``token`` was created."""
    _REQUEST_ID.reset(token)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def ensure_request_id(value: str | None = None) -> str:
    """Return a request id, creating and storing one if necessary.

    A truthy ``value`` is stored in the context and returned. Otherwise the
    id already in the context is returned as-is; if there is none, a fresh
    UUID4 string is generated, stored and returned.
    """
    request_id = value
    if not request_id:
        existing = _REQUEST_ID.get()
        if existing:
            # Already set for this context: reuse it without writing again.
            return existing
        request_id = str(uuid4())
    _REQUEST_ID.set(request_id)
    return request_id
|
app/core/utils/retry.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
# Simple form: one number followed by "ms" or by a unit starting with "s".
# Tried first so every message that used to parse keeps its exact result.
_RETRY_PATTERN = re.compile(r"(?i)try again in\s*(\d+(?:\.\d+)?)\s*(s|ms|seconds?)")

# Compound form such as "1m30s" or "1h0m0s": one or more <number><unit> pairs
# written back to back without spaces.
_RETRY_COMPOUND_PATTERN = re.compile(r"(?i)try again in\s*((?:\d+(?:\.\d+)?(?:ms|h|m|s))+)")
_RETRY_COMPONENT_PATTERN = re.compile(r"(?i)(\d+(?:\.\d+)?)(ms|h|m|s)")
_RETRY_UNIT_SECONDS = {"h": 3600.0, "m": 60.0, "s": 1.0, "ms": 0.001}


def parse_retry_after(message: str) -> float | None:
    """Extract a "try again in ..." delay from an error message, in seconds.

    Recognizes a single value in seconds or milliseconds ("20s", "250ms",
    "1.5 seconds") and compound durations with hour/minute/second parts
    ("1m30s", "1h0m0s").

    Args:
        message: Error text to scan; ``None`` or empty is treated as no hint.

    Returns:
        The delay in seconds, or ``None`` when no delay hint is present.
    """
    text = message or ""

    simple = _RETRY_PATTERN.search(text)
    if simple:
        value = float(simple.group(1))
        if simple.group(2).lower() == "ms":
            return value / 1000
        return value

    compound = _RETRY_COMPOUND_PATTERN.search(text)
    if not compound:
        return None
    return sum(
        float(number) * _RETRY_UNIT_SECONDS[unit.lower()]
        for number, unit in _RETRY_COMPONENT_PATTERN.findall(compound.group(1))
    )
|
app/core/utils/sse.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
from app.core.types import JsonObject
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def format_sse_event(payload: JsonObject) -> str:
    """Serialize a JSON payload as one server-sent-event frame.

    The payload is written as compact ASCII-only JSON on the ``data:`` line.
    When the payload has a non-empty string ``"type"``, it is also emitted
    as the ``event:`` line.

    Args:
        payload: JSON object to send.

    Returns:
        The frame text, terminated by a blank line.
    """
    data = json.dumps(payload, ensure_ascii=True, separators=(",", ":"))
    event_type = payload.get("type")
    # The event name is written raw, so a CR/LF inside it would end the field
    # early and corrupt the frame. Such names are left out of the header; the
    # JSON body still carries the type, safely escaped.
    if (
        isinstance(event_type, str)
        and event_type
        and "\n" not in event_type
        and "\r" not in event_type
    ):
        return f"event: {event_type}\ndata: {data}\n\n"
    return f"data: {data}\n\n"
|
app/core/utils/time.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime`` (no tzinfo)."""
    aware_now = datetime.now(timezone.utc)
    return aware_now.replace(tzinfo=None)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def to_utc_naive(value: datetime) -> datetime:
    """Convert a datetime to naive UTC.

    Timezone-aware values are shifted to UTC and stripped of their tzinfo.
    Naive values are returned unchanged.
    """
    if value.tzinfo is not None:
        shifted = value.astimezone(timezone.utc)
        return shifted.replace(tzinfo=None)
    return value
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def from_epoch_seconds(value: int | float | None) -> datetime | None:
    """Convert epoch seconds to a timezone-aware UTC ``datetime``.

    ``None`` is passed through. Unlike ``utcnow``, the result keeps its UTC
    tzinfo.
    """
    return None if value is None else datetime.fromtimestamp(value, tz=timezone.utc)
|
app/db/__init__.py
ADDED
|
File without changes
|