coding-proxy 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coding/proxy/config/config.default.yaml +2 -2
- coding/proxy/logging/db.py +1 -1
- coding/proxy/routing/executor.py +47 -3
- coding/proxy/routing/quota_guard.py +40 -8
- coding/proxy/routing/router.py +8 -2
- coding/proxy/server/dashboard.py +1445 -0
- coding/proxy/server/request_normalizer.py +242 -30
- coding/proxy/server/routes.py +14 -4
- {coding_proxy-0.2.2.dist-info → coding_proxy-0.2.3.dist-info}/METADATA +5 -1
- {coding_proxy-0.2.2.dist-info → coding_proxy-0.2.3.dist-info}/RECORD +13 -12
- {coding_proxy-0.2.2.dist-info → coding_proxy-0.2.3.dist-info}/WHEEL +0 -0
- {coding_proxy-0.2.2.dist-info → coding_proxy-0.2.3.dist-info}/entry_points.txt +0 -0
- {coding_proxy-0.2.2.dist-info → coding_proxy-0.2.3.dist-info}/licenses/LICENSE +0 -0
|
coding/proxy/config/config.default.yaml
CHANGED
|
@@ -52,13 +52,13 @@ vendors:
|
|
|
52
52
|
success_threshold: 2
|
|
53
53
|
quota_guard:
|
|
54
54
|
enabled: true
|
|
55
|
-
token_budget:
|
|
55
|
+
token_budget: 50000000 # 5 小时 token 预算(根据订阅计划调整)
|
|
56
56
|
window_hours: 5.0
|
|
57
57
|
threshold_percent: 99.0
|
|
58
58
|
probe_interval_seconds: 300
|
|
59
59
|
weekly_quota_guard:
|
|
60
60
|
enabled: true
|
|
61
|
-
token_budget:
|
|
61
|
+
token_budget: 800000000 # 一周 token 预算(根据订阅计划调整)
|
|
62
62
|
window_hours: 168.0 # 7 天滑动窗口
|
|
63
63
|
threshold_percent: 99.0
|
|
64
64
|
probe_interval_seconds: 1800 # 每 30 分钟探测一次
|
coding/proxy/logging/db.py
CHANGED
|
@@ -497,7 +497,7 @@ class TokenLogger:
|
|
|
497
497
|
return 0
|
|
498
498
|
cutoff_iso = _hours_ago_utc_iso(window_hours)
|
|
499
499
|
cursor = await self._db.execute(
|
|
500
|
-
"""SELECT COALESCE(SUM(input_tokens + output_tokens), 0) AS total
|
|
500
|
+
"""SELECT COALESCE(SUM(input_tokens + output_tokens + cache_creation_tokens + cache_read_tokens), 0) AS total
|
|
501
501
|
FROM usage_log
|
|
502
502
|
WHERE vendor = ? AND success = 1
|
|
503
503
|
AND ts >= ?""",
|
coding/proxy/routing/executor.py
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
|
+
import copy
|
|
9
10
|
import logging
|
|
10
11
|
import time
|
|
11
12
|
from collections.abc import AsyncIterator
|
|
@@ -222,10 +223,43 @@ class _RouteExecutor:
|
|
|
222
223
|
|
|
223
224
|
# ── 公开执行入口 ──────────────────────────────────────
|
|
224
225
|
|
|
226
|
+
def _prepare_body_for_tier(
|
|
227
|
+
self,
|
|
228
|
+
body: dict[str, Any],
|
|
229
|
+
tier: VendorTier,
|
|
230
|
+
normalization: Any = None,
|
|
231
|
+
) -> dict[str, Any]:
|
|
232
|
+
"""为指定 tier 准备请求体,必要时应用 Anthropic 专属修复(Phase 2).
|
|
233
|
+
|
|
234
|
+
仅当 tier 为 Anthropic 且 NormalizationResult 标记需要修复时,
|
|
235
|
+
才执行 deep copy + Phase 2 修复,确保 Zhipu 等其他 vendor 不受影响。
|
|
236
|
+
"""
|
|
237
|
+
if normalization is None or not normalization.has_anthropic_fixes:
|
|
238
|
+
return body
|
|
239
|
+
if tier.name != "anthropic":
|
|
240
|
+
return body
|
|
241
|
+
|
|
242
|
+
from ..server.request_normalizer import apply_anthropic_specific_fixes
|
|
243
|
+
|
|
244
|
+
body_for_vendor = copy.deepcopy(body)
|
|
245
|
+
fixes = apply_anthropic_specific_fixes(
|
|
246
|
+
body_for_vendor.get("messages", []),
|
|
247
|
+
normalization.misplaced_tool_results,
|
|
248
|
+
normalization.misplaced_log_info,
|
|
249
|
+
)
|
|
250
|
+
if fixes:
|
|
251
|
+
logger.debug(
|
|
252
|
+
"Applied Anthropic-specific fixes for tier %s: %s",
|
|
253
|
+
tier.name,
|
|
254
|
+
", ".join(fixes),
|
|
255
|
+
)
|
|
256
|
+
return body_for_vendor
|
|
257
|
+
|
|
225
258
|
async def execute_stream(
|
|
226
259
|
self,
|
|
227
260
|
body: dict[str, Any],
|
|
228
261
|
headers: dict[str, str],
|
|
262
|
+
normalization: Any = None,
|
|
229
263
|
) -> AsyncIterator[tuple[bytes, str]]:
|
|
230
264
|
"""路由流式请求,按优先级尝试各层级."""
|
|
231
265
|
last_idx = len(self._tiers) - 1
|
|
@@ -257,7 +291,10 @@ class _RouteExecutor:
|
|
|
257
291
|
usage: dict[str, Any] = {}
|
|
258
292
|
|
|
259
293
|
try:
|
|
260
|
-
|
|
294
|
+
body_for_tier = self._prepare_body_for_tier(body, tier, normalization)
|
|
295
|
+
async for chunk in tier.vendor.send_message_stream(
|
|
296
|
+
body_for_tier, headers
|
|
297
|
+
):
|
|
261
298
|
parse_usage_from_chunk(
|
|
262
299
|
chunk,
|
|
263
300
|
usage,
|
|
@@ -276,7 +313,12 @@ class _RouteExecutor:
|
|
|
276
313
|
tier.name,
|
|
277
314
|
usage,
|
|
278
315
|
)
|
|
279
|
-
tier.record_success(
|
|
316
|
+
tier.record_success(
|
|
317
|
+
info.input_tokens
|
|
318
|
+
+ info.output_tokens
|
|
319
|
+
+ info.cache_creation_tokens
|
|
320
|
+
+ info.cache_read_tokens
|
|
321
|
+
)
|
|
280
322
|
duration = int((time.monotonic() - start) * 1000)
|
|
281
323
|
model = body.get("model", "unknown")
|
|
282
324
|
model_served = usage.get("model_served") or tier.vendor.map_model(model)
|
|
@@ -384,6 +426,7 @@ class _RouteExecutor:
|
|
|
384
426
|
self,
|
|
385
427
|
body: dict[str, Any],
|
|
386
428
|
headers: dict[str, str],
|
|
429
|
+
normalization: Any = None,
|
|
387
430
|
) -> VendorResponse:
|
|
388
431
|
"""路由非流式请求,按优先级尝试各层级."""
|
|
389
432
|
last_idx = len(self._tiers) - 1
|
|
@@ -412,7 +455,8 @@ class _RouteExecutor:
|
|
|
412
455
|
continue
|
|
413
456
|
|
|
414
457
|
try:
|
|
415
|
-
|
|
458
|
+
body_for_tier = self._prepare_body_for_tier(body, tier, normalization)
|
|
459
|
+
resp = await tier.vendor.send_message(body_for_tier, headers)
|
|
416
460
|
|
|
417
461
|
if resp.status_code < 400:
|
|
418
462
|
duration = int((time.monotonic() - start) * 1000)
|
|
coding/proxy/routing/quota_guard.py
CHANGED
|
@@ -56,6 +56,18 @@ class QuotaGuard:
|
|
|
56
56
|
"""滑动窗口小时数(供基线加载使用)."""
|
|
57
57
|
return self._window / 3600
|
|
58
58
|
|
|
59
|
+
@property
|
|
60
|
+
def _window_label(self) -> str:
|
|
61
|
+
"""人类可读的窗口周期短标签."""
|
|
62
|
+
w = self._window
|
|
63
|
+
if w >= 86400 and w % 86400 == 0:
|
|
64
|
+
return f"{w // 86400}d"
|
|
65
|
+
if w >= 3600 and w % 3600 == 0:
|
|
66
|
+
return f"{w // 3600}h"
|
|
67
|
+
if w >= 60 and w % 60 == 0:
|
|
68
|
+
return f"{w // 60}m"
|
|
69
|
+
return f"{w}s"
|
|
70
|
+
|
|
59
71
|
def can_use_primary(self) -> bool:
|
|
60
72
|
"""判断是否可以使用主后端."""
|
|
61
73
|
if not self._enabled:
|
|
@@ -68,7 +80,8 @@ class QuotaGuard:
|
|
|
68
80
|
):
|
|
69
81
|
self._transition_to(QuotaState.QUOTA_EXCEEDED)
|
|
70
82
|
logger.warning(
|
|
71
|
-
"Quota guard: WITHIN_QUOTA → EXCEEDED (%.1f%%)",
|
|
83
|
+
"Quota guard [%s]: WITHIN_QUOTA → EXCEEDED (%.1f%%)",
|
|
84
|
+
self._window_label,
|
|
72
85
|
self._total / self._budget * 100,
|
|
73
86
|
)
|
|
74
87
|
return False
|
|
@@ -80,12 +93,18 @@ class QuotaGuard:
|
|
|
80
93
|
and self._total < int(self._budget * self._threshold)
|
|
81
94
|
):
|
|
82
95
|
self._transition_to(QuotaState.WITHIN_QUOTA)
|
|
83
|
-
logger.info(
|
|
96
|
+
logger.info(
|
|
97
|
+
"Quota guard [%s]: EXCEEDED → WITHIN_QUOTA (usage dropped)",
|
|
98
|
+
self._window_label,
|
|
99
|
+
)
|
|
84
100
|
return True
|
|
85
101
|
now = time.monotonic()
|
|
86
102
|
if now - self._last_probe >= self._effective_probe_interval:
|
|
87
103
|
self._last_probe = now
|
|
88
|
-
logger.info(
|
|
104
|
+
logger.info(
|
|
105
|
+
"Quota guard [%s]: allowing probe request",
|
|
106
|
+
self._window_label,
|
|
107
|
+
)
|
|
89
108
|
return True
|
|
90
109
|
return False
|
|
91
110
|
|
|
@@ -104,7 +123,10 @@ class QuotaGuard:
|
|
|
104
123
|
with self._lock:
|
|
105
124
|
if self._state == QuotaState.QUOTA_EXCEEDED:
|
|
106
125
|
self._transition_to(QuotaState.WITHIN_QUOTA)
|
|
107
|
-
logger.info(
|
|
126
|
+
logger.info(
|
|
127
|
+
"Quota guard [%s]: EXCEEDED → WITHIN_QUOTA (probe success)",
|
|
128
|
+
self._window_label,
|
|
129
|
+
)
|
|
108
130
|
|
|
109
131
|
def notify_cap_error(self, retry_after_seconds: float | None = None) -> None:
|
|
110
132
|
"""外部通知检测到用量上限错误.
|
|
@@ -125,7 +147,8 @@ class QuotaGuard:
|
|
|
125
147
|
)
|
|
126
148
|
self._cap_error_active = True
|
|
127
149
|
logger.warning(
|
|
128
|
-
"Quota guard: cap error detected → EXCEEDED (effective_probe=%ds)",
|
|
150
|
+
"Quota guard [%s]: cap error detected → EXCEEDED (effective_probe=%ds)",
|
|
151
|
+
self._window_label,
|
|
129
152
|
int(self._effective_probe_interval),
|
|
130
153
|
)
|
|
131
154
|
|
|
@@ -139,12 +162,17 @@ class QuotaGuard:
|
|
|
139
162
|
self._total += total_tokens
|
|
140
163
|
if vendor:
|
|
141
164
|
logger.info(
|
|
142
|
-
"Quota guard [%s]: loaded baseline %d tokens",
|
|
165
|
+
"Quota guard [%s/%s]: loaded baseline %d tokens",
|
|
143
166
|
vendor,
|
|
167
|
+
self._window_label,
|
|
144
168
|
total_tokens,
|
|
145
169
|
)
|
|
146
170
|
else:
|
|
147
|
-
logger.info(
|
|
171
|
+
logger.info(
|
|
172
|
+
"Quota guard [%s]: loaded baseline %d tokens",
|
|
173
|
+
self._window_label,
|
|
174
|
+
total_tokens,
|
|
175
|
+
)
|
|
148
176
|
|
|
149
177
|
def reset(self) -> None:
|
|
150
178
|
"""手动重置为 WITHIN_QUOTA 状态."""
|
|
@@ -152,7 +180,10 @@ class QuotaGuard:
|
|
|
152
180
|
self._transition_to(QuotaState.WITHIN_QUOTA)
|
|
153
181
|
self._entries.clear()
|
|
154
182
|
self._total = 0
|
|
155
|
-
logger.info(
|
|
183
|
+
logger.info(
|
|
184
|
+
"Quota guard [%s]: manually reset to WITHIN_QUOTA",
|
|
185
|
+
self._window_label,
|
|
186
|
+
)
|
|
156
187
|
|
|
157
188
|
def get_info(self) -> dict:
|
|
158
189
|
"""获取配额守卫状态信息."""
|
|
@@ -166,6 +197,7 @@ class QuotaGuard:
|
|
|
166
197
|
if self._budget > 0
|
|
167
198
|
else 0,
|
|
168
199
|
"threshold_percent": self._threshold * 100,
|
|
200
|
+
"window_hours": self.window_hours,
|
|
169
201
|
}
|
|
170
202
|
|
|
171
203
|
def _expire(self) -> None:
|
coding/proxy/routing/router.py
CHANGED
|
@@ -134,18 +134,24 @@ class RequestRouter:
|
|
|
134
134
|
self,
|
|
135
135
|
body: dict[str, Any],
|
|
136
136
|
headers: dict[str, str],
|
|
137
|
+
normalization: Any = None,
|
|
137
138
|
) -> AsyncIterator[tuple[bytes, str]]:
|
|
138
139
|
"""路由流式请求,按优先级尝试各层级."""
|
|
139
|
-
async for chunk, vendor_name in self._executor.execute_stream(
|
|
140
|
+
async for chunk, vendor_name in self._executor.execute_stream(
|
|
141
|
+
body, headers, normalization=normalization
|
|
142
|
+
):
|
|
140
143
|
yield chunk, vendor_name
|
|
141
144
|
|
|
142
145
|
async def route_message(
|
|
143
146
|
self,
|
|
144
147
|
body: dict[str, Any],
|
|
145
148
|
headers: dict[str, str],
|
|
149
|
+
normalization: Any = None,
|
|
146
150
|
) -> Any:
|
|
147
151
|
"""路由非流式请求,按优先级尝试各层级."""
|
|
148
|
-
return await self._executor.execute_message(
|
|
152
|
+
return await self._executor.execute_message(
|
|
153
|
+
body, headers, normalization=normalization
|
|
154
|
+
)
|
|
149
155
|
|
|
150
156
|
# ── 生命周期 ───────────────────────────────────────────
|
|
151
157
|
|