coding-proxy 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -52,13 +52,13 @@ vendors:
52
52
  success_threshold: 2
53
53
  quota_guard:
54
54
  enabled: true
55
- token_budget: 45000000 # 5 小时 token 预算(根据订阅计划调整)
55
+ token_budget: 50000000 # 5 小时 token 预算(根据订阅计划调整)
56
56
  window_hours: 5.0
57
57
  threshold_percent: 99.0
58
58
  probe_interval_seconds: 300
59
59
  weekly_quota_guard:
60
60
  enabled: true
61
- token_budget: 250000000 # 一周 token 预算(根据订阅计划调整)
61
+ token_budget: 800000000 # 一周 token 预算(根据订阅计划调整)
62
62
  window_hours: 168.0 # 7 天滑动窗口
63
63
  threshold_percent: 99.0
64
64
  probe_interval_seconds: 1800 # 每 30 分钟探测一次
@@ -497,7 +497,7 @@ class TokenLogger:
497
497
  return 0
498
498
  cutoff_iso = _hours_ago_utc_iso(window_hours)
499
499
  cursor = await self._db.execute(
500
- """SELECT COALESCE(SUM(input_tokens + output_tokens), 0) AS total
500
+ """SELECT COALESCE(SUM(input_tokens + output_tokens + cache_creation_tokens + cache_read_tokens), 0) AS total
501
501
  FROM usage_log
502
502
  WHERE vendor = ? AND success = 1
503
503
  AND ts >= ?""",
@@ -6,6 +6,7 @@
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
+ import copy
9
10
  import logging
10
11
  import time
11
12
  from collections.abc import AsyncIterator
@@ -222,10 +223,43 @@ class _RouteExecutor:
222
223
 
223
224
  # ── 公开执行入口 ──────────────────────────────────────
224
225
 
226
+ def _prepare_body_for_tier(
227
+ self,
228
+ body: dict[str, Any],
229
+ tier: VendorTier,
230
+ normalization: Any = None,
231
+ ) -> dict[str, Any]:
232
+ """为指定 tier 准备请求体,必要时应用 Anthropic 专属修复(Phase 2).
233
+
234
+ 仅当 tier 为 Anthropic 且 NormalizationResult 标记需要修复时,
235
+ 才执行 deep copy + Phase 2 修复,确保 Zhipu 等其他 vendor 不受影响。
236
+ """
237
+ if normalization is None or not normalization.has_anthropic_fixes:
238
+ return body
239
+ if tier.name != "anthropic":
240
+ return body
241
+
242
+ from ..server.request_normalizer import apply_anthropic_specific_fixes
243
+
244
+ body_for_vendor = copy.deepcopy(body)
245
+ fixes = apply_anthropic_specific_fixes(
246
+ body_for_vendor.get("messages", []),
247
+ normalization.misplaced_tool_results,
248
+ normalization.misplaced_log_info,
249
+ )
250
+ if fixes:
251
+ logger.debug(
252
+ "Applied Anthropic-specific fixes for tier %s: %s",
253
+ tier.name,
254
+ ", ".join(fixes),
255
+ )
256
+ return body_for_vendor
257
+
225
258
  async def execute_stream(
226
259
  self,
227
260
  body: dict[str, Any],
228
261
  headers: dict[str, str],
262
+ normalization: Any = None,
229
263
  ) -> AsyncIterator[tuple[bytes, str]]:
230
264
  """路由流式请求,按优先级尝试各层级."""
231
265
  last_idx = len(self._tiers) - 1
@@ -257,7 +291,10 @@ class _RouteExecutor:
257
291
  usage: dict[str, Any] = {}
258
292
 
259
293
  try:
260
- async for chunk in tier.vendor.send_message_stream(body, headers):
294
+ body_for_tier = self._prepare_body_for_tier(body, tier, normalization)
295
+ async for chunk in tier.vendor.send_message_stream(
296
+ body_for_tier, headers
297
+ ):
261
298
  parse_usage_from_chunk(
262
299
  chunk,
263
300
  usage,
@@ -276,7 +313,12 @@ class _RouteExecutor:
276
313
  tier.name,
277
314
  usage,
278
315
  )
279
- tier.record_success(info.input_tokens + info.output_tokens)
316
+ tier.record_success(
317
+ info.input_tokens
318
+ + info.output_tokens
319
+ + info.cache_creation_tokens
320
+ + info.cache_read_tokens
321
+ )
280
322
  duration = int((time.monotonic() - start) * 1000)
281
323
  model = body.get("model", "unknown")
282
324
  model_served = usage.get("model_served") or tier.vendor.map_model(model)
@@ -384,6 +426,7 @@ class _RouteExecutor:
384
426
  self,
385
427
  body: dict[str, Any],
386
428
  headers: dict[str, str],
429
+ normalization: Any = None,
387
430
  ) -> VendorResponse:
388
431
  """路由非流式请求,按优先级尝试各层级."""
389
432
  last_idx = len(self._tiers) - 1
@@ -412,7 +455,8 @@ class _RouteExecutor:
412
455
  continue
413
456
 
414
457
  try:
415
- resp = await tier.vendor.send_message(body, headers)
458
+ body_for_tier = self._prepare_body_for_tier(body, tier, normalization)
459
+ resp = await tier.vendor.send_message(body_for_tier, headers)
416
460
 
417
461
  if resp.status_code < 400:
418
462
  duration = int((time.monotonic() - start) * 1000)
@@ -56,6 +56,18 @@ class QuotaGuard:
56
56
  """滑动窗口小时数(供基线加载使用)."""
57
57
  return self._window / 3600
58
58
 
59
+ @property
60
+ def _window_label(self) -> str:
61
+ """人类可读的窗口周期短标签."""
62
+ w = self._window
63
+ if w >= 86400 and w % 86400 == 0:
64
+ return f"{w // 86400}d"
65
+ if w >= 3600 and w % 3600 == 0:
66
+ return f"{w // 3600}h"
67
+ if w >= 60 and w % 60 == 0:
68
+ return f"{w // 60}m"
69
+ return f"{w}s"
70
+
59
71
  def can_use_primary(self) -> bool:
60
72
  """判断是否可以使用主后端."""
61
73
  if not self._enabled:
@@ -68,7 +80,8 @@ class QuotaGuard:
68
80
  ):
69
81
  self._transition_to(QuotaState.QUOTA_EXCEEDED)
70
82
  logger.warning(
71
- "Quota guard: WITHIN_QUOTA → EXCEEDED (%.1f%%)",
83
+ "Quota guard [%s]: WITHIN_QUOTA → EXCEEDED (%.1f%%)",
84
+ self._window_label,
72
85
  self._total / self._budget * 100,
73
86
  )
74
87
  return False
@@ -80,12 +93,18 @@ class QuotaGuard:
80
93
  and self._total < int(self._budget * self._threshold)
81
94
  ):
82
95
  self._transition_to(QuotaState.WITHIN_QUOTA)
83
- logger.info("Quota guard: EXCEEDED → WITHIN_QUOTA (usage dropped)")
96
+ logger.info(
97
+ "Quota guard [%s]: EXCEEDED → WITHIN_QUOTA (usage dropped)",
98
+ self._window_label,
99
+ )
84
100
  return True
85
101
  now = time.monotonic()
86
102
  if now - self._last_probe >= self._effective_probe_interval:
87
103
  self._last_probe = now
88
- logger.info("Quota guard: allowing probe request")
104
+ logger.info(
105
+ "Quota guard [%s]: allowing probe request",
106
+ self._window_label,
107
+ )
89
108
  return True
90
109
  return False
91
110
 
@@ -104,7 +123,10 @@ class QuotaGuard:
104
123
  with self._lock:
105
124
  if self._state == QuotaState.QUOTA_EXCEEDED:
106
125
  self._transition_to(QuotaState.WITHIN_QUOTA)
107
- logger.info("Quota guard: EXCEEDED → WITHIN_QUOTA (probe success)")
126
+ logger.info(
127
+ "Quota guard [%s]: EXCEEDED → WITHIN_QUOTA (probe success)",
128
+ self._window_label,
129
+ )
108
130
 
109
131
  def notify_cap_error(self, retry_after_seconds: float | None = None) -> None:
110
132
  """外部通知检测到用量上限错误.
@@ -125,7 +147,8 @@ class QuotaGuard:
125
147
  )
126
148
  self._cap_error_active = True
127
149
  logger.warning(
128
- "Quota guard: cap error detected → EXCEEDED (effective_probe=%ds)",
150
+ "Quota guard [%s]: cap error detected → EXCEEDED (effective_probe=%ds)",
151
+ self._window_label,
129
152
  int(self._effective_probe_interval),
130
153
  )
131
154
 
@@ -139,12 +162,17 @@ class QuotaGuard:
139
162
  self._total += total_tokens
140
163
  if vendor:
141
164
  logger.info(
142
- "Quota guard [%s]: loaded baseline %d tokens",
165
+ "Quota guard [%s/%s]: loaded baseline %d tokens",
143
166
  vendor,
167
+ self._window_label,
144
168
  total_tokens,
145
169
  )
146
170
  else:
147
- logger.info("Quota guard: loaded baseline %d tokens", total_tokens)
171
+ logger.info(
172
+ "Quota guard [%s]: loaded baseline %d tokens",
173
+ self._window_label,
174
+ total_tokens,
175
+ )
148
176
 
149
177
  def reset(self) -> None:
150
178
  """手动重置为 WITHIN_QUOTA 状态."""
@@ -152,7 +180,10 @@ class QuotaGuard:
152
180
  self._transition_to(QuotaState.WITHIN_QUOTA)
153
181
  self._entries.clear()
154
182
  self._total = 0
155
- logger.info("Quota guard: manually reset to WITHIN_QUOTA")
183
+ logger.info(
184
+ "Quota guard [%s]: manually reset to WITHIN_QUOTA",
185
+ self._window_label,
186
+ )
156
187
 
157
188
  def get_info(self) -> dict:
158
189
  """获取配额守卫状态信息."""
@@ -166,6 +197,7 @@ class QuotaGuard:
166
197
  if self._budget > 0
167
198
  else 0,
168
199
  "threshold_percent": self._threshold * 100,
200
+ "window_hours": self.window_hours,
169
201
  }
170
202
 
171
203
  def _expire(self) -> None:
@@ -134,18 +134,24 @@ class RequestRouter:
134
134
  self,
135
135
  body: dict[str, Any],
136
136
  headers: dict[str, str],
137
+ normalization: Any = None,
137
138
  ) -> AsyncIterator[tuple[bytes, str]]:
138
139
  """路由流式请求,按优先级尝试各层级."""
139
- async for chunk, vendor_name in self._executor.execute_stream(body, headers):
140
+ async for chunk, vendor_name in self._executor.execute_stream(
141
+ body, headers, normalization=normalization
142
+ ):
140
143
  yield chunk, vendor_name
141
144
 
142
145
  async def route_message(
143
146
  self,
144
147
  body: dict[str, Any],
145
148
  headers: dict[str, str],
149
+ normalization: Any = None,
146
150
  ) -> Any:
147
151
  """路由非流式请求,按优先级尝试各层级."""
148
- return await self._executor.execute_message(body, headers)
152
+ return await self._executor.execute_message(
153
+ body, headers, normalization=normalization
154
+ )
149
155
 
150
156
  # ── 生命周期 ───────────────────────────────────────────
151
157