PyPI - coding-proxy - Versions diffs - 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl - Mend

coding-proxy 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

coding/proxy/config/config.default.yaml +2 -2
coding/proxy/logging/db.py +1 -1
coding/proxy/routing/executor.py +47 -3
coding/proxy/routing/quota_guard.py +40 -8
coding/proxy/routing/router.py +8 -2
coding/proxy/server/dashboard.py +1445 -0
coding/proxy/server/request_normalizer.py +242 -30
coding/proxy/server/routes.py +14 -4
{coding_proxy-0.2.2.dist-info → coding_proxy-0.2.3.dist-info}/METADATA +5 -1
{coding_proxy-0.2.2.dist-info → coding_proxy-0.2.3.dist-info}/RECORD +13 -12
{coding_proxy-0.2.2.dist-info → coding_proxy-0.2.3.dist-info}/WHEEL +0 -0
{coding_proxy-0.2.2.dist-info → coding_proxy-0.2.3.dist-info}/entry_points.txt +0 -0
{coding_proxy-0.2.2.dist-info → coding_proxy-0.2.3.dist-info}/licenses/LICENSE +0 -0

coding/proxy/config/config.default.yaml CHANGED Viewed

@@ -52,13 +52,13 @@ vendors:
       success_threshold: 2
     quota_guard:
       enabled: true
-      token_budget: 45000000 # 5 小时 token 预算（根据订阅计划调整）
+      token_budget: 50000000 # 5 小时 token 预算（根据订阅计划调整）
       window_hours: 5.0
       threshold_percent: 99.0
       probe_interval_seconds: 300
     weekly_quota_guard:
       enabled: true
-      token_budget: 250000000 # 一周 token 预算（根据订阅计划调整）
+      token_budget: 800000000 # 一周 token 预算（根据订阅计划调整）
       window_hours: 168.0 # 7 天滑动窗口
       threshold_percent: 99.0
       probe_interval_seconds: 1800 # 每 30 分钟探测一次

coding/proxy/logging/db.py CHANGED Viewed

@@ -497,7 +497,7 @@ class TokenLogger:
             return 0
         cutoff_iso = _hours_ago_utc_iso(window_hours)
         cursor = await self._db.execute(
-            """SELECT COALESCE(SUM(input_tokens + output_tokens), 0) AS total
+            """SELECT COALESCE(SUM(input_tokens + output_tokens + cache_creation_tokens + cache_read_tokens), 0) AS total
                FROM usage_log
                WHERE vendor = ? AND success = 1
                  AND ts >= ?""",

coding/proxy/routing/executor.py CHANGED Viewed

@@ -6,6 +6,7 @@
 from __future__ import annotations
+import copy
 import logging
 import time
 from collections.abc import AsyncIterator
@@ -222,10 +223,43 @@ class _RouteExecutor:
     # ── 公开执行入口 ──────────────────────────────────────
+    def _prepare_body_for_tier(
+        self,
+        body: dict[str, Any],
+        tier: VendorTier,
+        normalization: Any = None,
+    ) -> dict[str, Any]:
+        """为指定 tier 准备请求体，必要时应用 Anthropic 专属修复（Phase 2）.
+        仅当 tier 为 Anthropic 且 NormalizationResult 标记需要修复时，
+        才执行 deep copy + Phase 2 修复，确保 Zhipu 等其他 vendor 不受影响。
+        """
+        if normalization is None or not normalization.has_anthropic_fixes:
+            return body
+        if tier.name != "anthropic":
+            return body
+        from ..server.request_normalizer import apply_anthropic_specific_fixes
+        body_for_vendor = copy.deepcopy(body)
+        fixes = apply_anthropic_specific_fixes(
+            body_for_vendor.get("messages", []),
+            normalization.misplaced_tool_results,
+            normalization.misplaced_log_info,
+        )
+        if fixes:
+            logger.debug(
+                "Applied Anthropic-specific fixes for tier %s: %s",
+                tier.name,
+                ", ".join(fixes),
+            )
+        return body_for_vendor
     async def execute_stream(
         self,
         body: dict[str, Any],
         headers: dict[str, str],
+        normalization: Any = None,
     ) -> AsyncIterator[tuple[bytes, str]]:
         """路由流式请求，按优先级尝试各层级."""
         last_idx = len(self._tiers) - 1
@@ -257,7 +291,10 @@ class _RouteExecutor:
             usage: dict[str, Any] = {}
             try:
-                async for chunk in tier.vendor.send_message_stream(body, headers):
+                body_for_tier = self._prepare_body_for_tier(body, tier, normalization)
+                async for chunk in tier.vendor.send_message_stream(
+                    body_for_tier, headers
+                ):
                     parse_usage_from_chunk(
                         chunk,
                         usage,
@@ -276,7 +313,12 @@ class _RouteExecutor:
                         tier.name,
                         usage,
                     )
-                tier.record_success(info.input_tokens + info.output_tokens)
+                tier.record_success(
+                    info.input_tokens
+                    + info.output_tokens
+                    + info.cache_creation_tokens
+                    + info.cache_read_tokens
+                )
                 duration = int((time.monotonic() - start) * 1000)
                 model = body.get("model", "unknown")
                 model_served = usage.get("model_served") or tier.vendor.map_model(model)
@@ -384,6 +426,7 @@ class _RouteExecutor:
         self,
         body: dict[str, Any],
         headers: dict[str, str],
+        normalization: Any = None,
     ) -> VendorResponse:
         """路由非流式请求，按优先级尝试各层级."""
         last_idx = len(self._tiers) - 1
@@ -412,7 +455,8 @@ class _RouteExecutor:
                 continue
             try:
-                resp = await tier.vendor.send_message(body, headers)
+                body_for_tier = self._prepare_body_for_tier(body, tier, normalization)
+                resp = await tier.vendor.send_message(body_for_tier, headers)
                 if resp.status_code < 400:
                     duration = int((time.monotonic() - start) * 1000)

coding/proxy/routing/quota_guard.py CHANGED Viewed

@@ -56,6 +56,18 @@ class QuotaGuard:
         """滑动窗口小时数（供基线加载使用）."""
         return self._window / 3600
+    @property
+    def _window_label(self) -> str:
+        """人类可读的窗口周期短标签."""
+        w = self._window
+        if w >= 86400 and w % 86400 == 0:
+            return f"{w // 86400}d"
+        if w >= 3600 and w % 3600 == 0:
+            return f"{w // 3600}h"
+        if w >= 60 and w % 60 == 0:
+            return f"{w // 60}m"
+        return f"{w}s"
     def can_use_primary(self) -> bool:
         """判断是否可以使用主后端."""
         if not self._enabled:
@@ -68,7 +80,8 @@ class QuotaGuard:
                 ):
                     self._transition_to(QuotaState.QUOTA_EXCEEDED)
                     logger.warning(
-                        "Quota guard: WITHIN_QUOTA → EXCEEDED (%.1f%%)",
+                        "Quota guard [%s]: WITHIN_QUOTA → EXCEEDED (%.1f%%)",
+                        self._window_label,
                         self._total / self._budget * 100,
                     )
                     return False
@@ -80,12 +93,18 @@ class QuotaGuard:
                 and self._total < int(self._budget * self._threshold)
             ):
                 self._transition_to(QuotaState.WITHIN_QUOTA)
-                logger.info("Quota guard: EXCEEDED → WITHIN_QUOTA (usage dropped)")
+                logger.info(
+                    "Quota guard [%s]: EXCEEDED → WITHIN_QUOTA (usage dropped)",
+                    self._window_label,
+                )
                 return True
             now = time.monotonic()
             if now - self._last_probe >= self._effective_probe_interval:
                 self._last_probe = now
-                logger.info("Quota guard: allowing probe request")
+                logger.info(
+                    "Quota guard [%s]: allowing probe request",
+                    self._window_label,
+                )
                 return True
             return False
@@ -104,7 +123,10 @@ class QuotaGuard:
         with self._lock:
             if self._state == QuotaState.QUOTA_EXCEEDED:
                 self._transition_to(QuotaState.WITHIN_QUOTA)
-                logger.info("Quota guard: EXCEEDED → WITHIN_QUOTA (probe success)")
+                logger.info(
+                    "Quota guard [%s]: EXCEEDED → WITHIN_QUOTA (probe success)",
+                    self._window_label,
+                )
     def notify_cap_error(self, retry_after_seconds: float | None = None) -> None:
         """外部通知检测到用量上限错误.
@@ -125,7 +147,8 @@ class QuotaGuard:
                 )
             self._cap_error_active = True
             logger.warning(
-                "Quota guard: cap error detected → EXCEEDED (effective_probe=%ds)",
+                "Quota guard [%s]: cap error detected → EXCEEDED (effective_probe=%ds)",
+                self._window_label,
                 int(self._effective_probe_interval),
             )
@@ -139,12 +162,17 @@ class QuotaGuard:
             self._total += total_tokens
             if vendor:
                 logger.info(
-                    "Quota guard [%s]: loaded baseline %d tokens",
+                    "Quota guard [%s/%s]: loaded baseline %d tokens",
                     vendor,
+                    self._window_label,
                     total_tokens,
                 )
             else:
-                logger.info("Quota guard: loaded baseline %d tokens", total_tokens)
+                logger.info(
+                    "Quota guard [%s]: loaded baseline %d tokens",
+                    self._window_label,
+                    total_tokens,
+                )
     def reset(self) -> None:
         """手动重置为 WITHIN_QUOTA 状态."""
@@ -152,7 +180,10 @@ class QuotaGuard:
             self._transition_to(QuotaState.WITHIN_QUOTA)
             self._entries.clear()
             self._total = 0
-            logger.info("Quota guard: manually reset to WITHIN_QUOTA")
+            logger.info(
+                "Quota guard [%s]: manually reset to WITHIN_QUOTA",
+                self._window_label,
+            )
     def get_info(self) -> dict:
         """获取配额守卫状态信息."""
@@ -166,6 +197,7 @@ class QuotaGuard:
                 if self._budget > 0
                 else 0,
                 "threshold_percent": self._threshold * 100,
+                "window_hours": self.window_hours,
             }
     def _expire(self) -> None:

coding/proxy/routing/router.py CHANGED Viewed

@@ -134,18 +134,24 @@ class RequestRouter:
         self,
         body: dict[str, Any],
         headers: dict[str, str],
+        normalization: Any = None,
     ) -> AsyncIterator[tuple[bytes, str]]:
         """路由流式请求，按优先级尝试各层级."""
-        async for chunk, vendor_name in self._executor.execute_stream(body, headers):
+        async for chunk, vendor_name in self._executor.execute_stream(
+            body, headers, normalization=normalization
+        ):
             yield chunk, vendor_name
     async def route_message(
         self,
         body: dict[str, Any],
         headers: dict[str, str],
+        normalization: Any = None,
     ) -> Any:
         """路由非流式请求，按优先级尝试各层级."""
-        return await self._executor.execute_message(body, headers)
+        return await self._executor.execute_message(
+            body, headers, normalization=normalization
+        )
     # ── 生命周期 ───────────────────────────────────────────

coding-proxy 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

coding-proxy 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl