coding-proxy 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coding/__init__.py +0 -0
- coding/proxy/__init__.py +3 -0
- coding/proxy/__main__.py +5 -0
- coding/proxy/auth/__init__.py +13 -0
- coding/proxy/auth/providers/__init__.py +6 -0
- coding/proxy/auth/providers/base.py +35 -0
- coding/proxy/auth/providers/github.py +133 -0
- coding/proxy/auth/providers/google.py +237 -0
- coding/proxy/auth/runtime.py +122 -0
- coding/proxy/auth/store.py +74 -0
- coding/proxy/cli/__init__.py +151 -0
- coding/proxy/cli/auth_commands.py +224 -0
- coding/proxy/compat/__init__.py +30 -0
- coding/proxy/compat/canonical.py +193 -0
- coding/proxy/compat/session_store.py +137 -0
- coding/proxy/config/__init__.py +6 -0
- coding/proxy/config/auth_schema.py +24 -0
- coding/proxy/config/loader.py +139 -0
- coding/proxy/config/resiliency.py +46 -0
- coding/proxy/config/routing.py +279 -0
- coding/proxy/config/schema.py +280 -0
- coding/proxy/config/server.py +23 -0
- coding/proxy/config/vendors.py +53 -0
- coding/proxy/convert/__init__.py +14 -0
- coding/proxy/convert/anthropic_to_gemini.py +352 -0
- coding/proxy/convert/anthropic_to_openai.py +352 -0
- coding/proxy/convert/gemini_sse_adapter.py +169 -0
- coding/proxy/convert/gemini_to_anthropic.py +98 -0
- coding/proxy/convert/openai_to_anthropic.py +88 -0
- coding/proxy/logging/__init__.py +49 -0
- coding/proxy/logging/db.py +308 -0
- coding/proxy/logging/stats.py +129 -0
- coding/proxy/model/__init__.py +93 -0
- coding/proxy/model/auth.py +32 -0
- coding/proxy/model/compat.py +153 -0
- coding/proxy/model/constants.py +21 -0
- coding/proxy/model/pricing.py +70 -0
- coding/proxy/model/token.py +64 -0
- coding/proxy/model/vendor.py +218 -0
- coding/proxy/pricing.py +100 -0
- coding/proxy/routing/__init__.py +47 -0
- coding/proxy/routing/circuit_breaker.py +152 -0
- coding/proxy/routing/error_classifier.py +67 -0
- coding/proxy/routing/executor.py +453 -0
- coding/proxy/routing/model_mapper.py +90 -0
- coding/proxy/routing/quota_guard.py +169 -0
- coding/proxy/routing/rate_limit.py +159 -0
- coding/proxy/routing/retry.py +82 -0
- coding/proxy/routing/router.py +84 -0
- coding/proxy/routing/session_manager.py +62 -0
- coding/proxy/routing/tier.py +171 -0
- coding/proxy/routing/usage_parser.py +193 -0
- coding/proxy/routing/usage_recorder.py +131 -0
- coding/proxy/server/__init__.py +1 -0
- coding/proxy/server/app.py +142 -0
- coding/proxy/server/factory.py +175 -0
- coding/proxy/server/request_normalizer.py +139 -0
- coding/proxy/server/responses.py +74 -0
- coding/proxy/server/routes.py +264 -0
- coding/proxy/streaming/__init__.py +1 -0
- coding/proxy/streaming/anthropic_compat.py +484 -0
- coding/proxy/vendors/__init__.py +29 -0
- coding/proxy/vendors/anthropic.py +44 -0
- coding/proxy/vendors/antigravity.py +328 -0
- coding/proxy/vendors/base.py +353 -0
- coding/proxy/vendors/copilot.py +702 -0
- coding/proxy/vendors/copilot_models.py +438 -0
- coding/proxy/vendors/copilot_token_manager.py +167 -0
- coding/proxy/vendors/copilot_urls.py +16 -0
- coding/proxy/vendors/mixins.py +71 -0
- coding/proxy/vendors/token_manager.py +128 -0
- coding/proxy/vendors/zhipu.py +243 -0
- coding_proxy-0.1.0.dist-info/METADATA +184 -0
- coding_proxy-0.1.0.dist-info/RECORD +77 -0
- coding_proxy-0.1.0.dist-info/WHEEL +4 -0
- coding_proxy-0.1.0.dist-info/entry_points.txt +2 -0
- coding_proxy-0.1.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""供应商层级 — 将供应商实例与弹性设施(熔断器 + 配额守卫)聚合为路由单元."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import time
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
|
|
10
|
+
from ..vendors.base import BaseVendor
|
|
11
|
+
from .circuit_breaker import CircuitBreaker, CircuitState
|
|
12
|
+
from .quota_guard import QuotaGuard, QuotaState
|
|
13
|
+
from .retry import RetryConfig
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class VendorTier:
    """One routing tier: a vendor instance plus its resiliency companions.

    Bundles the vendor together with its circuit breaker and quota guards so
    the router can treat the trio as a single failover unit.
    """

    vendor: BaseVendor
    circuit_breaker: CircuitBreaker | None = field(default=None)
    quota_guard: QuotaGuard | None = field(default=None)
    weekly_quota_guard: QuotaGuard | None = field(default=None)
    retry_config: RetryConfig | None = field(default=None)

    # Exact rate-limit deadline as a monotonic timestamp; 0 means "not limited".
    _rate_limit_deadline: float = field(default=0.0, repr=False)

    @property
    def name(self) -> str:
        """Display name, delegated to the wrapped vendor."""
        return self.vendor.get_name()

    @property
    def is_terminal(self) -> bool:
        """A terminal tier has no circuit breaker and never triggers failover."""
        return self.circuit_breaker is None

    @property
    def rate_limit_remaining_seconds(self) -> float:
        """Seconds left in the rate-limit cooldown (0.0 once expired)."""
        wait_left = self._rate_limit_deadline - time.monotonic()
        return wait_left if wait_left > 0.0 else 0.0

    @property
    def is_rate_limited(self) -> bool:
        """Whether the tier is still inside its rate-limit cooldown window."""
        return time.monotonic() < self._rate_limit_deadline

    def can_execute(self) -> bool:
        """Fast-path availability check combining breaker and quota gates."""
        # `and` keeps the original short-circuit order: breaker first, then
        # the daily guard, then the weekly guard.
        return (
            (self.circuit_breaker is None or self.circuit_breaker.can_execute())
            and (self.quota_guard is None or self.quota_guard.can_use_primary())
            and (self.weekly_quota_guard is None or self.weekly_quota_guard.can_use_primary())
        )

    def record_success(self, usage_tokens: int = 0) -> None:
        """Record a success: notify breaker and guards, clear the rate-limit deadline."""
        if self.circuit_breaker is not None:
            self.circuit_breaker.record_success()
        for guard in (self.quota_guard, self.weekly_quota_guard):
            if guard is None:
                continue
            guard.record_primary_success()
            if usage_tokens > 0:
                guard.record_usage(usage_tokens)
        self._rate_limit_deadline = 0.0

    def record_failure(
        self,
        *,
        is_cap_error: bool = False,
        retry_after_seconds: float | None = None,
        rate_limit_deadline: float | None = None,
    ) -> None:
        """Record a failure: notify the breaker; on cap errors, the quota guards too.

        Args:
            is_cap_error: whether the failure was a quota-cap error.
            retry_after_seconds: suggested recovery delay parsed from response headers.
            rate_limit_deadline: exact rate-limit expiry as a monotonic timestamp.
        """
        if self.circuit_breaker is not None:
            self.circuit_breaker.record_failure(retry_after_seconds=retry_after_seconds)
        if is_cap_error:
            for guard in (self.quota_guard, self.weekly_quota_guard):
                if guard is not None:
                    guard.notify_cap_error(retry_after_seconds=retry_after_seconds)

        # Only ever push the deadline forward, never pull it back.
        if rate_limit_deadline is not None and rate_limit_deadline > self._rate_limit_deadline:
            self._rate_limit_deadline = rate_limit_deadline
            logger.info(
                "Tier %s: rate limit deadline updated, %.1fs remaining",
                self.name,
                rate_limit_deadline - time.monotonic(),
            )

    async def can_execute_with_health_check(self) -> bool:
        """Slow-path availability check with recovery gating (async).

        Three recovery gates:
        1. Rate-limit deadline — reject outright until the deadline passes.
        2. Health check — lightweight vendor health probe.
        3. Cautious probe — after passing 1 and 2, let a real request act as probe.
        """
        # ── gate 1: rate-limit deadline ──
        if self.is_rate_limited:
            logger.debug(
                "Tier %s: rate limit deadline active, %.1fs remaining, blocking",
                self.name,
                self.rate_limit_remaining_seconds,
            )
            return False

        cb_ok = self.circuit_breaker.can_execute() if self.circuit_breaker else True
        qg_ok = self.quota_guard.can_use_primary() if self.quota_guard else True
        wqg_ok = self.weekly_quota_guard.can_use_primary() if self.weekly_quota_guard else True

        # Everything blocked at once: no probe can help, bail out.
        if not (cb_ok or qg_ok or wqg_ok):
            return False

        # Detect whether any gate is in a probing (recovery) state.
        needs_probe = (
            self.circuit_breaker is not None
            and self.circuit_breaker.state == CircuitState.HALF_OPEN
        )
        # A quota guard that still reports QUOTA_EXCEEDED but answered True is
        # explicitly allowing a probe.
        if self.quota_guard and qg_ok and self.quota_guard._state == QuotaState.QUOTA_EXCEEDED:
            needs_probe = True
        if (
            self.weekly_quota_guard
            and wqg_ok
            and self.weekly_quota_guard._state == QuotaState.QUOTA_EXCEEDED
        ):
            needs_probe = True

        if not needs_probe:
            return cb_ok and qg_ok and wqg_ok

        # ── gate 2: health check ──
        logger.info("Tier %s: probe scenario, running health check", self.name)
        if not await self.vendor.check_health():
            logger.warning("Tier %s: health check failed, staying degraded", self.name)
            self.record_failure()
            return False

        # ── gate 3: cautious probe (let a real request through) ──
        logger.info("Tier %s: health check passed, allowing cautious probe", self.name)
        return True

    def reset_rate_limit(self) -> None:
        """Manually clear the rate-limit deadline."""
        self._rate_limit_deadline = 0.0

    def get_rate_limit_info(self) -> dict:
        """Snapshot of the rate-limit deadline state."""
        now = time.monotonic()
        wait_left = max(0.0, self._rate_limit_deadline - now)
        return {
            "is_rate_limited": now < self._rate_limit_deadline,
            "remaining_seconds": round(wait_left, 1),
        }


# Backward-compatible alias
BackendTier = VendorTier
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""SSE 流式响应用量解析工具.
|
|
2
|
+
|
|
3
|
+
从 Anthropic / OpenAI / Zhipu 兼容格式的 SSE chunk 中提取 token 用量信息,
|
|
4
|
+
支持多源归一化与 evidence 追踪。
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from ..vendors.base import UsageInfo
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _set_if_nonzero(usage: dict, key: str, value: int) -> None:
|
|
19
|
+
"""仅在 value 非零时设置,避免后续 chunk 的 0 值覆盖已提取的非零值.
|
|
20
|
+
|
|
21
|
+
同时处理 None 值,确保数据类型正确性。
|
|
22
|
+
"""
|
|
23
|
+
if value is not None and value != 0:
|
|
24
|
+
usage[key] = value
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _append_usage_evidence(
|
|
28
|
+
usage: dict[str, Any],
|
|
29
|
+
*,
|
|
30
|
+
evidence_kind: str,
|
|
31
|
+
raw_usage: dict[str, Any],
|
|
32
|
+
request_id: str | None = None,
|
|
33
|
+
model_served: str | None = None,
|
|
34
|
+
) -> None:
|
|
35
|
+
entries = usage.setdefault("_usage_evidence", [])
|
|
36
|
+
if not isinstance(entries, list):
|
|
37
|
+
return
|
|
38
|
+
entries.append({
|
|
39
|
+
"evidence_kind": evidence_kind,
|
|
40
|
+
"raw_usage": raw_usage,
|
|
41
|
+
"request_id": request_id or "",
|
|
42
|
+
"model_served": model_served or "",
|
|
43
|
+
"source_field_map": {
|
|
44
|
+
"input_tokens": next(
|
|
45
|
+
(key for key in ("input_tokens", "prompt_tokens") if key in raw_usage),
|
|
46
|
+
"",
|
|
47
|
+
),
|
|
48
|
+
"output_tokens": next(
|
|
49
|
+
(key for key in ("output_tokens", "completion_tokens") if key in raw_usage),
|
|
50
|
+
"",
|
|
51
|
+
),
|
|
52
|
+
"cache_creation_tokens": next(
|
|
53
|
+
(key for key in ("cache_creation_input_tokens",) if key in raw_usage),
|
|
54
|
+
"",
|
|
55
|
+
),
|
|
56
|
+
"cache_read_tokens": next(
|
|
57
|
+
(
|
|
58
|
+
key for key in (
|
|
59
|
+
"cache_read_input_tokens",
|
|
60
|
+
"cached_tokens",
|
|
61
|
+
) if key in raw_usage
|
|
62
|
+
),
|
|
63
|
+
"",
|
|
64
|
+
),
|
|
65
|
+
},
|
|
66
|
+
"cache_signal_present": any(
|
|
67
|
+
key in raw_usage
|
|
68
|
+
for key in ("cache_creation_input_tokens", "cache_read_input_tokens", "cached_tokens")
|
|
69
|
+
),
|
|
70
|
+
})
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def build_usage_evidence_records(
|
|
74
|
+
usage: dict[str, Any],
|
|
75
|
+
*,
|
|
76
|
+
vendor: str,
|
|
77
|
+
model_served: str,
|
|
78
|
+
request_id: str,
|
|
79
|
+
) -> list[dict[str, Any]]:
|
|
80
|
+
records: list[dict[str, Any]] = []
|
|
81
|
+
entries = usage.get("_usage_evidence", [])
|
|
82
|
+
if not isinstance(entries, list):
|
|
83
|
+
return records
|
|
84
|
+
|
|
85
|
+
for entry in entries:
|
|
86
|
+
if not isinstance(entry, dict):
|
|
87
|
+
continue
|
|
88
|
+
raw_usage = entry.get("raw_usage")
|
|
89
|
+
if not isinstance(raw_usage, dict):
|
|
90
|
+
continue
|
|
91
|
+
source_field_map = entry.get("source_field_map")
|
|
92
|
+
if not isinstance(source_field_map, dict):
|
|
93
|
+
source_field_map = {}
|
|
94
|
+
records.append({
|
|
95
|
+
"vendor": vendor,
|
|
96
|
+
"request_id": str(entry.get("request_id") or request_id or ""),
|
|
97
|
+
"model_served": str(entry.get("model_served") or model_served or ""),
|
|
98
|
+
"evidence_kind": str(entry.get("evidence_kind") or "stream_usage"),
|
|
99
|
+
"raw_usage_json": json.dumps(raw_usage, ensure_ascii=False, sort_keys=True),
|
|
100
|
+
"parsed_input_tokens": usage.get("input_tokens", 0),
|
|
101
|
+
"parsed_output_tokens": usage.get("output_tokens", 0),
|
|
102
|
+
"parsed_cache_creation_tokens": usage.get("cache_creation_tokens", 0),
|
|
103
|
+
"parsed_cache_read_tokens": usage.get("cache_read_tokens", 0),
|
|
104
|
+
"cache_signal_present": bool(entry.get("cache_signal_present")),
|
|
105
|
+
"source_field_map_json": json.dumps(source_field_map, ensure_ascii=False, sort_keys=True),
|
|
106
|
+
})
|
|
107
|
+
return records
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def parse_usage_from_chunk(chunk: bytes, usage: dict, *, vendor_label: str | None = None) -> None:
|
|
111
|
+
"""从 SSE chunk 提取 token 用量.
|
|
112
|
+
|
|
113
|
+
同时支持 Anthropic 原生格式和 OpenAI/Zhipu 兼容格式:
|
|
114
|
+
- Anthropic: data.message.usage.input_tokens / data.usage.output_tokens
|
|
115
|
+
- OpenAI/Zhipu: 顶层 data.usage.prompt_tokens / data.usage.completion_tokens
|
|
116
|
+
|
|
117
|
+
:param vendor_label: 上游 Vendor 标签(如 "Anthropic"、"OpenAI"、"Gemini"),
|
|
118
|
+
用于日志标注实际来源协议,由调用方根据 tier.name 传入。
|
|
119
|
+
"""
|
|
120
|
+
text = chunk.decode("utf-8", errors="ignore")
|
|
121
|
+
for line in text.split("\n"):
|
|
122
|
+
if not line.startswith("data: "):
|
|
123
|
+
continue
|
|
124
|
+
payload = line[6:].strip()
|
|
125
|
+
if not payload or payload == "[DONE]":
|
|
126
|
+
continue
|
|
127
|
+
try:
|
|
128
|
+
data = json.loads(payload)
|
|
129
|
+
except json.JSONDecodeError:
|
|
130
|
+
continue
|
|
131
|
+
|
|
132
|
+
# Anthropic 格式: message_start 事件 (data.message.usage)
|
|
133
|
+
msg = data.get("message", {})
|
|
134
|
+
if isinstance(msg, dict) and "usage" in msg:
|
|
135
|
+
u = msg["usage"]
|
|
136
|
+
input_tokens = u.get("input_tokens", 0) or u.get("prompt_tokens", 0)
|
|
137
|
+
if input_tokens > 0:
|
|
138
|
+
logger.debug("Extracted input tokens from message.usage: %d", input_tokens)
|
|
139
|
+
_set_if_nonzero(usage, "input_tokens", input_tokens)
|
|
140
|
+
_set_if_nonzero(usage, "cache_creation_tokens", u.get("cache_creation_input_tokens", 0))
|
|
141
|
+
_set_if_nonzero(usage, "cache_read_tokens", u.get("cache_read_input_tokens", 0))
|
|
142
|
+
if "id" in msg:
|
|
143
|
+
usage["request_id"] = msg["id"]
|
|
144
|
+
if "model" in msg:
|
|
145
|
+
usage["model_served"] = msg["model"]
|
|
146
|
+
if isinstance(u, dict):
|
|
147
|
+
_append_usage_evidence(
|
|
148
|
+
usage,
|
|
149
|
+
evidence_kind="message_usage",
|
|
150
|
+
raw_usage=dict(u),
|
|
151
|
+
request_id=msg.get("id"),
|
|
152
|
+
model_served=msg.get("model"),
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
# Anthropic message_delta / OpenAI 最后一个 chunk (data.usage)
|
|
156
|
+
if "usage" in data:
|
|
157
|
+
u = data["usage"]
|
|
158
|
+
output_tokens = u.get("output_tokens", 0) or u.get("completion_tokens", 0)
|
|
159
|
+
input_tokens = u.get("input_tokens", 0) or u.get("prompt_tokens", 0)
|
|
160
|
+
cache_creation_tokens = u.get("cache_creation_input_tokens", 0)
|
|
161
|
+
cache_read_tokens = u.get("cache_read_input_tokens", 0)
|
|
162
|
+
|
|
163
|
+
_label = f" ({vendor_label})" if vendor_label else ""
|
|
164
|
+
if output_tokens > 0:
|
|
165
|
+
logger.debug("Extracted output tokens from data.usage: %d%s", output_tokens, _label)
|
|
166
|
+
if input_tokens > 0:
|
|
167
|
+
logger.debug("Extracted input tokens from data.usage: %d%s", input_tokens, _label)
|
|
168
|
+
|
|
169
|
+
_set_if_nonzero(usage, "output_tokens", output_tokens)
|
|
170
|
+
_set_if_nonzero(usage, "input_tokens", input_tokens)
|
|
171
|
+
_set_if_nonzero(usage, "cache_creation_tokens", cache_creation_tokens)
|
|
172
|
+
_set_if_nonzero(usage, "cache_read_tokens", cache_read_tokens)
|
|
173
|
+
if isinstance(u, dict):
|
|
174
|
+
_append_usage_evidence(
|
|
175
|
+
usage,
|
|
176
|
+
evidence_kind="data_usage",
|
|
177
|
+
raw_usage=dict(u),
|
|
178
|
+
request_id=data.get("id"),
|
|
179
|
+
model_served=data.get("model"),
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
# request_id fallback (OpenAI 格式下 id 在顶层)
|
|
183
|
+
if "id" in data and not usage.get("request_id"):
|
|
184
|
+
usage["request_id"] = data["id"]
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def has_missing_input_usage_signals(info: UsageInfo) -> bool:
    """Return True when a streaming request produced output but carries no
    explainable input-side usage signal (neither direct input tokens nor any
    cache creation/read tokens)."""
    produced_output = info.output_tokens > 0
    has_input_signal = (
        info.input_tokens > 0
        or info.cache_creation_tokens > 0
        or info.cache_read_tokens > 0
    )
    return produced_output and not has_input_signal
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""用量记录器 — 封装 token 用量日志、定价计算与证据构建."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import time
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from ..pricing import PricingTable
|
|
12
|
+
from ..logging.db import TokenLogger
|
|
13
|
+
from .usage_parser import UsageInfo
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class UsageRecorder:
    """Encapsulates routing-layer usage logging, pricing output, and evidence building."""

    def __init__(
        self,
        token_logger: TokenLogger | None = None,
        pricing_table: PricingTable | None = None,
    ) -> None:
        self._token_logger = token_logger
        self._pricing_table = pricing_table

    def set_pricing_table(self, table: PricingTable) -> None:
        """Install (or replace) the pricing table used for cost formatting."""
        self._pricing_table = table

    # ── usage-info construction ───────────────────────────

    @staticmethod
    def build_usage_info(usage: dict[str, Any]) -> UsageInfo:
        """Convert a raw usage dict into a typed ``UsageInfo`` value."""
        from .usage_parser import UsageInfo

        token_fields = {
            "input_tokens": usage.get("input_tokens", 0),
            "output_tokens": usage.get("output_tokens", 0),
            "cache_creation_tokens": usage.get("cache_creation_tokens", 0),
            "cache_read_tokens": usage.get("cache_read_tokens", 0),
            "request_id": usage.get("request_id", ""),
        }
        return UsageInfo(**token_fields)

    # ── model-call logging ────────────────────────────────

    def _format_cost(self, *, vendor: str, model_served: str, usage: UsageInfo) -> str:
        """Compute and format the call cost, or "-" when pricing is unavailable."""
        if self._pricing_table is None:
            return "-"
        cost_value = self._pricing_table.compute_cost(
            vendor=vendor,
            model_served=model_served,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            cache_creation_tokens=usage.cache_creation_tokens,
            cache_read_tokens=usage.cache_read_tokens,
        )
        return "-" if cost_value is None else cost_value.format()

    def log_model_call(
        self,
        *,
        vendor: str,
        model_requested: str,
        model_served: str,
        duration_ms: int,
        usage: UsageInfo,
    ) -> None:
        """Emit the detailed per-model-call access log line."""
        logger.info(
            "ModelCall: vendor=%s model_requested=%s model_served=%s "
            "duration=%dms tokens=[in:%d out:%d cache_create:%d cache_read:%d] cost=%s",
            vendor, model_requested, model_served, duration_ms,
            usage.input_tokens, usage.output_tokens,
            usage.cache_creation_tokens, usage.cache_read_tokens,
            self._format_cost(vendor=vendor, model_served=model_served, usage=usage),
        )

    # ── persistence ───────────────────────────────────────

    async def record(
        self,
        vendor: str,
        model_requested: str,
        model_served: str,
        usage: UsageInfo,
        duration_ms: int,
        success: bool,
        failover: bool,
        failover_from: str | None = None,
        evidence_records: list[dict[str, Any]] | None = None,
    ) -> None:
        """Persist one usage row, plus copilot evidence rows when supported."""
        if not self._token_logger:
            return
        await self._token_logger.log(
            vendor=vendor, model_requested=model_requested, model_served=model_served,
            input_tokens=usage.input_tokens, output_tokens=usage.output_tokens,
            cache_creation_tokens=usage.cache_creation_tokens, cache_read_tokens=usage.cache_read_tokens,
            duration_ms=duration_ms, success=success, failover=failover, failover_from=failover_from,
            request_id=usage.request_id,
        )
        # Evidence rows are only kept for copilot, and only when the backing
        # logger implementation knows how to store them.
        wants_evidence = (
            bool(evidence_records)
            and vendor == "copilot"
            and hasattr(self._token_logger, "log_evidence")
        )
        if not wants_evidence:
            return
        for entry in evidence_records:
            await self._token_logger.log_evidence(**entry)

    # ── evidence-record construction ──────────────────────

    @staticmethod
    def build_nonstream_evidence_records(*, vendor: str, model_served: str, usage: UsageInfo) -> list[dict[str, Any]]:
        """Build the single evidence row for a non-streaming copilot response."""
        if vendor != "copilot":
            return []
        has_cache_creation = usage.cache_creation_tokens > 0
        has_cache_read = usage.cache_read_tokens > 0
        raw_usage: dict[str, Any] = {
            "input_tokens": usage.input_tokens,
            "output_tokens": usage.output_tokens,
        }
        if has_cache_creation:
            raw_usage["cache_creation_input_tokens"] = usage.cache_creation_tokens
        if has_cache_read:
            raw_usage["cache_read_input_tokens"] = usage.cache_read_tokens
        field_map = {
            "input_tokens": "input_tokens",
            "output_tokens": "output_tokens",
            "cache_creation_tokens": "cache_creation_input_tokens" if has_cache_creation else "",
            "cache_read_tokens": "cache_read_input_tokens" if has_cache_read else "",
        }
        return [{
            "vendor": vendor,
            "request_id": usage.request_id,
            "model_served": model_served,
            "evidence_kind": "nonstream_usage_summary",
            "raw_usage_json": json.dumps(raw_usage, ensure_ascii=False, sort_keys=True),
            "parsed_input_tokens": usage.input_tokens,
            "parsed_output_tokens": usage.output_tokens,
            "parsed_cache_creation_tokens": usage.cache_creation_tokens,
            "parsed_cache_read_tokens": usage.cache_read_tokens,
            "cache_signal_present": has_cache_creation or has_cache_read,
            "source_field_map_json": json.dumps(field_map, ensure_ascii=False, sort_keys=True),
        }]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""服务模块."""
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""FastAPI 应用工厂与生命周期管理.
|
|
2
|
+
|
|
3
|
+
路由端点注册已正交分解至 :mod:`.routes`.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from contextlib import asynccontextmanager
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from fastapi import FastAPI
|
|
14
|
+
|
|
15
|
+
from ..auth.providers.github import GitHubDeviceFlowProvider
|
|
16
|
+
from ..auth.providers.google import GoogleOAuthProvider
|
|
17
|
+
from ..auth.runtime import RuntimeReauthCoordinator
|
|
18
|
+
from ..auth.store import TokenStoreManager
|
|
19
|
+
from ..vendors.antigravity import AntigravityVendor
|
|
20
|
+
from ..vendors.copilot import CopilotVendor
|
|
21
|
+
from ..config.loader import load_config
|
|
22
|
+
from ..compat.session_store import CompatSessionStore
|
|
23
|
+
from ..config.schema import ProxyConfig
|
|
24
|
+
from ..logging.db import TokenLogger
|
|
25
|
+
from ..routing.router import RequestRouter
|
|
26
|
+
from ..routing.tier import VendorTier
|
|
27
|
+
from .factory import ( # noqa: F401
|
|
28
|
+
_build_circuit_breaker,
|
|
29
|
+
_build_quota_guard,
|
|
30
|
+
_create_vendor_from_config,
|
|
31
|
+
)
|
|
32
|
+
from .routes import register_all_routes
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application startup/shutdown lifecycle.

    Startup: init stores, install the pricing table, and seed each quota
    guard with its usage baseline from the token log. Shutdown: close the
    router and both stores in reverse dependency order.
    """
    state = app.state
    router: RequestRouter = state.router
    token_logger: TokenLogger = state.token_logger
    compat_session_store: CompatSessionStore = state.compat_session_store
    config: ProxyConfig = state.config

    await token_logger.init()
    await compat_session_store.init()

    # Load the model pricing table from config.
    from ..pricing import PricingTable

    pricing_table = PricingTable(config.pricing)
    state.pricing_table = pricing_table
    router.set_pricing_table(pricing_table)

    # Seed every enabled quota guard (daily first, then weekly) with the
    # token total already accumulated inside its window.
    for tier in router.tiers:
        for guard in (tier.quota_guard, tier.weekly_quota_guard):
            if guard and guard.enabled:
                baseline = await token_logger.query_window_total(
                    guard.window_hours,
                    vendor=tier.name,
                )
                guard.load_baseline(baseline)

    logger.info("coding-proxy started: host=%s port=%d", config.server.host, config.server.port)
    yield
    await router.close()
    await compat_session_store.close()
    await token_logger.close()
    logger.info("coding-proxy stopped")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def create_app(config: ProxyConfig | None = None) -> FastAPI:
    """Create the FastAPI application instance.

    Builds the vendor tiers, routing chain, re-auth coordinator and stores,
    wires them onto ``app.state``, and registers all HTTP routes. Heavy async
    initialization (DB init, quota baselines) happens later in ``lifespan``.
    """
    if config is None:
        config = load_config()

    token_logger = TokenLogger(config.db_path)
    compat_session_store = CompatSessionStore(
        config.compat_state_path,
        ttl_seconds=config.database.compat_state_ttl_seconds,
    )
    from ..routing.model_mapper import ModelMapper  # noqa: E402

    mapper = ModelMapper(config.model_mapping)

    # Load the token store used for credential merging.
    token_store = TokenStoreManager(
        store_path=Path(config.auth.token_store_path) if config.auth.token_store_path else None
    )
    token_store.load()

    # Phase 1: build the vendor_name → VendorTier map (order-independent).
    _vendor_map: dict[str, Any] = {}
    for vendor_cfg in config.vendors:
        if not vendor_cfg.enabled:
            continue
        vendor = _create_vendor_from_config(vendor_cfg, config.failover, mapper, token_store)
        cb = _build_circuit_breaker(vendor_cfg.circuit_breaker) if vendor_cfg.circuit_breaker else None
        qg = _build_quota_guard(vendor_cfg.quota_guard)
        wqg = _build_quota_guard(vendor_cfg.weekly_quota_guard)
        _vendor_map[vendor_cfg.vendor] = VendorTier(vendor=vendor, circuit_breaker=cb, quota_guard=qg, weekly_quota_guard=wqg)

    # Phase 2: assemble the final chain in the order given by config.tiers
    # (falling back to the original vendors order when tiers is unset).
    if config.tiers is not None:
        # Tier names with no matching enabled vendor are silently dropped.
        tiers = [_vendor_map[name] for name in config.tiers if name in _vendor_map]
    else:
        tiers = [_vendor_map[v.vendor] for v in config.vendors if v.enabled]

    # Build the runtime re-authentication coordinator.
    # NOTE(review): if several tiers share the same vendor class, only the
    # last tier's token updater is kept per provider key — confirm intended.
    reauth_providers: dict[str, Any] = {}
    token_updaters: dict[str, Any] = {}
    for tier in tiers:
        if isinstance(tier.vendor, CopilotVendor):
            reauth_providers["github"] = GitHubDeviceFlowProvider()
            token_updaters["github"] = tier.vendor._token_manager.update_github_token
        elif isinstance(tier.vendor, AntigravityVendor):
            reauth_providers["google"] = GoogleOAuthProvider()
            token_updaters["google"] = tier.vendor._token_manager.update_refresh_token

    reauth_coordinator: RuntimeReauthCoordinator | None = None
    if reauth_providers:
        reauth_coordinator = RuntimeReauthCoordinator(token_store, reauth_providers, token_updaters)

    router = RequestRouter(tiers, token_logger, reauth_coordinator, compat_session_store)

    app = FastAPI(title="coding-proxy", version="0.1.0", lifespan=lifespan)
    app.state.router = router
    app.state.token_logger = token_logger
    app.state.compat_session_store = compat_session_store
    app.state.config = config
    app.state.reauth_coordinator = reauth_coordinator

    # Register all route endpoints.
    register_all_routes(app, router, reauth_coordinator)

    return app
|