coding-proxy 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. coding/__init__.py +0 -0
  2. coding/proxy/__init__.py +3 -0
  3. coding/proxy/__main__.py +5 -0
  4. coding/proxy/auth/__init__.py +13 -0
  5. coding/proxy/auth/providers/__init__.py +6 -0
  6. coding/proxy/auth/providers/base.py +35 -0
  7. coding/proxy/auth/providers/github.py +133 -0
  8. coding/proxy/auth/providers/google.py +237 -0
  9. coding/proxy/auth/runtime.py +122 -0
  10. coding/proxy/auth/store.py +74 -0
  11. coding/proxy/cli/__init__.py +151 -0
  12. coding/proxy/cli/auth_commands.py +224 -0
  13. coding/proxy/compat/__init__.py +30 -0
  14. coding/proxy/compat/canonical.py +193 -0
  15. coding/proxy/compat/session_store.py +137 -0
  16. coding/proxy/config/__init__.py +6 -0
  17. coding/proxy/config/auth_schema.py +24 -0
  18. coding/proxy/config/loader.py +139 -0
  19. coding/proxy/config/resiliency.py +46 -0
  20. coding/proxy/config/routing.py +279 -0
  21. coding/proxy/config/schema.py +280 -0
  22. coding/proxy/config/server.py +23 -0
  23. coding/proxy/config/vendors.py +53 -0
  24. coding/proxy/convert/__init__.py +14 -0
  25. coding/proxy/convert/anthropic_to_gemini.py +352 -0
  26. coding/proxy/convert/anthropic_to_openai.py +352 -0
  27. coding/proxy/convert/gemini_sse_adapter.py +169 -0
  28. coding/proxy/convert/gemini_to_anthropic.py +98 -0
  29. coding/proxy/convert/openai_to_anthropic.py +88 -0
  30. coding/proxy/logging/__init__.py +49 -0
  31. coding/proxy/logging/db.py +308 -0
  32. coding/proxy/logging/stats.py +129 -0
  33. coding/proxy/model/__init__.py +93 -0
  34. coding/proxy/model/auth.py +32 -0
  35. coding/proxy/model/compat.py +153 -0
  36. coding/proxy/model/constants.py +21 -0
  37. coding/proxy/model/pricing.py +70 -0
  38. coding/proxy/model/token.py +64 -0
  39. coding/proxy/model/vendor.py +218 -0
  40. coding/proxy/pricing.py +100 -0
  41. coding/proxy/routing/__init__.py +47 -0
  42. coding/proxy/routing/circuit_breaker.py +152 -0
  43. coding/proxy/routing/error_classifier.py +67 -0
  44. coding/proxy/routing/executor.py +453 -0
  45. coding/proxy/routing/model_mapper.py +90 -0
  46. coding/proxy/routing/quota_guard.py +169 -0
  47. coding/proxy/routing/rate_limit.py +159 -0
  48. coding/proxy/routing/retry.py +82 -0
  49. coding/proxy/routing/router.py +84 -0
  50. coding/proxy/routing/session_manager.py +62 -0
  51. coding/proxy/routing/tier.py +171 -0
  52. coding/proxy/routing/usage_parser.py +193 -0
  53. coding/proxy/routing/usage_recorder.py +131 -0
  54. coding/proxy/server/__init__.py +1 -0
  55. coding/proxy/server/app.py +142 -0
  56. coding/proxy/server/factory.py +175 -0
  57. coding/proxy/server/request_normalizer.py +139 -0
  58. coding/proxy/server/responses.py +74 -0
  59. coding/proxy/server/routes.py +264 -0
  60. coding/proxy/streaming/__init__.py +1 -0
  61. coding/proxy/streaming/anthropic_compat.py +484 -0
  62. coding/proxy/vendors/__init__.py +29 -0
  63. coding/proxy/vendors/anthropic.py +44 -0
  64. coding/proxy/vendors/antigravity.py +328 -0
  65. coding/proxy/vendors/base.py +353 -0
  66. coding/proxy/vendors/copilot.py +702 -0
  67. coding/proxy/vendors/copilot_models.py +438 -0
  68. coding/proxy/vendors/copilot_token_manager.py +167 -0
  69. coding/proxy/vendors/copilot_urls.py +16 -0
  70. coding/proxy/vendors/mixins.py +71 -0
  71. coding/proxy/vendors/token_manager.py +128 -0
  72. coding/proxy/vendors/zhipu.py +243 -0
  73. coding_proxy-0.1.0.dist-info/METADATA +184 -0
  74. coding_proxy-0.1.0.dist-info/RECORD +77 -0
  75. coding_proxy-0.1.0.dist-info/WHEEL +4 -0
  76. coding_proxy-0.1.0.dist-info/entry_points.txt +2 -0
  77. coding_proxy-0.1.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,171 @@
1
+ """供应商层级 — 将供应商实例与弹性设施(熔断器 + 配额守卫)聚合为路由单元."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import time
7
+
8
+ from dataclasses import dataclass, field
9
+
10
+ from ..vendors.base import BaseVendor
11
+ from .circuit_breaker import CircuitBreaker, CircuitState
12
+ from .quota_guard import QuotaGuard, QuotaState
13
+ from .retry import RetryConfig
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
@dataclass
class VendorTier:
    """A single routing tier: one vendor instance plus its resiliency gear.

    Bundles a :class:`BaseVendor` with an optional circuit breaker, a
    session quota guard, a weekly quota guard and a retry configuration so
    the router can treat the whole unit as one failover candidate.
    """

    vendor: BaseVendor
    circuit_breaker: CircuitBreaker | None = field(default=None)
    quota_guard: QuotaGuard | None = field(default=None)
    weekly_quota_guard: QuotaGuard | None = field(default=None)
    retry_config: RetryConfig | None = field(default=None)

    # Exact rate-limit deadline as a time.monotonic() timestamp; 0.0 means
    # "not rate limited".  Hidden from repr to keep log output tidy.
    _rate_limit_deadline: float = field(default=0.0, repr=False)

    @property
    def name(self) -> str:
        """Vendor display name, delegated to the wrapped vendor."""
        return self.vendor.get_name()

    @property
    def is_terminal(self) -> bool:
        """A terminal tier has no circuit breaker and never triggers failover."""
        return self.circuit_breaker is None

    @property
    def rate_limit_remaining_seconds(self) -> float:
        """Seconds left in the rate-limit cooldown (0.0 once expired)."""
        return max(0.0, self._rate_limit_deadline - time.monotonic())

    @property
    def is_rate_limited(self) -> bool:
        """Whether the tier is still inside its rate-limit cooldown window."""
        return self._rate_limit_deadline > time.monotonic()

    def can_execute(self) -> bool:
        """Fast synchronous availability check: every configured gate must allow."""
        if self.circuit_breaker and not self.circuit_breaker.can_execute():
            return False
        if self.quota_guard and not self.quota_guard.can_use_primary():
            return False
        if self.weekly_quota_guard and not self.weekly_quota_guard.can_use_primary():
            return False
        return True

    def record_success(self, usage_tokens: int = 0) -> None:
        """Record a successful call.

        Notifies the circuit breaker and both quota guards, forwards token
        usage to the guards when positive, and clears any pending
        rate-limit deadline.
        """
        if self.circuit_breaker:
            self.circuit_breaker.record_success()
        if self.quota_guard:
            self.quota_guard.record_primary_success()
            if usage_tokens > 0:
                self.quota_guard.record_usage(usage_tokens)
        if self.weekly_quota_guard:
            self.weekly_quota_guard.record_primary_success()
            if usage_tokens > 0:
                self.weekly_quota_guard.record_usage(usage_tokens)
        self._rate_limit_deadline = 0.0

    def record_failure(
        self,
        *,
        is_cap_error: bool = False,
        retry_after_seconds: float | None = None,
        rate_limit_deadline: float | None = None,
    ) -> None:
        """Record a failed call.

        Notifies the circuit breaker; quota guards are only notified for
        cap (quota-exceeded) errors.  The rate-limit deadline is updated
        with max-merge semantics — it only ever moves forward.

        Args:
            is_cap_error: whether the failure was a quota-cap error
            retry_after_seconds: suggested recovery delay parsed from response headers
            rate_limit_deadline: exact monotonic timestamp when the rate limit lifts
        """
        if self.circuit_breaker:
            self.circuit_breaker.record_failure(retry_after_seconds=retry_after_seconds)
        if self.quota_guard and is_cap_error:
            self.quota_guard.notify_cap_error(retry_after_seconds=retry_after_seconds)
        if self.weekly_quota_guard and is_cap_error:
            self.weekly_quota_guard.notify_cap_error(retry_after_seconds=retry_after_seconds)

        # Only extend the deadline, never shorten it.
        if rate_limit_deadline is not None and rate_limit_deadline > self._rate_limit_deadline:
            self._rate_limit_deadline = rate_limit_deadline
            logger.info(
                "Tier %s: rate limit deadline updated, %.1fs remaining",
                self.name,
                rate_limit_deadline - time.monotonic(),
            )

    async def can_execute_with_health_check(self) -> bool:
        """Slow-path availability check with an active health probe (async).

        Three recovery gates:
        1. Rate-limit deadline — reject outright until the deadline passes.
        2. Health check — lightweight vendor health probe.
        3. Cautious probe — after passing the first two gates, let one real
           request through to act as the probe.
        """
        # ── Gate 1: rate-limit deadline ──
        if self.is_rate_limited:
            remaining = self.rate_limit_remaining_seconds
            logger.debug(
                "Tier %s: rate limit deadline active, %.1fs remaining, blocking",
                self.name,
                remaining,
            )
            return False

        cb_allows = self.circuit_breaker.can_execute() if self.circuit_breaker else True
        qg_allows = self.quota_guard.can_use_primary() if self.quota_guard else True
        wqg_allows = self.weekly_quota_guard.can_use_primary() if self.weekly_quota_guard else True

        # NOTE(review): rejects only when *all* gates block; a single
        # blocking gate can still fall through to the probe logic below —
        # confirm this asymmetry vs. can_execute() is intentional.
        if not cb_allows and not qg_allows and not wqg_allows:
            return False

        # Detect whether any gate is in a "probe" (recovering) state.
        is_probe_scenario = False
        if self.circuit_breaker:
            if self.circuit_breaker.state == CircuitState.HALF_OPEN:
                is_probe_scenario = True
        if self.quota_guard:
            # Guard allows probing while still in QUOTA_EXCEEDED state.
            # NOTE(review): reaches into QuotaGuard._state (private attribute).
            if self.quota_guard._state == QuotaState.QUOTA_EXCEEDED and qg_allows:
                is_probe_scenario = True
        if self.weekly_quota_guard:
            if self.weekly_quota_guard._state == QuotaState.QUOTA_EXCEEDED and wqg_allows:
                is_probe_scenario = True

        if not is_probe_scenario:
            return cb_allows and qg_allows and wqg_allows

        # ── Gate 2: health check ──
        logger.info("Tier %s: probe scenario, running health check", self.name)
        healthy = await self.vendor.check_health()
        if not healthy:
            logger.warning("Tier %s: health check failed, staying degraded", self.name)
            self.record_failure()
            return False

        # ── Gate 3: cautious probe (allow one real request through) ──
        logger.info("Tier %s: health check passed, allowing cautious probe", self.name)
        return True

    def reset_rate_limit(self) -> None:
        """Manually clear the rate-limit deadline."""
        self._rate_limit_deadline = 0.0

    def get_rate_limit_info(self) -> dict:
        """Return the current rate-limit deadline state as a plain dict."""
        now = time.monotonic()
        remaining = max(0.0, self._rate_limit_deadline - now)
        return {
            "is_rate_limited": self._rate_limit_deadline > now,
            "remaining_seconds": round(remaining, 1),
        }
168
+
169
+
170
# Backward-compatibility alias for the old class name.
BackendTier = VendorTier
@@ -0,0 +1,193 @@
1
+ """SSE 流式响应用量解析工具.
2
+
3
+ 从 Anthropic / OpenAI / Zhipu 兼容格式的 SSE chunk 中提取 token 用量信息,
4
+ 支持多源归一化与 evidence 追踪。
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ from typing import Any
12
+
13
+ from ..vendors.base import UsageInfo
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ def _set_if_nonzero(usage: dict, key: str, value: int) -> None:
19
+ """仅在 value 非零时设置,避免后续 chunk 的 0 值覆盖已提取的非零值.
20
+
21
+ 同时处理 None 值,确保数据类型正确性。
22
+ """
23
+ if value is not None and value != 0:
24
+ usage[key] = value
25
+
26
+
27
+ def _append_usage_evidence(
28
+ usage: dict[str, Any],
29
+ *,
30
+ evidence_kind: str,
31
+ raw_usage: dict[str, Any],
32
+ request_id: str | None = None,
33
+ model_served: str | None = None,
34
+ ) -> None:
35
+ entries = usage.setdefault("_usage_evidence", [])
36
+ if not isinstance(entries, list):
37
+ return
38
+ entries.append({
39
+ "evidence_kind": evidence_kind,
40
+ "raw_usage": raw_usage,
41
+ "request_id": request_id or "",
42
+ "model_served": model_served or "",
43
+ "source_field_map": {
44
+ "input_tokens": next(
45
+ (key for key in ("input_tokens", "prompt_tokens") if key in raw_usage),
46
+ "",
47
+ ),
48
+ "output_tokens": next(
49
+ (key for key in ("output_tokens", "completion_tokens") if key in raw_usage),
50
+ "",
51
+ ),
52
+ "cache_creation_tokens": next(
53
+ (key for key in ("cache_creation_input_tokens",) if key in raw_usage),
54
+ "",
55
+ ),
56
+ "cache_read_tokens": next(
57
+ (
58
+ key for key in (
59
+ "cache_read_input_tokens",
60
+ "cached_tokens",
61
+ ) if key in raw_usage
62
+ ),
63
+ "",
64
+ ),
65
+ },
66
+ "cache_signal_present": any(
67
+ key in raw_usage
68
+ for key in ("cache_creation_input_tokens", "cache_read_input_tokens", "cached_tokens")
69
+ ),
70
+ })
71
+
72
+
73
def build_usage_evidence_records(
    usage: dict[str, Any],
    *,
    vendor: str,
    model_served: str,
    request_id: str,
) -> list[dict[str, Any]]:
    """Flatten accumulated ``_usage_evidence`` entries into DB-ready records.

    Malformed entries (non-dict entries, or entries whose ``raw_usage`` is
    not a dict) are skipped; an absent or malformed evidence list yields an
    empty result.  The caller-supplied vendor / model / request identifiers
    serve as fallbacks for fields missing from individual entries.
    """
    entries = usage.get("_usage_evidence", [])
    if not isinstance(entries, list):
        return []

    out: list[dict[str, Any]] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        raw = entry.get("raw_usage")
        if not isinstance(raw, dict):
            continue
        field_map = entry.get("source_field_map")
        if not isinstance(field_map, dict):
            field_map = {}
        record = {
            "vendor": vendor,
            "request_id": str(entry.get("request_id") or request_id or ""),
            "model_served": str(entry.get("model_served") or model_served or ""),
            "evidence_kind": str(entry.get("evidence_kind") or "stream_usage"),
            "raw_usage_json": json.dumps(raw, ensure_ascii=False, sort_keys=True),
            "parsed_input_tokens": usage.get("input_tokens", 0),
            "parsed_output_tokens": usage.get("output_tokens", 0),
            "parsed_cache_creation_tokens": usage.get("cache_creation_tokens", 0),
            "parsed_cache_read_tokens": usage.get("cache_read_tokens", 0),
            "cache_signal_present": bool(entry.get("cache_signal_present")),
            "source_field_map_json": json.dumps(field_map, ensure_ascii=False, sort_keys=True),
        }
        out.append(record)
    return out
108
+
109
+
110
def parse_usage_from_chunk(chunk: bytes, usage: dict, *, vendor_label: str | None = None) -> None:
    """Extract token usage from an SSE chunk, mutating *usage* in place.

    Supports both the Anthropic native format and the OpenAI/Zhipu
    compatible format:
    - Anthropic: data.message.usage.input_tokens / data.usage.output_tokens
    - OpenAI/Zhipu: top-level data.usage.prompt_tokens / data.usage.completion_tokens

    Non-zero values never get clobbered by later zero values (see
    ``_set_if_nonzero``), and each usage payload seen is also appended as
    an evidence entry for auditing.

    :param vendor_label: upstream vendor label (e.g. "Anthropic", "OpenAI",
        "Gemini") used purely to annotate log lines; the caller derives it
        from the tier name.
    """
    # Chunks may contain several SSE lines; undecodable bytes are dropped.
    text = chunk.decode("utf-8", errors="ignore")
    for line in text.split("\n"):
        if not line.startswith("data: "):
            continue
        payload = line[6:].strip()
        if not payload or payload == "[DONE]":
            continue
        try:
            data = json.loads(payload)
        except json.JSONDecodeError:
            continue

        # Anthropic format: message_start event (data.message.usage)
        msg = data.get("message", {})
        if isinstance(msg, dict) and "usage" in msg:
            u = msg["usage"]
            # NOTE(review): assumes the usage payload is a dict; a non-dict
            # value would raise here — the isinstance guard below only
            # protects the evidence call.  Confirm upstream guarantees.
            input_tokens = u.get("input_tokens", 0) or u.get("prompt_tokens", 0)
            if input_tokens > 0:
                logger.debug("Extracted input tokens from message.usage: %d", input_tokens)
            _set_if_nonzero(usage, "input_tokens", input_tokens)
            _set_if_nonzero(usage, "cache_creation_tokens", u.get("cache_creation_input_tokens", 0))
            _set_if_nonzero(usage, "cache_read_tokens", u.get("cache_read_input_tokens", 0))
            if "id" in msg:
                usage["request_id"] = msg["id"]
            if "model" in msg:
                usage["model_served"] = msg["model"]
            if isinstance(u, dict):
                _append_usage_evidence(
                    usage,
                    evidence_kind="message_usage",
                    raw_usage=dict(u),
                    request_id=msg.get("id"),
                    model_served=msg.get("model"),
                )

        # Anthropic message_delta / final OpenAI chunk (top-level data.usage)
        if "usage" in data:
            u = data["usage"]
            output_tokens = u.get("output_tokens", 0) or u.get("completion_tokens", 0)
            input_tokens = u.get("input_tokens", 0) or u.get("prompt_tokens", 0)
            cache_creation_tokens = u.get("cache_creation_input_tokens", 0)
            cache_read_tokens = u.get("cache_read_input_tokens", 0)

            _label = f" ({vendor_label})" if vendor_label else ""
            if output_tokens > 0:
                logger.debug("Extracted output tokens from data.usage: %d%s", output_tokens, _label)
            if input_tokens > 0:
                logger.debug("Extracted input tokens from data.usage: %d%s", input_tokens, _label)

            _set_if_nonzero(usage, "output_tokens", output_tokens)
            _set_if_nonzero(usage, "input_tokens", input_tokens)
            _set_if_nonzero(usage, "cache_creation_tokens", cache_creation_tokens)
            _set_if_nonzero(usage, "cache_read_tokens", cache_read_tokens)
            if isinstance(u, dict):
                _append_usage_evidence(
                    usage,
                    evidence_kind="data_usage",
                    raw_usage=dict(u),
                    request_id=data.get("id"),
                    model_served=data.get("model"),
                )

        # request_id fallback (OpenAI format keeps the id at top level);
        # an id already captured from message.id wins.
        if "id" in data and not usage.get("request_id"):
            usage["request_id"] = data["id"]
185
+
186
+
187
def has_missing_input_usage_signals(info: UsageInfo) -> bool:
    """Return True when a streamed request produced output but no
    explainable input-side usage signal (no input tokens and no cache
    creation/read tokens)."""
    produced_output = info.output_tokens > 0
    has_input_signal = (
        info.input_tokens > 0
        or info.cache_creation_tokens > 0
        or info.cache_read_tokens > 0
    )
    return produced_output and not has_input_signal
@@ -0,0 +1,131 @@
1
+ """用量记录器 — 封装 token 用量日志、定价计算与证据构建."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import time
8
+ from typing import TYPE_CHECKING, Any
9
+
10
+ if TYPE_CHECKING:
11
+ from ..pricing import PricingTable
12
+ from ..logging.db import TokenLogger
13
+ from .usage_parser import UsageInfo
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class UsageRecorder:
    """Encapsulates usage recording, pricing logs and evidence building for the routing layer."""

    def __init__(
        self,
        token_logger: TokenLogger | None = None,
        pricing_table: PricingTable | None = None,
    ) -> None:
        # Both collaborators are optional: without a token logger,
        # record() is a no-op; without a pricing table, cost logs as "-".
        self._token_logger = token_logger
        self._pricing_table = pricing_table

    def set_pricing_table(self, table: PricingTable) -> None:
        """Inject or replace the pricing table after construction."""
        self._pricing_table = table

    # ── usage-info construction ──────────────────────────

    @staticmethod
    def build_usage_info(usage: dict[str, Any]) -> UsageInfo:
        """Convert a raw usage dict (as filled by the SSE parser) into a UsageInfo."""
        # Imported lazily to avoid a circular import with the routing package.
        from .usage_parser import UsageInfo

        return UsageInfo(
            input_tokens=usage.get("input_tokens", 0),
            output_tokens=usage.get("output_tokens", 0),
            cache_creation_tokens=usage.get("cache_creation_tokens", 0),
            cache_read_tokens=usage.get("cache_read_tokens", 0),
            request_id=usage.get("request_id", ""),
        )

    # ── model-call access log ────────────────────────────

    def log_model_call(
        self,
        *,
        vendor: str,
        model_requested: str,
        model_served: str,
        duration_ms: int,
        usage: UsageInfo,
    ) -> None:
        """Emit a detailed model-call-level access-log line (with cost when priceable)."""
        cost_str = "-"
        if self._pricing_table is not None:
            cost_value = self._pricing_table.compute_cost(
                vendor=vendor,
                model_served=model_served,
                input_tokens=usage.input_tokens,
                output_tokens=usage.output_tokens,
                cache_creation_tokens=usage.cache_creation_tokens,
                cache_read_tokens=usage.cache_read_tokens,
            )
            # None means "no pricing data for this vendor/model" — keep "-".
            if cost_value is not None:
                cost_str = cost_value.format()
        logger.info(
            "ModelCall: vendor=%s model_requested=%s model_served=%s "
            "duration=%dms tokens=[in:%d out:%d cache_create:%d cache_read:%d] cost=%s",
            vendor, model_requested, model_served, duration_ms,
            usage.input_tokens, usage.output_tokens,
            usage.cache_creation_tokens, usage.cache_read_tokens, cost_str,
        )

    # ── persistence ──────────────────────────────────────

    async def record(
        self,
        vendor: str,
        model_requested: str,
        model_served: str,
        usage: UsageInfo,
        duration_ms: int,
        success: bool,
        failover: bool,
        failover_from: str | None = None,
        evidence_records: list[dict[str, Any]] | None = None,
    ) -> None:
        """Persist one call record (and, for copilot, its evidence records).

        No-op when no token logger is configured.  Evidence persistence is
        additionally gated on the logger exposing ``log_evidence`` — older
        TokenLogger implementations may not have it.
        """
        if not self._token_logger:
            return
        await self._token_logger.log(
            vendor=vendor, model_requested=model_requested, model_served=model_served,
            input_tokens=usage.input_tokens, output_tokens=usage.output_tokens,
            cache_creation_tokens=usage.cache_creation_tokens, cache_read_tokens=usage.cache_read_tokens,
            duration_ms=duration_ms, success=success, failover=failover, failover_from=failover_from,
            request_id=usage.request_id,
        )
        # Evidence is only collected for the copilot vendor.
        if not evidence_records or vendor != "copilot":
            return
        if not hasattr(self._token_logger, "log_evidence"):
            return
        for record in evidence_records:
            await self._token_logger.log_evidence(**record)

    # ── evidence-record construction ─────────────────────

    @staticmethod
    def build_nonstream_evidence_records(*, vendor: str, model_served: str, usage: UsageInfo) -> list[dict[str, Any]]:
        """Build a single summary evidence record for a non-streaming copilot call.

        Returns an empty list for any other vendor.  Cache token fields are
        only included in the raw payload (and field map) when positive.
        """
        if vendor != "copilot":
            return []
        raw_usage: dict[str, Any] = {"input_tokens": usage.input_tokens, "output_tokens": usage.output_tokens}
        if usage.cache_creation_tokens > 0:
            raw_usage["cache_creation_input_tokens"] = usage.cache_creation_tokens
        if usage.cache_read_tokens > 0:
            raw_usage["cache_read_input_tokens"] = usage.cache_read_tokens
        return [{
            "vendor": vendor, "request_id": usage.request_id, "model_served": model_served,
            "evidence_kind": "nonstream_usage_summary",
            "raw_usage_json": json.dumps(raw_usage, ensure_ascii=False, sort_keys=True),
            "parsed_input_tokens": usage.input_tokens, "parsed_output_tokens": usage.output_tokens,
            "parsed_cache_creation_tokens": usage.cache_creation_tokens, "parsed_cache_read_tokens": usage.cache_read_tokens,
            "cache_signal_present": usage.cache_creation_tokens > 0 or usage.cache_read_tokens > 0,
            "source_field_map_json": json.dumps({
                "input_tokens": "input_tokens", "output_tokens": "output_tokens",
                "cache_creation_tokens": "cache_creation_input_tokens" if usage.cache_creation_tokens > 0 else "",
                "cache_read_tokens": "cache_read_input_tokens" if usage.cache_read_tokens > 0 else "",
            }, ensure_ascii=False, sort_keys=True),
        }]
@@ -0,0 +1 @@
1
+ """服务模块."""
@@ -0,0 +1,142 @@
1
+ """FastAPI 应用工厂与生命周期管理.
2
+
3
+ 路由端点注册已正交分解至 :mod:`.routes`.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ from contextlib import asynccontextmanager
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ from fastapi import FastAPI
14
+
15
+ from ..auth.providers.github import GitHubDeviceFlowProvider
16
+ from ..auth.providers.google import GoogleOAuthProvider
17
+ from ..auth.runtime import RuntimeReauthCoordinator
18
+ from ..auth.store import TokenStoreManager
19
+ from ..vendors.antigravity import AntigravityVendor
20
+ from ..vendors.copilot import CopilotVendor
21
+ from ..config.loader import load_config
22
+ from ..compat.session_store import CompatSessionStore
23
+ from ..config.schema import ProxyConfig
24
+ from ..logging.db import TokenLogger
25
+ from ..routing.router import RequestRouter
26
+ from ..routing.tier import VendorTier
27
+ from .factory import ( # noqa: F401
28
+ _build_circuit_breaker,
29
+ _build_quota_guard,
30
+ _create_vendor_from_config,
31
+ )
32
+ from .routes import register_all_routes
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook (startup / shutdown).

    Startup: initialize the token logger and compat session store, build
    the pricing table from config, and seed each quota-guarded tier with
    its historical usage baseline.  Shutdown: close router, session store
    and logger.
    """
    router: RequestRouter = app.state.router
    token_logger: TokenLogger = app.state.token_logger
    compat_session_store: CompatSessionStore = app.state.compat_session_store
    config: ProxyConfig = app.state.config

    await token_logger.init()
    await compat_session_store.init()

    # Load the model pricing table from configuration.
    from ..pricing import PricingTable  # noqa: F401

    pricing_table = PricingTable(config.pricing)
    app.state.pricing_table = pricing_table
    router.set_pricing_table(pricing_table)

    # Load a usage baseline for every tier that has an enabled QuotaGuard,
    # so restarts do not reset quota accounting.
    for tier in router.tiers:
        if tier.quota_guard and tier.quota_guard.enabled:
            total = await token_logger.query_window_total(
                tier.quota_guard.window_hours,
                vendor=tier.name,
            )
            tier.quota_guard.load_baseline(total)
        if tier.weekly_quota_guard and tier.weekly_quota_guard.enabled:
            total = await token_logger.query_window_total(
                tier.weekly_quota_guard.window_hours,
                vendor=tier.name,
            )
            tier.weekly_quota_guard.load_baseline(total)

    logger.info("coding-proxy started: host=%s port=%d", config.server.host, config.server.port)
    yield
    # Shutdown path — runs after the server stops serving requests.
    await router.close()
    await compat_session_store.close()
    await token_logger.close()
    logger.info("coding-proxy stopped")
76
+
77
+
78
def create_app(config: ProxyConfig | None = None) -> FastAPI:
    """Create the FastAPI application instance.

    Wires together storage, the vendor tiers (with their resiliency gear),
    the optional runtime re-auth coordinator, and the request router, then
    registers all HTTP routes.  When *config* is None the on-disk
    configuration is loaded.
    """
    if config is None:
        config = load_config()

    token_logger = TokenLogger(config.db_path)
    compat_session_store = CompatSessionStore(
        config.compat_state_path,
        ttl_seconds=config.database.compat_state_ttl_seconds,
    )
    from ..routing.model_mapper import ModelMapper  # noqa: E402

    mapper = ModelMapper(config.model_mapping)

    # Load the token store used to merge stored credentials.
    token_store = TokenStoreManager(
        store_path=Path(config.auth.token_store_path) if config.auth.token_store_path else None
    )
    token_store.load()

    # Phase 1: build a vendor_name → VendorTier map (order-independent).
    _vendor_map: dict[str, Any] = {}
    for vendor_cfg in config.vendors:
        if not vendor_cfg.enabled:
            continue
        vendor = _create_vendor_from_config(vendor_cfg, config.failover, mapper, token_store)
        cb = _build_circuit_breaker(vendor_cfg.circuit_breaker) if vendor_cfg.circuit_breaker else None
        qg = _build_quota_guard(vendor_cfg.quota_guard)
        wqg = _build_quota_guard(vendor_cfg.weekly_quota_guard)
        _vendor_map[vendor_cfg.vendor] = VendorTier(vendor=vendor, circuit_breaker=cb, quota_guard=qg, weekly_quota_guard=wqg)

    # Phase 2: assemble the final chain in the order given by config.tiers,
    # falling back to the original vendors order.  Unknown tier names are
    # silently skipped.
    if config.tiers is not None:
        tiers = [_vendor_map[name] for name in config.tiers if name in _vendor_map]
    else:
        tiers = [_vendor_map[v.vendor] for v in config.vendors if v.enabled]

    # Build the runtime re-authentication coordinator (only for vendor
    # types that support interactive re-auth).
    # NOTE(review): reaches into vendor._token_manager (private attribute)
    # — confirm this is the intended integration point.
    reauth_providers: dict[str, Any] = {}
    token_updaters: dict[str, Any] = {}
    for tier in tiers:
        if isinstance(tier.vendor, CopilotVendor):
            reauth_providers["github"] = GitHubDeviceFlowProvider()
            token_updaters["github"] = tier.vendor._token_manager.update_github_token
        elif isinstance(tier.vendor, AntigravityVendor):
            reauth_providers["google"] = GoogleOAuthProvider()
            token_updaters["google"] = tier.vendor._token_manager.update_refresh_token

    reauth_coordinator: RuntimeReauthCoordinator | None = None
    if reauth_providers:
        reauth_coordinator = RuntimeReauthCoordinator(token_store, reauth_providers, token_updaters)

    router = RequestRouter(tiers, token_logger, reauth_coordinator, compat_session_store)

    app = FastAPI(title="coding-proxy", version="0.1.0", lifespan=lifespan)
    app.state.router = router
    app.state.token_logger = token_logger
    app.state.compat_session_store = compat_session_store
    app.state.config = config
    app.state.reauth_coordinator = reauth_coordinator

    # Register all route endpoints.
    register_all_routes(app, router, reauth_coordinator)

    return app