coding-proxy 0.2.1a3__tar.gz → 0.2.3a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/CHANGELOG.md +10 -11
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/PKG-INFO +2 -2
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/README.md +1 -1
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/docs/user-guide.md +284 -3
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/docs/zh-CN/README.md +1 -1
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/pyproject.toml +1 -1
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/cli/__init__.py +37 -3
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/config/config.default.yaml +6 -1
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/logging/__init__.py +5 -1
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/routing/quota_guard.py +39 -8
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/routing/router.py +60 -0
- coding_proxy-0.2.3a1/src/coding/proxy/server/dashboard.py +779 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/server/request_normalizer.py +62 -24
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/server/routes.py +48 -2
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_app_routes.py +155 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_request_normalizer.py +81 -23
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/uv.lock +1 -1
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/.github/workflows/ci.yml +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/.github/workflows/coverage.yml +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/.github/workflows/release.yml +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/.gitignore +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/AGENTS.md +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/CLAUDE.md +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/LICENSE +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/docs/ci-cd.md +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/docs/framework.md +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/__init__.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/__init__.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/__main__.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/auth/__init__.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/auth/providers/__init__.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/auth/providers/base.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/auth/providers/github.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/auth/providers/google.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/auth/runtime.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/auth/store.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/cli/auth_commands.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/cli/banner.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/compat/__init__.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/compat/canonical.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/compat/session_store.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/config/__init__.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/config/auth_schema.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/config/loader.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/config/resiliency.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/config/routing.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/config/schema.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/config/server.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/config/vendors.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/convert/__init__.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/convert/anthropic_to_gemini.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/convert/anthropic_to_openai.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/convert/gemini_sse_adapter.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/convert/gemini_to_anthropic.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/convert/openai_to_anthropic.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/logging/db.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/logging/formatters.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/logging/stats.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/model/__init__.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/model/auth.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/model/compat.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/model/constants.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/model/pricing.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/model/token.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/model/vendor.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/pricing.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/routing/__init__.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/routing/circuit_breaker.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/routing/error_classifier.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/routing/executor.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/routing/model_mapper.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/routing/rate_limit.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/routing/retry.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/routing/session_manager.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/routing/tier.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/routing/usage_parser.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/routing/usage_recorder.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/server/__init__.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/server/app.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/server/factory.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/server/responses.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/streaming/__init__.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/streaming/anthropic_compat.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/__init__.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/alibaba.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/anthropic.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/antigravity.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/base.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/copilot.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/copilot_models.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/copilot_token_manager.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/copilot_urls.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/doubao.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/kimi.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/minimax.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/mixins.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/native_anthropic.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/token_manager.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/xiaomi.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/src/coding/proxy/vendors/zhipu.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/__init__.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_antigravity.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_auto_login.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_banner.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_circuit_breaker.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_cli_usage.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_compat.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_config_init.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_config_loader.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_convert_request.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_convert_response.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_convert_sse.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_copilot.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_copilot_convert_request.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_copilot_convert_response.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_copilot_models.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_copilot_urls.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_currency.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_error_classifier.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_logging_dual_write.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_mixins.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_model_auth.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_model_compat.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_model_constants.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_model_mapper.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_model_pricing.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_model_token.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_model_vendor.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_native_vendors.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_parse_usage.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_pricing.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_quota_guard.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_rate_limit.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_router_chain.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_router_executor.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_runtime_reauth.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_schema.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_streaming_anthropic_compat.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_tier.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_tiers_config.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_time_range.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_token_logger.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_token_manager.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_types.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_vendor_streaming.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_vendors.py +0 -0
- {coding_proxy-0.2.1a3 → coding_proxy-0.2.3a1}/tests/test_zhipu.py +0 -0
|
@@ -4,22 +4,21 @@
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
## [v0.2.1](https://github.com/ThreeFish-AI/coding-proxy/releases/tag/v0.2.1a1) — 2026-04-11
|
|
7
|
+
## [v0.2.3](https://github.com/ThreeFish-AI/coding-proxy/releases/tag/v0.2.3a1) — 2026-04-15
|
|
9
8
|
|
|
10
|
-
-
|
|
11
|
-
-
|
|
9
|
+
- feat(dashboard): 新增实时 Web Dashboard 页面,聚合展示流量与用量统计;
|
|
10
|
+
- docs(user-guide): 补充 POST /v1/messages 完整 API 参考文档;
|
|
11
|
+
- fix(request-normalizer): misplaced tool_result 从剥离改为重定位,修复跨供应商降级后 Anthropic 恢复失败;
|
|
12
12
|
|
|
13
|
+
## [v0.2.2](https://github.com/ThreeFish-AI/coding-proxy/releases/tag/v0.2.2) — 2026-04-13
|
|
13
14
|
|
|
14
|
-
-
|
|
15
|
+
- feat(reset): CLI reset 命令新增 -v/--vendor 参数,支持运行时 N-tier 链路重排序(逗号分隔的 vendor 列表);
|
|
16
|
+
- fix(logging): 修复 uvicorn.error 日志在文件中重复打印的问题;
|
|
15
17
|
|
|
16
|
-
|
|
18
|
+
## [v0.2.1](https://github.com/ThreeFish-AI/coding-proxy/releases/tag/v0.2.1) — 2026-04-11
|
|
17
19
|
|
|
18
|
-
-
|
|
19
|
-
|
|
20
|
-
- **修复(v1internal 协议)**:新增 `project_id` 配置字段 + v1internal 请求信封包装 + 客户端指纹 Headers + 端点 URL 适配
|
|
21
|
-
- **修复(自动发现)**:利用已有的 `cloud-platform` OAuth scope 通过 Cloud Resource Manager API 自动发现用户的 GCP `project_id`,首次请求时零配置自动切换至 v1internal 模式——开箱即用,无需手动配置
|
|
22
|
-
- **附带改进**:`_acquire()` scope 校验保持 warning 降级;`_mark_scope_error_if_needed()` 增强诊断日志;`get_diagnostics()` 暴露发现状态
|
|
20
|
+
- feat(logging): 实现日志双写(控制台 + 本地文件),日志文件支持 5MB 自动轮转及 gzip 压缩备份;ModelCall 日志降级为 DEBUG 级别;
|
|
21
|
+
- feat(circuit-breaker): 补全熔断器状态转换日志的 vendor 上下文信息;
|
|
23
22
|
|
|
24
23
|
## [v0.2.0](https://github.com/ThreeFish-AI/coding-proxy/releases/tag/v0.2.0) — 2026-04-09
|
|
25
24
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coding-proxy
|
|
3
|
-
Version: 0.2.1a3
|
|
3
|
+
Version: 0.2.3a1
|
|
4
4
|
Summary: A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao...
|
|
5
5
|
Project-URL: Source Code, https://github.com/ThreeFish-AI/coding-proxy
|
|
6
6
|
Project-URL: User Guide, https://github.com/ThreeFish-AI/coding-proxy/blob/master/docs/user-guide.md
|
|
@@ -56,7 +56,7 @@ When you're deeply immersed in your coding "zone" with **Claude Code** (or any A
|
|
|
56
56
|
|
|
57
57
|
## 🌟 Core Features
|
|
58
58
|
|
|
59
|
-
- **⛓️ N-tier Chained Failover**:
|
|
59
|
+
- **⛓️ N-tier Chained Failover**: Autonomous descending sequence, supporting Claude's official plans, as well as Coding Plans from GitHub Copilot, Z AI, MiniMax, Alibaba Qwen, Xiaomi, Kimi, Doubao, etc.
|
|
60
60
|
- **🛡️ Smart Resilience & Quota Guardians**: Every single vendor node comes fully armed with an independent **Circuit Breaker** and **Quota Guard** to proactively dodge avalanches without breaking a sweat.
|
|
61
61
|
- **👻 Phantom-like Transparency**: **100% transparent** to the client! No code tweaks required. Overwrite `ANTHROPIC_BASE_URL` with a single line, and you're good to go.
|
|
62
62
|
- **🔄 Universal Alchemy (Formats & Models)**: Native support for two-way request/streaming (SSE) translations between Anthropic ←→ Gemini. Plus, auto/DIY model name mapping (e.g., effortlessly morphing `claude-*` into `glm-*`).
|
|
@@ -29,7 +29,7 @@ When you're deeply immersed in your coding "zone" with **Claude Code** (or any A
|
|
|
29
29
|
|
|
30
30
|
## 🌟 Core Features
|
|
31
31
|
|
|
32
|
-
- **⛓️ N-tier Chained Failover**:
|
|
32
|
+
- **⛓️ N-tier Chained Failover**: Autonomous descending sequence, supporting Claude's official plans, as well as Coding Plans from GitHub Copilot, Z AI, MiniMax, Alibaba Qwen, Xiaomi, Kimi, Doubao, etc.
|
|
33
33
|
- **🛡️ Smart Resilience & Quota Guardians**: Every single vendor node comes fully armed with an independent **Circuit Breaker** and **Quota Guard** to proactively dodge avalanches without breaking a sweat.
|
|
34
34
|
- **👻 Phantom-like Transparency**: **100% transparent** to the client! No code tweaks required. Overwrite `ANTHROPIC_BASE_URL` with a single line, and you're good to go.
|
|
35
35
|
- **🔄 Universal Alchemy (Formats & Models)**: Native support for two-way request/streaming (SSE) translations between Anthropic ←→ Gemini. Plus, auto/DIY model name mapping (e.g., effortlessly morphing `claude-*` into `glm-*`).
|
|
@@ -908,17 +908,298 @@ curl -I http://127.0.0.1:8046/
|
|
|
908
908
|
|
|
909
909
|
### 5.2 POST /v1/messages
|
|
910
910
|
|
|
911
|
-
代理 Anthropic Messages API
|
|
911
|
+
代理 Anthropic Messages API,支持流式(SSE)与非流式请求。请求经过规范化处理后,路由至配置的 vendor tier 链;若当前 tier 不可用或返回可恢复错误,自动故障转移至下一 tier。
|
|
912
|
+
|
|
913
|
+
#### 5.2.1 请求格式
|
|
914
|
+
|
|
915
|
+
**请求头**
|
|
916
|
+
|
|
917
|
+
| 请求头 | 必填 | 说明 |
|
|
918
|
+
|--------|------|------|
|
|
919
|
+
| `Content-Type` | ✓ | 固定为 `application/json` |
|
|
920
|
+
| `Authorization` | ✗ | 格式 `Bearer <token>`;对 Anthropic vendor 透传,对其他 vendor 由代理内部凭证管理 |
|
|
921
|
+
| `anthropic-version` | ✗ | Anthropic API 版本,建议传 `2023-06-01`;透传至上游 |
|
|
922
|
+
| `anthropic-beta` | ✗ | Beta 功能标识,透传至上游(如 `interleaved-thinking-2025-05-14`) |
|
|
923
|
+
|
|
924
|
+
> **注**:`hop-by-hop` 头(如 `Connection`、`Transfer-Encoding`)会在转发前自动过滤。
|
|
925
|
+
|
|
926
|
+
**请求体参数**
|
|
927
|
+
|
|
928
|
+
| 字段 | 类型 | 必填 | 约束 | 说明 |
|
|
929
|
+
|------|------|------|------|------|
|
|
930
|
+
| `model` | string | ✓ | 非空 | 目标模型标识。经 `model_mapping` 规则映射后路由至实际 vendor 模型 |
|
|
931
|
+
| `messages` | array | ✓ | 至少 1 条;`user`/`assistant` 交替;末尾必须为 `user` | 对话历史,详见[消息结构](#消息结构) |
|
|
932
|
+
| `max_tokens` | integer | ✗ | > 0 | 最大输出 token 数 |
|
|
933
|
+
| `stream` | boolean | ✗ | 默认 `false` | 是否以 SSE 流式返回 |
|
|
934
|
+
| `temperature` | number | ✗ | `[0, 2]` | 采样温度 |
|
|
935
|
+
| `top_p` | number | ✗ | `(0, 1]` | Top-p 采样 |
|
|
936
|
+
| `top_k` | integer | ✗ | ≥ 1 | Top-k 采样 |
|
|
937
|
+
| `stop_sequences` | array[string] | ✗ | | 提前停止的字符串序列 |
|
|
938
|
+
| `system` | string \| array | ✗ | | 系统提示词;可为纯字符串或 content block 数组 |
|
|
939
|
+
| `tools` | array | ✗ | | 工具定义;详见 Anthropic 官方文档 |
|
|
940
|
+
| `tool_choice` | object | ✗ | | 工具选择策略(`auto`/`any`/`tool`) |
|
|
941
|
+
| `thinking` | object | ✗ | 需 `budget_tokens`;部分 vendor 不支持 | Extended Thinking 配置,格式 `{"type":"enabled","budget_tokens":N}` |
|
|
942
|
+
| `metadata` | object | ✗ | | 用户元数据(如 `user_id`),透传至上游 |
|
|
943
|
+
|
|
944
|
+
<a id="消息结构"></a>**消息结构**
|
|
945
|
+
|
|
946
|
+
每条消息的 `content` 字段可为纯字符串或 content block 数组:
|
|
947
|
+
|
|
948
|
+
```json
|
|
949
|
+
{
|
|
950
|
+
"role": "user",
|
|
951
|
+
"content": [
|
|
952
|
+
{ "type": "text", "text": "请描述这张图片" },
|
|
953
|
+
{
|
|
954
|
+
"type": "image",
|
|
955
|
+
"source": {
|
|
956
|
+
"type": "base64",
|
|
957
|
+
"media_type": "image/png",
|
|
958
|
+
"data": "<base64 编码的图片数据>"
|
|
959
|
+
}
|
|
960
|
+
}
|
|
961
|
+
]
|
|
962
|
+
}
|
|
963
|
+
```
|
|
964
|
+
|
|
965
|
+
支持的 content block 类型:
|
|
966
|
+
|
|
967
|
+
| 类型 | 适用角色 | 必填字段 | 说明 |
|
|
968
|
+
|------|---------|---------|------|
|
|
969
|
+
| `text` | `user`/`assistant` | `text` | 纯文本 |
|
|
970
|
+
| `image` | `user` | `source`(`type`/`media_type`/`data` 或 `url`) | 图片;部分 vendor 不支持 |
|
|
971
|
+
| `tool_use` | `assistant` | `id`(`toolu_` 前缀)、`name`、`input` | 模型发起工具调用 |
|
|
972
|
+
| `tool_result` | `user` | `tool_use_id`、`content` | 工具调用结果;**只能出现在 `user` 消息中** |
|
|
973
|
+
| `thinking` | `assistant` | `thinking`、`signature` | Extended Thinking 内容块;跨 vendor 时会被自动剥离 |
|
|
974
|
+
|
|
975
|
+
---
|
|
976
|
+
|
|
977
|
+
#### 5.2.2 非流式请求示例
|
|
978
|
+
|
|
979
|
+
```bash
|
|
980
|
+
curl -X POST http://127.0.0.1:8046/v1/messages \
|
|
981
|
+
-H "Content-Type: application/json" \
|
|
982
|
+
-H "Authorization: Bearer $ANTHROPIC_API_KEY" \
|
|
983
|
+
-H "anthropic-version: 2023-06-01" \
|
|
984
|
+
-d '{
|
|
985
|
+
"model": "claude-sonnet-4-5",
|
|
986
|
+
"max_tokens": 1024,
|
|
987
|
+
"messages": [
|
|
988
|
+
{"role": "user", "content": "你好,介绍一下你自己"}
|
|
989
|
+
]
|
|
990
|
+
}'
|
|
991
|
+
```
|
|
992
|
+
|
|
993
|
+
**成功响应(HTTP 200)**
|
|
994
|
+
|
|
995
|
+
```json
|
|
996
|
+
{
|
|
997
|
+
"id": "msg_01XFDUDYJgAACzvnptvVoYEL",
|
|
998
|
+
"type": "message",
|
|
999
|
+
"role": "assistant",
|
|
1000
|
+
"content": [
|
|
1001
|
+
{ "type": "text", "text": "你好!我是 Claude,一个由 Anthropic 开发的 AI 助手。" }
|
|
1002
|
+
],
|
|
1003
|
+
"model": "claude-sonnet-4-5-20251101",
|
|
1004
|
+
"stop_reason": "end_turn",
|
|
1005
|
+
"stop_sequence": null,
|
|
1006
|
+
"usage": {
|
|
1007
|
+
"input_tokens": 14,
|
|
1008
|
+
"output_tokens": 32,
|
|
1009
|
+
"cache_creation_input_tokens": 0,
|
|
1010
|
+
"cache_read_input_tokens": 0
|
|
1011
|
+
}
|
|
1012
|
+
}
|
|
1013
|
+
```
|
|
1014
|
+
|
|
1015
|
+
**响应字段说明**
|
|
1016
|
+
|
|
1017
|
+
| 字段 | 类型 | 说明 |
|
|
1018
|
+
|------|------|------|
|
|
1019
|
+
| `id` | string | 消息唯一 ID,格式 `msg_*` |
|
|
1020
|
+
| `type` | string | 固定为 `"message"` |
|
|
1021
|
+
| `role` | string | 固定为 `"assistant"` |
|
|
1022
|
+
| `content` | array | 响应内容块列表(`text`/`tool_use` 等) |
|
|
1023
|
+
| `model` | string | 实际处理请求的模型完整 ID |
|
|
1024
|
+
| `stop_reason` | string | 停止原因,见下表 |
|
|
1025
|
+
| `stop_sequence` | string \| null | 触发停止的序列字符串;未命中时为 `null` |
|
|
1026
|
+
| `usage.input_tokens` | integer | 输入消耗的 token 数 |
|
|
1027
|
+
| `usage.output_tokens` | integer | 输出消耗的 token 数 |
|
|
1028
|
+
| `usage.cache_creation_input_tokens` | integer | 创建缓存的 token 数(Prompt Cache) |
|
|
1029
|
+
| `usage.cache_read_input_tokens` | integer | 从缓存命中的 token 数(Prompt Cache) |
|
|
1030
|
+
|
|
1031
|
+
**`stop_reason` 枚举**
|
|
1032
|
+
|
|
1033
|
+
| 值 | 含义 |
|
|
1034
|
+
|----|------|
|
|
1035
|
+
| `end_turn` | 模型自然输出完毕 |
|
|
1036
|
+
| `tool_use` | 模型发起工具调用,等待结果 |
|
|
1037
|
+
| `stop_sequence` | 触发了请求中指定的停止序列 |
|
|
1038
|
+
| `max_tokens` | 达到 `max_tokens` 上限 |
|
|
1039
|
+
|
|
1040
|
+
---
|
|
1041
|
+
|
|
1042
|
+
#### 5.2.3 流式请求示例(SSE 模式)
|
|
1043
|
+
|
|
1044
|
+
在请求体中设置 `"stream": true`,响应将以 `text/event-stream` 格式逐块下发:
|
|
912
1045
|
|
|
913
1046
|
```bash
|
|
914
1047
|
curl -X POST http://127.0.0.1:8046/v1/messages \
|
|
915
1048
|
-H "Content-Type: application/json" \
|
|
916
1049
|
-H "Authorization: Bearer $ANTHROPIC_API_KEY" \
|
|
917
1050
|
-H "anthropic-version: 2023-06-01" \
|
|
918
|
-
-
|
|
1051
|
+
--no-buffer \
|
|
1052
|
+
-d '{
|
|
1053
|
+
"model": "claude-sonnet-4-5",
|
|
1054
|
+
"max_tokens": 1024,
|
|
1055
|
+
"stream": true,
|
|
1056
|
+
"messages": [
|
|
1057
|
+
{"role": "user", "content": "用一句话介绍你自己"}
|
|
1058
|
+
]
|
|
1059
|
+
}'
|
|
1060
|
+
```
|
|
1061
|
+
|
|
1062
|
+
**SSE 事件流示例**
|
|
1063
|
+
|
|
919
1064
|
```
|
|
1065
|
+
event: message_start
|
|
1066
|
+
data: {"type":"message_start","message":{"id":"msg_01abc","type":"message","role":"assistant","content":[],"model":"claude-sonnet-4-5-20251101","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":14,"output_tokens":1,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}}
|
|
1067
|
+
|
|
1068
|
+
event: content_block_start
|
|
1069
|
+
data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}
|
|
1070
|
+
|
|
1071
|
+
event: ping
|
|
1072
|
+
data: {"type":"ping"}
|
|
1073
|
+
|
|
1074
|
+
event: content_block_delta
|
|
1075
|
+
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"我是"}}
|
|
1076
|
+
|
|
1077
|
+
event: content_block_delta
|
|
1078
|
+
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" Claude"}}
|
|
1079
|
+
|
|
1080
|
+
event: content_block_stop
|
|
1081
|
+
data: {"type":"content_block_stop","index":0}
|
|
1082
|
+
|
|
1083
|
+
event: message_delta
|
|
1084
|
+
data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":8}}
|
|
1085
|
+
|
|
1086
|
+
event: message_stop
|
|
1087
|
+
data: {"type":"message_stop"}
|
|
1088
|
+
```
|
|
1089
|
+
|
|
1090
|
+
**SSE 事件类型说明**
|
|
1091
|
+
|
|
1092
|
+
| 事件类型 | 说明 |
|
|
1093
|
+
|---------|------|
|
|
1094
|
+
| `message_start` | 消息开始,包含初始元数据 |
|
|
1095
|
+
| `content_block_start` | 新的 content block 开始(每个 block 有独立 `index`) |
|
|
1096
|
+
| `content_block_delta` | content block 增量;`delta.type` 为 `text_delta` 或 `input_json_delta`(工具调用参数) |
|
|
1097
|
+
| `content_block_stop` | 当前 content block 结束 |
|
|
1098
|
+
| `message_delta` | 消息级别的增量更新,包含最终 `stop_reason` 和累计 `usage` |
|
|
1099
|
+
| `message_stop` | 消息结束,流关闭 |
|
|
1100
|
+
| `ping` | 心跳事件,客户端可忽略 |
|
|
1101
|
+
| `error` | 流式处理过程中发生错误(见[错误响应](#错误响应)) |
|
|
1102
|
+
|
|
1103
|
+
> **注**:流式模式下,一旦 SSE 流开始发送,代理不再进行 tier 级别的故障转移。若中途出现错误,会以 `event: error` 事件通知客户端,随后关闭流。
|
|
1104
|
+
|
|
1105
|
+
---
|
|
1106
|
+
|
|
1107
|
+
#### 5.2.4 工具调用示例
|
|
1108
|
+
|
|
1109
|
+
```bash
|
|
1110
|
+
curl -X POST http://127.0.0.1:8046/v1/messages \
|
|
1111
|
+
-H "Content-Type: application/json" \
|
|
1112
|
+
-H "anthropic-version: 2023-06-01" \
|
|
1113
|
+
-d '{
|
|
1114
|
+
"model": "claude-sonnet-4-5",
|
|
1115
|
+
"max_tokens": 1024,
|
|
1116
|
+
"tools": [
|
|
1117
|
+
{
|
|
1118
|
+
"name": "get_weather",
|
|
1119
|
+
"description": "获取指定城市的当前天气",
|
|
1120
|
+
"input_schema": {
|
|
1121
|
+
"type": "object",
|
|
1122
|
+
"properties": {
|
|
1123
|
+
"city": {"type": "string", "description": "城市名称"}
|
|
1124
|
+
},
|
|
1125
|
+
"required": ["city"]
|
|
1126
|
+
}
|
|
1127
|
+
}
|
|
1128
|
+
],
|
|
1129
|
+
"messages": [
|
|
1130
|
+
{"role": "user", "content": "北京今天天气怎么样?"}
|
|
1131
|
+
]
|
|
1132
|
+
}'
|
|
1133
|
+
```
|
|
1134
|
+
|
|
1135
|
+
---
|
|
1136
|
+
|
|
1137
|
+
<a id="错误响应"></a>
|
|
1138
|
+
#### 5.2.5 错误响应
|
|
1139
|
+
|
|
1140
|
+
**错误响应结构**
|
|
1141
|
+
|
|
1142
|
+
```json
|
|
1143
|
+
{
|
|
1144
|
+
"error": {
|
|
1145
|
+
"type": "invalid_request_error",
|
|
1146
|
+
"message": "详细错误描述",
|
|
1147
|
+
"details": ["原因1", "原因2"]
|
|
1148
|
+
}
|
|
1149
|
+
}
|
|
1150
|
+
```
|
|
1151
|
+
|
|
1152
|
+
> `details` 字段为可选,仅在 `NoCompatibleVendorError`(无可用 vendor)等特定场景中包含。
|
|
1153
|
+
|
|
1154
|
+
**HTTP 状态码对照**
|
|
1155
|
+
|
|
1156
|
+
| HTTP 状态码 | `error.type` | 触发场景 | 是否可重试 |
|
|
1157
|
+
|------------|-------------|---------|-----------|
|
|
1158
|
+
| `400` | `invalid_request_error` | 请求格式/内容不合规(消息结构错误、缺少必填字段、无兼容 vendor 等) | ✗ |
|
|
1159
|
+
| `401` | `authentication_error` | 无有效认证凭证 | ✗ |
|
|
1160
|
+
| `403` | `permission_error` | 权限不足 | ✗ |
|
|
1161
|
+
| `429` | `rate_limit_error` | 所有 vendor 均触发速率限制 | ✓(等待后重试) |
|
|
1162
|
+
| `500` | `api_error` | 代理内部异常 | ✓(视情况) |
|
|
1163
|
+
| `502` | `api_error` | 所有 vendor 均不可达(超时/连接失败) | ✓ |
|
|
1164
|
+
| `503` | `authentication_error` | Token 获取失败(如 OAuth 凭证失效) | ✓(重新认证后) |
|
|
1165
|
+
|
|
1166
|
+
**流式错误事件**
|
|
1167
|
+
|
|
1168
|
+
流式响应中途发生错误时,以 SSE 事件形式通知:
|
|
1169
|
+
|
|
1170
|
+
```
|
|
1171
|
+
event: error
|
|
1172
|
+
data: {"type":"error","error":{"type":"api_error","message":"上游连接超时"}}
|
|
1173
|
+
```
|
|
1174
|
+
|
|
1175
|
+
---
|
|
1176
|
+
|
|
1177
|
+
#### 5.2.6 请求规范化行为
|
|
1178
|
+
|
|
1179
|
+
代理在将请求转发至 vendor 前,会自动进行规范化处理。**以下行为对调用方透明,无需手动处理**:
|
|
1180
|
+
|
|
1181
|
+
**自动修复(静默处理)**
|
|
1182
|
+
|
|
1183
|
+
| 问题 | 处理方式 |
|
|
1184
|
+
|------|---------|
|
|
1185
|
+
| `tool_use_id` 格式不符(非 `toolu_` 前缀,如 `srvtoolu_*`) | 自动重写为合规格式,并维护映射关系 |
|
|
1186
|
+
| `tool_result` 出现在 `assistant` 消息中 | 将该 block 从 assistant 消息中移出并重定位(自 v0.2.3 起不再直接剥离;首次触发时记录 WARNING 日志) |
|
|
1187
|
+
| `tool_use` 缺少合法 ID | 自动生成新 ID 并建立映射 |
|
|
1188
|
+
|
|
1189
|
+
**致命验证错误(返回 HTTP 400)**
|
|
1190
|
+
|
|
1191
|
+
| 场景 | 错误示例 |
|
|
1192
|
+
|------|---------|
|
|
1193
|
+
| `tool_use` block 缺少 `id` 字段 | `"tool_use block is missing 'id' field"` |
|
|
1194
|
+
| `tool_result` block 缺少 `tool_use_id` 字段 | `"tool_result block is missing 'tool_use_id' field"` |
|
|
1195
|
+
| 消息角色不交替(连续相同角色) | `"messages must alternate between user and assistant"` |
|
|
1196
|
+
| `messages` 末尾不是 `user` 消息 | `"last message must be from user"` |
|
|
1197
|
+
|
|
1198
|
+
**Thinking Block 跨 Vendor 处理**
|
|
1199
|
+
|
|
1200
|
+
当请求被路由至非 Anthropic vendor(如 Copilot、智谱等)时,assistant 历史消息中的 `thinking` block 会被自动剥离,因为这些 block 包含仅 Anthropic 可验证的签名(`signature` 字段)。此行为不影响当前轮次的 `thinking` 功能配置(由目标 vendor 的能力决定)。
|
|
920
1201
|
|
|
921
|
-
> **注**:示例中使用 `claude-sonnet-4
|
|
1202
|
+
> **注**:示例中使用 `claude-sonnet-4-5` 作为模型 ID 示例。实际可用的模型 ID 取决于配置的 vendor 与 `model_mapping` 规则,以 [Anthropic 官方文档](https://docs.anthropic.com/en/docs/about-claude/models) 和本地配置为准。
|
|
922
1203
|
|
|
923
1204
|
### 5.3 POST /v1/messages/count_tokens
|
|
924
1205
|
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
|
|
30
30
|
## 🌟 核心特性 (Core Features)
|
|
31
31
|
|
|
32
|
-
- **⛓️ N-tier 链式故障转移 (Failover)
|
|
32
|
+
- **⛓️ N-tier 链式故障转移 (Failover)**:自主降序序列,支持 Claude 官方 Plans,以及 GitHub Copilot、智谱、MiniMax、阿里千问、小米、Kimi、豆包等的 Coding Plan。
|
|
33
33
|
- **🛡️ 智能弹性与容灾守卫**:每个供应商节点独立配备 **熔断器 (Circuit Breaker)** 与 **配额守卫 (Quota Guard)**,防雪崩、主动避险。
|
|
34
34
|
- **👻 透明无感代理机制**:对客户端 **100% 透明**!无需修改任何代码,仅需一行配置覆盖 `ANTHROPIC_BASE_URL` 即可接入。
|
|
35
35
|
- **🔄 跨模型与全格式转换**:原生支持 Anthropic ←→ Gemini 的请求与流式响应(SSE)双向转换,并支持自动/自助映射模型名称(如 `claude-*` 至 `glm-*`)。
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "coding-proxy"
|
|
3
|
-
version = "0.2.1a3"
|
|
3
|
+
version = "0.2.3a1"
|
|
4
4
|
description = "A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao..."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
@@ -216,16 +216,50 @@ async def _run_usage(
|
|
|
216
216
|
@app.command()
|
|
217
217
|
def reset(
|
|
218
218
|
port: int = typer.Option(8046, "--port", "-p", help="代理服务端口"),
|
|
219
|
+
vendor: str | None = typer.Option(
|
|
220
|
+
None,
|
|
221
|
+
"--vendor",
|
|
222
|
+
"-v",
|
|
223
|
+
help="提升/重排序 vendor 优先级(单个或逗号分隔多个)",
|
|
224
|
+
),
|
|
219
225
|
) -> None:
|
|
220
|
-
"""
|
|
226
|
+
"""重置所有层级的熔断器和配额守卫.
|
|
227
|
+
|
|
228
|
+
可通过 -v 指定运行时 N-tier 链路重排序:
|
|
229
|
+
|
|
230
|
+
\b
|
|
231
|
+
-v zhipu 提升 zhipu 到最高优先级
|
|
232
|
+
-v zhipu,anthropic 替换整个 N-tier 链路顺序
|
|
233
|
+
"""
|
|
221
234
|
import httpx
|
|
222
235
|
|
|
236
|
+
# 构建请求 body
|
|
237
|
+
json_body: dict | None = None
|
|
238
|
+
if vendor:
|
|
239
|
+
parts = [v.strip() for v in vendor.split(",") if v.strip()]
|
|
240
|
+
if parts:
|
|
241
|
+
json_body = {"vendors": parts}
|
|
242
|
+
|
|
223
243
|
try:
|
|
224
|
-
resp = httpx.post(
|
|
244
|
+
resp = httpx.post(
|
|
245
|
+
f"http://127.0.0.1:{port}/api/reset",
|
|
246
|
+
json=json_body,
|
|
247
|
+
timeout=5,
|
|
248
|
+
)
|
|
225
249
|
if resp.status_code == 200:
|
|
250
|
+
data = resp.json()
|
|
226
251
|
console.print("[green]所有层级的熔断器和配额守卫已重置[/green]")
|
|
252
|
+
tier_order = data.get("tier_order")
|
|
253
|
+
if tier_order:
|
|
254
|
+
order_str = " → ".join(tier_order)
|
|
255
|
+
console.print(f"[cyan]当前链路顺序:[/] {order_str}")
|
|
227
256
|
else:
|
|
228
|
-
|
|
257
|
+
try:
|
|
258
|
+
err = resp.json()
|
|
259
|
+
msg = err.get("error", {}).get("message", resp.text)
|
|
260
|
+
except Exception:
|
|
261
|
+
msg = resp.text
|
|
262
|
+
console.print(f"[red]重置失败: {msg}[/red]")
|
|
229
263
|
except httpx.ConnectError:
|
|
230
264
|
console.print("[red]代理服务未运行[/red]")
|
|
231
265
|
|
|
@@ -111,7 +111,7 @@ vendors:
|
|
|
111
111
|
# 不配置 circuit_breaker → 自动成为终端层,不触发向下故障转移
|
|
112
112
|
circuit_breaker:
|
|
113
113
|
failure_threshold: 3
|
|
114
|
-
recovery_timeout_seconds:
|
|
114
|
+
recovery_timeout_seconds: 30
|
|
115
115
|
success_threshold: 2
|
|
116
116
|
quota_guard:
|
|
117
117
|
enabled: true # 启用后按 Premium Requests 配额管理
|
|
@@ -421,6 +421,11 @@ pricing:
|
|
|
421
421
|
input_cost_per_mtok: ¥0.80
|
|
422
422
|
output_cost_per_mtok: ¥2.00
|
|
423
423
|
cache_read_cost_per_mtok: ¥0.16
|
|
424
|
+
- vendor: zhipu
|
|
425
|
+
model: glm-4.7 # 待区分长短上下文定价
|
|
426
|
+
input_cost_per_mtok: ¥2.00
|
|
427
|
+
output_cost_per_mtok: ¥8.00
|
|
428
|
+
cache_read_cost_per_mtok: ¥0.40
|
|
424
429
|
- vendor: zhipu
|
|
425
430
|
model: glm-5v-turbo # 待区分长短上下文定价
|
|
426
431
|
input_cost_per_mtok: ¥5.00
|
|
@@ -118,7 +118,11 @@ def build_log_config(
|
|
|
118
118
|
},
|
|
119
119
|
"loggers": {
|
|
120
120
|
"uvicorn": {"handlers": ["default"], "level": level, "propagate": False},
|
|
121
|
-
"uvicorn.error": {
|
|
121
|
+
"uvicorn.error": {
|
|
122
|
+
"handlers": ["default"],
|
|
123
|
+
"level": level,
|
|
124
|
+
"propagate": False,
|
|
125
|
+
},
|
|
122
126
|
"uvicorn.access": {
|
|
123
127
|
"handlers": ["access"],
|
|
124
128
|
"level": "INFO",
|
|
@@ -56,6 +56,18 @@ class QuotaGuard:
|
|
|
56
56
|
"""滑动窗口小时数(供基线加载使用)."""
|
|
57
57
|
return self._window / 3600
|
|
58
58
|
|
|
59
|
+
@property
def _window_label(self) -> str:
    """Return a short human-readable label for the sliding-window length.

    ``self._window`` is interpreted as a number of seconds.  The largest unit
    (days, hours, minutes) that divides the window exactly is used; any other
    value is rendered in seconds.

    Returns:
        A label such as ``"1d"``, ``"5h"``, ``"30m"`` or ``"90s"``.
    """
    w = self._window
    # An integral float (e.g. 18000.0) would otherwise render as "5.0h";
    # normalise it so the label is the same as for the equivalent int.
    if isinstance(w, float) and w.is_integer():
        w = int(w)
    for unit_seconds, suffix in ((86400, "d"), (3600, "h"), (60, "m")):
        if w >= unit_seconds and w % unit_seconds == 0:
            return f"{w // unit_seconds}{suffix}"
    return f"{w}s"
|
|
70
|
+
|
|
59
71
|
def can_use_primary(self) -> bool:
|
|
60
72
|
"""判断是否可以使用主后端."""
|
|
61
73
|
if not self._enabled:
|
|
@@ -68,7 +80,8 @@ class QuotaGuard:
|
|
|
68
80
|
):
|
|
69
81
|
self._transition_to(QuotaState.QUOTA_EXCEEDED)
|
|
70
82
|
logger.warning(
|
|
71
|
-
"Quota guard: WITHIN_QUOTA → EXCEEDED (%.1f%%)",
|
|
83
|
+
"Quota guard [%s]: WITHIN_QUOTA → EXCEEDED (%.1f%%)",
|
|
84
|
+
self._window_label,
|
|
72
85
|
self._total / self._budget * 100,
|
|
73
86
|
)
|
|
74
87
|
return False
|
|
@@ -80,12 +93,18 @@ class QuotaGuard:
|
|
|
80
93
|
and self._total < int(self._budget * self._threshold)
|
|
81
94
|
):
|
|
82
95
|
self._transition_to(QuotaState.WITHIN_QUOTA)
|
|
83
|
-
logger.info(
|
|
96
|
+
logger.info(
|
|
97
|
+
"Quota guard [%s]: EXCEEDED → WITHIN_QUOTA (usage dropped)",
|
|
98
|
+
self._window_label,
|
|
99
|
+
)
|
|
84
100
|
return True
|
|
85
101
|
now = time.monotonic()
|
|
86
102
|
if now - self._last_probe >= self._effective_probe_interval:
|
|
87
103
|
self._last_probe = now
|
|
88
|
-
logger.info(
|
|
104
|
+
logger.info(
|
|
105
|
+
"Quota guard [%s]: allowing probe request",
|
|
106
|
+
self._window_label,
|
|
107
|
+
)
|
|
89
108
|
return True
|
|
90
109
|
return False
|
|
91
110
|
|
|
@@ -104,7 +123,10 @@ class QuotaGuard:
|
|
|
104
123
|
with self._lock:
|
|
105
124
|
if self._state == QuotaState.QUOTA_EXCEEDED:
|
|
106
125
|
self._transition_to(QuotaState.WITHIN_QUOTA)
|
|
107
|
-
logger.info(
|
|
126
|
+
logger.info(
|
|
127
|
+
"Quota guard [%s]: EXCEEDED → WITHIN_QUOTA (probe success)",
|
|
128
|
+
self._window_label,
|
|
129
|
+
)
|
|
108
130
|
|
|
109
131
|
def notify_cap_error(self, retry_after_seconds: float | None = None) -> None:
|
|
110
132
|
"""外部通知检测到用量上限错误.
|
|
@@ -125,7 +147,8 @@ class QuotaGuard:
|
|
|
125
147
|
)
|
|
126
148
|
self._cap_error_active = True
|
|
127
149
|
logger.warning(
|
|
128
|
-
"Quota guard: cap error detected → EXCEEDED (effective_probe=%ds)",
|
|
150
|
+
"Quota guard [%s]: cap error detected → EXCEEDED (effective_probe=%ds)",
|
|
151
|
+
self._window_label,
|
|
129
152
|
int(self._effective_probe_interval),
|
|
130
153
|
)
|
|
131
154
|
|
|
@@ -139,12 +162,17 @@ class QuotaGuard:
|
|
|
139
162
|
self._total += total_tokens
|
|
140
163
|
if vendor:
|
|
141
164
|
logger.info(
|
|
142
|
-
"Quota guard [%s]: loaded baseline %d tokens",
|
|
165
|
+
"Quota guard [%s/%s]: loaded baseline %d tokens",
|
|
143
166
|
vendor,
|
|
167
|
+
self._window_label,
|
|
144
168
|
total_tokens,
|
|
145
169
|
)
|
|
146
170
|
else:
|
|
147
|
-
logger.info(
|
|
171
|
+
logger.info(
|
|
172
|
+
"Quota guard [%s]: loaded baseline %d tokens",
|
|
173
|
+
self._window_label,
|
|
174
|
+
total_tokens,
|
|
175
|
+
)
|
|
148
176
|
|
|
149
177
|
def reset(self) -> None:
|
|
150
178
|
"""手动重置为 WITHIN_QUOTA 状态."""
|
|
@@ -152,7 +180,10 @@ class QuotaGuard:
|
|
|
152
180
|
self._transition_to(QuotaState.WITHIN_QUOTA)
|
|
153
181
|
self._entries.clear()
|
|
154
182
|
self._total = 0
|
|
155
|
-
logger.info(
|
|
183
|
+
logger.info(
|
|
184
|
+
"Quota guard [%s]: manually reset to WITHIN_QUOTA",
|
|
185
|
+
self._window_label,
|
|
186
|
+
)
|
|
156
187
|
|
|
157
188
|
def get_info(self) -> dict:
|
|
158
189
|
"""获取配额守卫状态信息."""
|
|
@@ -68,6 +68,66 @@ class RequestRouter:
|
|
|
68
68
|
"""当前活跃供应商名称(由 Executor 在成功响应时写入)."""
|
|
69
69
|
return self._active_vendor_name
|
|
70
70
|
|
|
71
|
+
# ── 运行时 N-tier 链路重排序 ─────────────────────────────
|
|
72
|
+
|
|
73
|
+
def get_vendor_names(self) -> list[str]:
    """Return the vendor name of every tier, in current priority order.

    A new list is built on each call, so callers may mutate the result
    without affecting the router's tier list.
    """
    return [tier.name for tier in self._tiers]
|
|
76
|
+
|
|
77
|
+
def reorder_tiers(self, vendor_names: list[str]) -> None:
    """Reorder the N-tier chain in place.

    The new order is written with slice assignment so that ``self._tiers``
    stays the same list object; anything holding a reference to that list
    sees the new order immediately.

    Args:
        vendor_names: The complete new vendor order.  It must name every
            current tier exactly once.

    Raises:
        ValueError: If a name is duplicated, is not a current tier, or if
            any current tier is missing from ``vendor_names``.
    """
    name_to_tier = {t.name: t for t in self._tiers}
    current_names = set(name_to_tier)

    # Validation 1: duplicates.  Each repeated name is reported once, in
    # order of first repetition, however many times it occurs.
    seen: set[str] = set()
    dups: list[str] = []
    for name in vendor_names:
        if name in seen:
            if name not in dups:
                dups.append(name)
        else:
            seen.add(name)
    if dups:
        raise ValueError(f"vendor 名称重复: {', '.join(dups)}")

    # Validation 2: every requested name must be an existing tier.
    unknown = [n for n in vendor_names if n not in current_names]
    if unknown:
        raise ValueError(
            f"未知 vendor: {', '.join(unknown)}; "
            f"可用: {', '.join(sorted(current_names))}"
        )

    # Validation 3: full coverage.  Unknown names were rejected above, so
    # ``seen`` is a subset of the current names and any difference is a
    # missing tier.
    missing = current_names - seen
    if missing:
        raise ValueError(f"缺少 vendor: {', '.join(sorted(missing))}")

    # Slice assignment mutates the existing list rather than rebinding it.
    self._tiers[:] = [name_to_tier[n] for n in vendor_names]
|
|
112
|
+
|
|
113
|
+
def promote_vendor(self, vendor_name: str) -> None:
    """Move one vendor to the front of the tier chain.

    All other vendors keep their relative order.  The actual reordering is
    delegated to ``reorder_tiers``.

    Args:
        vendor_name: Name of the vendor to give top priority.

    Raises:
        ValueError: If ``vendor_name`` is not one of the current tiers.
    """
    ordered = self.get_vendor_names()
    if vendor_name not in ordered:
        choices = ", ".join(sorted(tier.name for tier in self._tiers))
        raise ValueError(f"未知 vendor: {vendor_name}; 可用: {choices}")

    remainder = [name for name in ordered if name != vendor_name]
    self.reorder_tiers([vendor_name, *remainder])
|
|
130
|
+
|
|
71
131
|
# ── 公开路由接口(委托给 _RouteExecutor)───────────────
|
|
72
132
|
|
|
73
133
|
async def route_stream(
|