coding-proxy 0.4.1a9__tar.gz → 0.4.1a10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/CHANGELOG.md +2 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/PKG-INFO +1 -1
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/arch/config-reference.md +34 -6
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/pyproject.toml +1 -1
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/config/config.default.yaml +8 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/config/routing.py +10 -3
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/config/schema.py +2 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/config/vendors.py +17 -1
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/server/factory.py +10 -6
- coding_proxy-0.4.1a10/src/coding/proxy/vendors/concurrency.py +78 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/zhipu.py +86 -33
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_schema.py +2 -1
- coding_proxy-0.4.1a10/tests/test_zhipu_concurrency.py +557 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/uv.lock +1 -1
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/.github/workflows/ci.yml +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/.github/workflows/coverage.yml +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/.github/workflows/release.yml +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/.gitignore +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/.pre-commit-config.yaml +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/AGENTS.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/CLAUDE.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/LICENSE +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/README.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/assets/dashboard-v0.4.0.png +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/assets/session-v0.4.0.png +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/agents/browser-validation.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/agents/issue.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/agents/knowledge-map.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/agents/reference-specifications.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/arch/convert.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/arch/design-patterns.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/arch/routing.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/arch/testing.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/arch/vendors.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/framework.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/guide/api-reference.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/guide/cli-reference.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/guide/dashboard.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/guide/monitoring.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/guide/quickstart.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/guide/vendors.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/ops/ci-cd.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/user-guide.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/docs/zh-CN/README.md +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/__main__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/auth/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/auth/providers/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/auth/providers/base.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/auth/providers/github.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/auth/providers/google.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/auth/runtime.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/auth/store.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/cli/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/cli/auth_commands.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/cli/banner.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/compat/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/compat/canonical.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/compat/session_store.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/config/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/config/auth_schema.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/config/loader.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/config/resiliency.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/config/server.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/config/session_policy.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/convert/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/convert/anthropic_to_gemini.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/convert/anthropic_to_openai.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/convert/gemini_sse_adapter.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/convert/gemini_to_anthropic.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/convert/openai_to_anthropic.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/convert/vendor_channels.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/logging/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/logging/db.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/logging/formatters.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/logging/stats.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/model/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/model/auth.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/model/compat.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/model/constants.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/model/pricing.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/model/token.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/model/vendor.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/config.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/extractors/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/extractors/anthropic.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/extractors/gemini.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/extractors/openai.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/handler.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/operation.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/routes.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/usage_registry.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/pricing.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/circuit_breaker.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/error_classifier.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/executor.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/model_mapper.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/quota_guard.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/rate_limit.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/retry.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/router.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/session_manager.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/session_policy.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/tier.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/usage_parser.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/usage_recorder.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/server/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/server/app.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/server/dashboard.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/server/responses.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/server/routes.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/streaming/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/streaming/anthropic_compat.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/alibaba.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/anthropic.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/antigravity.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/base.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/copilot.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/copilot_models.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/copilot_token_manager.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/copilot_urls.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/doubao.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/kimi.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/minimax.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/mixins.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/native_anthropic.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/token_manager.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/xiaomi.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/e2e/__init__.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/e2e/conftest.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/e2e/test_e2e_http.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/e2e/test_e2e_token.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/e2e/test_e2e_vendor.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_antigravity.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_app_routes.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_auto_login.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_banner.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_circuit_breaker.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_cli_usage.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_compat.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_config_init.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_config_loader.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_convert_request.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_convert_response.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_convert_sse.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_copilot.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_copilot_convert_request.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_copilot_convert_response.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_copilot_models.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_copilot_urls.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_currency.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_error_classifier.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_logging_dual_write.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_mixins.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_model_auth.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_model_compat.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_model_constants.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_model_mapper.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_model_pricing.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_model_token.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_model_vendor.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_native_api_base_url_override.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_native_api_extractors.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_native_api_handler.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_native_api_operation.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_native_api_routes.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_native_vendors.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_parse_usage.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_parse_usage_gemini.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_pricing.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_quota_guard.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_rate_limit.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_router_chain.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_router_executor.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_runtime_reauth.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_session_aware.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_streaming_anthropic_compat.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_tier.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_tiers_config.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_time_range.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_token_logger.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_token_logger_native_columns.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_token_manager.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_types.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_vendor_channels.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_vendor_streaming.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_vendors.py +0 -0
- {coding_proxy-0.4.1a9 → coding_proxy-0.4.1a10}/tests/test_zhipu.py +0 -0
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
- feat(zhipu): 新增每模型并发限制(默认 3,可通过 `vendors[zhipu].concurrency` 配置),基于 `asyncio.Semaphore` 实现 FIFO 公平排队,流式与非流式共用同一槽位,与 429 重试机制兼容。
|
|
8
|
+
|
|
7
9
|
## [v0.4.0](https://github.com/ThreeFish-AI/coding-proxy/releases/tag/v0.4.0) — 2026-05-01
|
|
8
10
|
|
|
9
11
|
> [!IMPORTANT]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coding-proxy
|
|
3
|
-
Version: 0.4.1a9
|
|
3
|
+
Version: 0.4.1a10
|
|
4
4
|
Summary: A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao...
|
|
5
5
|
Project-URL: Source Code, https://github.com/ThreeFish-AI/coding-proxy
|
|
6
6
|
Project-URL: User Guide, https://github.com/ThreeFish-AI/coding-proxy/blob/master/docs/user-guide.md
|
|
@@ -89,12 +89,13 @@ flowchart TD
|
|
|
89
89
|
|
|
90
90
|
## 5. VendorConfig 弹性字段
|
|
91
91
|
|
|
92
|
-
| 字段 | 类型 | 默认值 | 说明
|
|
93
|
-
| -------------------- | -------------- | -------------------- |
|
|
94
|
-
| `circuit_breaker` | config \| None | `None` | 熔断器配置(None = 终端层)
|
|
95
|
-
| `retry` | config | `RetryConfig()` | 重试策略配置
|
|
96
|
-
| `quota_guard` | config | `QuotaGuardConfig()` | 日度配额守卫配置
|
|
97
|
-
| `weekly_quota_guard` | config | `QuotaGuardConfig()` | 周度配额守卫配置
|
|
92
|
+
| 字段 | 类型 | 默认值 | 说明 |
|
|
93
|
+
| -------------------- | -------------- | -------------------- | ----------------------------------- |
|
|
94
|
+
| `circuit_breaker` | config \| None | `None` | 熔断器配置(None = 终端层) |
|
|
95
|
+
| `retry` | config | `RetryConfig()` | 重试策略配置 |
|
|
96
|
+
| `quota_guard` | config | `QuotaGuardConfig()` | 日度配额守卫配置 |
|
|
97
|
+
| `weekly_quota_guard` | config | `QuotaGuardConfig()` | 周度配额守卫配置 |
|
|
98
|
+
| `concurrency` | config \| None | `None` | `[zhipu]` 每模型并发限制(详见 5.5) |
|
|
98
99
|
|
|
99
100
|
<a id="elastic-params"></a>
|
|
100
101
|
|
|
@@ -143,6 +144,33 @@ flowchart TD
|
|
|
143
144
|
| `error_types` | list[str] | `["rate_limit_error", "overloaded_error", "api_error"]` |
|
|
144
145
|
| `error_message_patterns` | list[str] | `["quota", "limit exceeded", "usage cap", "capacity", "internal network failure"]` |
|
|
145
146
|
|
|
147
|
+
### 5.5 ZhipuConcurrencyConfig — Zhipu 每模型并发参数
|
|
148
|
+
|
|
149
|
+
仅对 `vendor: zhipu` 生效,基于 `asyncio.Semaphore` 实现 FIFO 公平排队。
|
|
150
|
+
|
|
151
|
+
| 字段 | 类型 | 默认值 | 说明 |
|
|
152
|
+
| --------- | -------------- | ------ | -------------------------------------------------------------------------------- |
|
|
153
|
+
| `default` | int | `3` | 全局默认并行度(适用于所有未在 `models` 中显式覆盖的模型);取值范围 `[1, 20]` |
|
|
154
|
+
| `models` | map[str → int] | `{}` | 按映射后模型名(如 `glm-5v-turbo` / `glm-5.1` / `glm-4.5-air`)自定义并行度上限 |
|
|
155
|
+
|
|
156
|
+
YAML 示例:
|
|
157
|
+
|
|
158
|
+
```yaml
|
|
159
|
+
- vendor: zhipu
|
|
160
|
+
concurrency:
|
|
161
|
+
default: 3
|
|
162
|
+
models:
|
|
163
|
+
glm-5v-turbo: 5
|
|
164
|
+
glm-5.1: 2
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
行为语义:
|
|
168
|
+
|
|
169
|
+
- 信号量按**映射后模型名**键控,与上游真实承载模型对齐;流式与非流式请求共用同一槽位。
|
|
170
|
+
- 槽位满时新请求按 FIFO 顺序排队,直到任一在途请求释放槽位才被唤醒。
|
|
171
|
+
- 429 重试期间持续占用槽位(重试视为同一请求的延续)。
|
|
172
|
+
- 顶层 `concurrency` 字段缺省为 `None` → 转发至 `ZhipuConfig` 时回退默认值 `default=3`;如需完全关闭限流,可在 `ZhipuConfig` 构造层显式置 `null`(一般无需操作)。
|
|
173
|
+
|
|
146
174
|
---
|
|
147
175
|
|
|
148
176
|
## 6. 供应商专属字段
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "coding-proxy"
|
|
3
|
-
version = "0.4.1a9"
|
|
3
|
+
version = "0.4.1a10"
|
|
4
4
|
description = "A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao..."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
@@ -119,6 +119,14 @@ vendors:
|
|
|
119
119
|
window_hours: 24.0
|
|
120
120
|
threshold_percent: 95.0
|
|
121
121
|
probe_interval_seconds: 300
|
|
122
|
+
# 每模型并发限制:默认 3 个并行请求;超出则按 FIFO 排队等待
|
|
123
|
+
# 可通过 models 字段覆盖单个模型的限制(如 glm-5.1: 5)
|
|
124
|
+
concurrency:
|
|
125
|
+
default: 3
|
|
126
|
+
# models:
|
|
127
|
+
# glm-5v-turbo: 3
|
|
128
|
+
# glm-5.1: 3
|
|
129
|
+
# glm-4.5-air: 3
|
|
122
130
|
|
|
123
131
|
# Vendor 4: MiniMax(默认禁用,需手动启用并添加到 tiers)
|
|
124
132
|
- vendor: minimax
|
|
@@ -9,6 +9,7 @@ from typing import Annotated, Any, Literal
|
|
|
9
9
|
from pydantic import BaseModel, BeforeValidator, Field, PrivateAttr, model_validator
|
|
10
10
|
|
|
11
11
|
from .resiliency import CircuitBreakerConfig, QuotaGuardConfig, RetryConfig
|
|
12
|
+
from .vendors import ZhipuConcurrencyConfig
|
|
12
13
|
|
|
13
14
|
# ── 价格字段解析($ / ¥ 前缀支持) ──────────────────────────
|
|
14
15
|
|
|
@@ -64,13 +65,13 @@ _NATIVE_ANTHROPIC_FIELDS: frozenset[str] = frozenset(
|
|
|
64
65
|
"api_key",
|
|
65
66
|
}
|
|
66
67
|
)
|
|
67
|
-
#
|
|
68
|
-
_ZHIPU_FIELDS = _NATIVE_ANTHROPIC_FIELDS
|
|
68
|
+
# Zhipu 独占字段:在通用 api_key 基础上增加每模型并发限制
|
|
69
|
+
_ZHIPU_FIELDS: frozenset[str] = _NATIVE_ANTHROPIC_FIELDS | frozenset({"concurrency"})
|
|
69
70
|
|
|
70
71
|
_VENDOR_EXCLUSIVE_FIELDS: dict[str, frozenset[str]] = {
|
|
71
72
|
"copilot": _COPILOT_FIELDS,
|
|
72
73
|
"antigravity": _ANTIGRAVITY_FIELDS,
|
|
73
|
-
"zhipu":
|
|
74
|
+
"zhipu": _ZHIPU_FIELDS,
|
|
74
75
|
"minimax": _NATIVE_ANTHROPIC_FIELDS,
|
|
75
76
|
"kimi": _NATIVE_ANTHROPIC_FIELDS,
|
|
76
77
|
"doubao": _NATIVE_ANTHROPIC_FIELDS,
|
|
@@ -285,6 +286,12 @@ class VendorConfig(BaseModel):
|
|
|
285
286
|
quota_guard: QuotaGuardConfig = Field(default_factory=QuotaGuardConfig)
|
|
286
287
|
weekly_quota_guard: QuotaGuardConfig = Field(default_factory=QuotaGuardConfig)
|
|
287
288
|
|
|
289
|
+
# ── Zhipu 专属:每模型并发限制 ───────────────────────────
|
|
290
|
+
concurrency: ZhipuConcurrencyConfig | None = Field(
|
|
291
|
+
default=None,
|
|
292
|
+
description="[zhipu] 每模型并发限制;None 表示不限并发",
|
|
293
|
+
)
|
|
294
|
+
|
|
288
295
|
@model_validator(mode="after")
|
|
289
296
|
def _warn_irrelevant_fields(self) -> VendorConfig:
|
|
290
297
|
"""对非当前 vendor 类型的非空专属字段发出 warning."""
|
|
@@ -54,6 +54,7 @@ from .vendors import ( # noqa: F401
|
|
|
54
54
|
KimiConfig,
|
|
55
55
|
MinimaxConfig,
|
|
56
56
|
XiaomiConfig,
|
|
57
|
+
ZhipuConcurrencyConfig,
|
|
57
58
|
ZhipuConfig,
|
|
58
59
|
)
|
|
59
60
|
|
|
@@ -318,6 +319,7 @@ __all__ = [
|
|
|
318
319
|
"CopilotConfig",
|
|
319
320
|
"AntigravityConfig",
|
|
320
321
|
"ZhipuConfig",
|
|
322
|
+
"ZhipuConcurrencyConfig",
|
|
321
323
|
# resiliency
|
|
322
324
|
"CircuitBreakerConfig",
|
|
323
325
|
"RetryConfig",
|
|
@@ -2,7 +2,21 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from pydantic import BaseModel
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ZhipuConcurrencyConfig(BaseModel):
|
|
9
|
+
"""Zhipu 每模型并发限制配置."""
|
|
10
|
+
|
|
11
|
+
default: int = Field(default=3, ge=1, le=20, description="全局默认并行度")
|
|
12
|
+
models: dict[str, int] = Field(
|
|
13
|
+
default_factory=dict,
|
|
14
|
+
description="按映射后模型名自定义并行度(覆盖 default)",
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
def get_limit(self, model: str) -> int:
|
|
18
|
+
"""获取指定模型的并行度限制."""
|
|
19
|
+
return self.models.get(model, self.default)
|
|
6
20
|
|
|
7
21
|
|
|
8
22
|
class AnthropicConfig(BaseModel):
|
|
@@ -48,6 +62,7 @@ class ZhipuConfig(BaseModel):
|
|
|
48
62
|
base_url: str = "https://open.bigmodel.cn/api/anthropic"
|
|
49
63
|
api_key: str = ""
|
|
50
64
|
timeout_ms: int = 3000000
|
|
65
|
+
concurrency: ZhipuConcurrencyConfig = Field(default_factory=ZhipuConcurrencyConfig)
|
|
51
66
|
|
|
52
67
|
|
|
53
68
|
class MinimaxConfig(BaseModel):
|
|
@@ -100,6 +115,7 @@ __all__ = [
|
|
|
100
115
|
"CopilotConfig",
|
|
101
116
|
"AntigravityConfig",
|
|
102
117
|
"ZhipuConfig",
|
|
118
|
+
"ZhipuConcurrencyConfig",
|
|
103
119
|
"MinimaxConfig",
|
|
104
120
|
"KimiConfig",
|
|
105
121
|
"DoubaoConfig",
|
|
@@ -156,13 +156,17 @@ def _create_vendor_from_config(
|
|
|
156
156
|
cfg = _resolve_antigravity_credentials(cfg, token_store)
|
|
157
157
|
return AntigravityVendor(cfg, failover_cfg, mapper)
|
|
158
158
|
case "zhipu":
|
|
159
|
-
|
|
160
|
-
enabled=vendor_cfg.enabled,
|
|
161
|
-
base_url=vendor_cfg.base_url
|
|
159
|
+
zhipu_kwargs: dict[str, Any] = {
|
|
160
|
+
"enabled": vendor_cfg.enabled,
|
|
161
|
+
"base_url": vendor_cfg.base_url
|
|
162
162
|
or "https://open.bigmodel.cn/api/anthropic",
|
|
163
|
-
api_key=vendor_cfg.api_key,
|
|
164
|
-
timeout_ms=vendor_cfg.timeout_ms,
|
|
165
|
-
|
|
163
|
+
"api_key": vendor_cfg.api_key,
|
|
164
|
+
"timeout_ms": vendor_cfg.timeout_ms,
|
|
165
|
+
}
|
|
166
|
+
# 仅当显式配置了 concurrency 时转发,否则使用 ZhipuConfig 默认值
|
|
167
|
+
if vendor_cfg.concurrency is not None:
|
|
168
|
+
zhipu_kwargs["concurrency"] = vendor_cfg.concurrency
|
|
169
|
+
cfg = ZhipuConfig(**zhipu_kwargs)
|
|
166
170
|
return ZhipuVendor(cfg, mapper, failover_cfg)
|
|
167
171
|
case "minimax":
|
|
168
172
|
cfg = MinimaxConfig(
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""每模型并发限制器 — 基于 asyncio.Semaphore 的公平排队.
|
|
2
|
+
|
|
3
|
+
为每个映射后的模型(如 ``glm-5v-turbo``)独立维护一个 ``asyncio.Semaphore``,
|
|
4
|
+
确保同一时间点该模型的并行请求数不超过配置的上限。当所有槽位被占满时,
|
|
5
|
+
新请求按 FIFO 顺序排队等待,直到有槽位释放。
|
|
6
|
+
|
|
7
|
+
设计要点:
|
|
8
|
+
- **惰性创建**:仅在首次请求到达时才为该模型创建 Semaphore,避免冷启动开销
|
|
9
|
+
- **FIFO 公平**:``asyncio.Semaphore`` 内部使用 FIFO 队列,天然满足排队语义
|
|
10
|
+
- **按映射后模型名键控**:与上游真实承载能力对齐,而非按客户端请求名(如 ``claude-sonnet-*``)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import logging
|
|
17
|
+
|
|
18
|
+
from ..config.vendors import ZhipuConcurrencyConfig
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ModelConcurrencyLimiter:
|
|
24
|
+
"""按模型名提供独立并发槽位的限制器.
|
|
25
|
+
|
|
26
|
+
用法::
|
|
27
|
+
|
|
28
|
+
limiter = ModelConcurrencyLimiter(config)
|
|
29
|
+
sem = await limiter.acquire("glm-5v-turbo")
|
|
30
|
+
try:
|
|
31
|
+
... # 执行请求
|
|
32
|
+
finally:
|
|
33
|
+
sem.release()
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
def __init__(self, config: ZhipuConcurrencyConfig) -> None:
|
|
37
|
+
self._config = config
|
|
38
|
+
self._semaphores: dict[str, asyncio.Semaphore] = {}
|
|
39
|
+
|
|
40
|
+
def _get_semaphore(self, model: str) -> asyncio.Semaphore:
|
|
41
|
+
"""获取(或惰性创建)指定模型的信号量."""
|
|
42
|
+
sem = self._semaphores.get(model)
|
|
43
|
+
if sem is None:
|
|
44
|
+
limit = self._config.get_limit(model)
|
|
45
|
+
sem = asyncio.Semaphore(limit)
|
|
46
|
+
self._semaphores[model] = sem
|
|
47
|
+
logger.debug(
|
|
48
|
+
"ModelConcurrencyLimiter: created semaphore model=%s limit=%d",
|
|
49
|
+
model,
|
|
50
|
+
limit,
|
|
51
|
+
)
|
|
52
|
+
return sem
|
|
53
|
+
|
|
54
|
+
async def acquire(self, model: str) -> asyncio.Semaphore:
|
|
55
|
+
"""获取指定模型的并发槽位,必要时阻塞排队.
|
|
56
|
+
|
|
57
|
+
返回已获取的 Semaphore 实例,调用方负责在请求完成后调用 ``release()``。
|
|
58
|
+
"""
|
|
59
|
+
sem = self._get_semaphore(model)
|
|
60
|
+
await sem.acquire()
|
|
61
|
+
return sem
|
|
62
|
+
|
|
63
|
+
def get_diagnostics(self) -> dict[str, dict[str, int]]:
|
|
64
|
+
"""返回每个模型的并发状态快照(用于可观测性)."""
|
|
65
|
+
snapshot: dict[str, dict[str, int]] = {}
|
|
66
|
+
for model, sem in self._semaphores.items():
|
|
67
|
+
limit = self._config.get_limit(model)
|
|
68
|
+
# asyncio.Semaphore 内部 _value 表示剩余可用槽位
|
|
69
|
+
available = sem._value # noqa: SLF001 — 公开 API 未暴露
|
|
70
|
+
snapshot[model] = {
|
|
71
|
+
"limit": limit,
|
|
72
|
+
"in_use": max(limit - available, 0),
|
|
73
|
+
"available": max(available, 0),
|
|
74
|
+
}
|
|
75
|
+
return snapshot
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
__all__ = ["ModelConcurrencyLimiter"]
|
|
@@ -34,6 +34,7 @@ from ..routing.rate_limit import (
|
|
|
34
34
|
)
|
|
35
35
|
from ..routing.retry import RetryConfig, calculate_delay
|
|
36
36
|
from .base import VendorResponse
|
|
37
|
+
from .concurrency import ModelConcurrencyLimiter
|
|
37
38
|
from .native_anthropic import NativeAnthropicVendor
|
|
38
39
|
|
|
39
40
|
logger = logging.getLogger(__name__)
|
|
@@ -68,6 +69,12 @@ class ZhipuVendor(NativeAnthropicVendor):
|
|
|
68
69
|
) -> None:
|
|
69
70
|
super().__init__(config, model_mapper, failover_config)
|
|
70
71
|
self._rl_retry = _RATE_LIMIT_RETRY
|
|
72
|
+
# 每模型并发限制器(config.concurrency 为 None 时禁用)
|
|
73
|
+
self._concurrency_limiter: ModelConcurrencyLimiter | None = (
|
|
74
|
+
ModelConcurrencyLimiter(config.concurrency)
|
|
75
|
+
if config.concurrency is not None
|
|
76
|
+
else None
|
|
77
|
+
)
|
|
71
78
|
|
|
72
79
|
# ── 非流式:429 重试 ────────────────────────────────────
|
|
73
80
|
|
|
@@ -76,7 +83,24 @@ class ZhipuVendor(NativeAnthropicVendor):
|
|
|
76
83
|
request_body: dict[str, Any],
|
|
77
84
|
headers: dict[str, str],
|
|
78
85
|
) -> VendorResponse:
|
|
79
|
-
"""非流式请求,429 时自动重试."""
|
|
86
|
+
"""非流式请求,429 时自动重试.
|
|
87
|
+
|
|
88
|
+
在 429 重试循环外层套上每模型并发槽位获取,确保同一时间点同一模型的
|
|
89
|
+
在途请求数不超过配置上限;超过时新请求 FIFO 排队等待。
|
|
90
|
+
"""
|
|
91
|
+
sem = await self._maybe_acquire_concurrency_slot(request_body)
|
|
92
|
+
try:
|
|
93
|
+
return await self._send_message_with_retry(request_body, headers)
|
|
94
|
+
finally:
|
|
95
|
+
if sem is not None:
|
|
96
|
+
sem.release()
|
|
97
|
+
|
|
98
|
+
async def _send_message_with_retry(
|
|
99
|
+
self,
|
|
100
|
+
request_body: dict[str, Any],
|
|
101
|
+
headers: dict[str, str],
|
|
102
|
+
) -> VendorResponse:
|
|
103
|
+
"""原 send_message 主体逻辑(不含并发控制)."""
|
|
80
104
|
max_attempts = self._rl_retry.max_attempts
|
|
81
105
|
|
|
82
106
|
for attempt in range(max_attempts):
|
|
@@ -116,42 +140,71 @@ class ZhipuVendor(NativeAnthropicVendor):
|
|
|
116
140
|
安全性:429 在 BaseVendor.send_message_stream 中于
|
|
117
141
|
status code 检查阶段即 raise(在任何 chunk yield 之前),
|
|
118
142
|
因此重试不会导致已发出数据不一致。
|
|
143
|
+
|
|
144
|
+
在 429 重试循环外层套上每模型并发槽位获取,确保流式请求与非流式请求
|
|
145
|
+
共用同一信号量,统一限制同一模型的总在途并发数。
|
|
119
146
|
"""
|
|
147
|
+
sem = await self._maybe_acquire_concurrency_slot(request_body)
|
|
120
148
|
max_attempts = self._rl_retry.max_attempts
|
|
121
149
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
150
|
+
try:
|
|
151
|
+
for attempt in range(max_attempts):
|
|
152
|
+
try:
|
|
153
|
+
# 429 在 status code 检查阶段即 raise(在任何 chunk 之前),
|
|
154
|
+
# 因此 __anext__ 安全:要么拿到首个 chunk,要么抛异常。
|
|
155
|
+
ait = super().send_message_stream(request_body, headers)
|
|
156
|
+
head = await ait.__anext__()
|
|
157
|
+
except StopAsyncIteration:
|
|
158
|
+
return
|
|
159
|
+
except httpx.HTTPStatusError as exc:
|
|
160
|
+
if exc.response is None or exc.response.status_code != 429:
|
|
161
|
+
raise
|
|
162
|
+
if attempt == max_attempts - 1:
|
|
163
|
+
logger.warning(
|
|
164
|
+
"Zhipu 429 stream rate limit exhausted after %d attempts",
|
|
165
|
+
max_attempts,
|
|
166
|
+
)
|
|
167
|
+
raise
|
|
168
|
+
|
|
169
|
+
delay = self._compute_retry_delay_from_response(
|
|
170
|
+
exc.response, attempt
|
|
137
171
|
)
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
172
|
+
logger.info(
|
|
173
|
+
"Zhipu 429 stream rate limit, retry %d/%d in %.1fms",
|
|
174
|
+
attempt + 1,
|
|
175
|
+
max_attempts - 1,
|
|
176
|
+
delay,
|
|
177
|
+
)
|
|
178
|
+
await asyncio.sleep(delay / 1000.0)
|
|
179
|
+
continue
|
|
180
|
+
|
|
181
|
+
# yield 在 try/except 之外,避免捕获外部 athrow 的异常
|
|
182
|
+
yield head
|
|
183
|
+
async for chunk in ait:
|
|
184
|
+
yield chunk
|
|
185
|
+
return
|
|
186
|
+
finally:
|
|
187
|
+
if sem is not None:
|
|
188
|
+
sem.release()
|
|
189
|
+
|
|
190
|
+
# ── 并发控制 ────────────────────────────────────────────
|
|
191
|
+
|
|
192
|
+
async def _maybe_acquire_concurrency_slot(
|
|
193
|
+
self,
|
|
194
|
+
request_body: dict[str, Any],
|
|
195
|
+
) -> asyncio.Semaphore | None:
|
|
196
|
+
"""按映射后模型名获取并发槽位;未配置 concurrency 时返回 None.
|
|
197
|
+
|
|
198
|
+
``map_model()`` 是纯同步字典查找,在 Semaphore 等待前调用是安全的,
|
|
199
|
+
且能确保排队键与上游真实承载模型对齐。
|
|
200
|
+
"""
|
|
201
|
+
if self._concurrency_limiter is None:
|
|
202
|
+
return None
|
|
203
|
+
raw_model = request_body.get("model", "") if request_body else ""
|
|
204
|
+
mapped_model = self.map_model(raw_model) if raw_model else ""
|
|
205
|
+
if not mapped_model:
|
|
206
|
+
return None
|
|
207
|
+
return await self._concurrency_limiter.acquire(mapped_model)
|
|
155
208
|
|
|
156
209
|
# ── 延迟计算 ────────────────────────────────────────────
|
|
157
210
|
|
|
@@ -31,7 +31,8 @@ def test_antigravity_fields_set():
|
|
|
31
31
|
|
|
32
32
|
def test_zhipu_fields_set():
|
|
33
33
|
assert "api_key" in _ZHIPU_FIELDS
|
|
34
|
-
assert len(_ZHIPU_FIELDS) == 1
|
|
34
|
+
assert "concurrency" in _ZHIPU_FIELDS
|
|
35
|
+
assert len(_ZHIPU_FIELDS) == 2
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
def test_vendor_exclusive_fields_mapping_complete():
|