coding-proxy 0.4.1a11__tar.gz → 0.4.1a12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/PKG-INFO +1 -1
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/pyproject.toml +1 -1
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/server/dashboard.py +112 -1
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/server/routes.py +56 -0
- coding_proxy-0.4.1a12/src/coding/proxy/vendors/concurrency.py +162 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/zhipu.py +7 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_zhipu_concurrency.py +11 -11
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/uv.lock +1 -1
- coding_proxy-0.4.1a11/src/coding/proxy/vendors/concurrency.py +0 -83
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/.github/workflows/ci.yml +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/.github/workflows/coverage.yml +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/.github/workflows/release.yml +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/.gitignore +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/.pre-commit-config.yaml +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/AGENTS.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/CHANGELOG.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/CLAUDE.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/LICENSE +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/README.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/assets/dashboard-v0.4.0.png +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/assets/session-v0.4.0.png +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/agents/browser-validation.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/agents/issue.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/agents/knowledge-map.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/agents/reference-specifications.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/arch/config-reference.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/arch/convert.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/arch/design-patterns.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/arch/routing.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/arch/testing.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/arch/vendors.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/framework.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/guide/api-reference.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/guide/cli-reference.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/guide/dashboard.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/guide/monitoring.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/guide/quickstart.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/guide/vendors.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/ops/ci-cd.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/user-guide.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/docs/zh-CN/README.md +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/__main__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/auth/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/auth/providers/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/auth/providers/base.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/auth/providers/github.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/auth/providers/google.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/auth/runtime.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/auth/store.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/cli/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/cli/auth_commands.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/cli/banner.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/compat/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/compat/canonical.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/compat/session_store.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/config/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/config/auth_schema.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/config/config.default.yaml +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/config/loader.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/config/resiliency.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/config/routing.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/config/schema.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/config/server.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/config/session_policy.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/config/vendors.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/convert/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/convert/anthropic_to_gemini.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/convert/anthropic_to_openai.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/convert/gemini_sse_adapter.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/convert/gemini_to_anthropic.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/convert/openai_to_anthropic.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/convert/vendor_channels.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/logging/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/logging/db.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/logging/formatters.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/logging/stats.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/model/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/model/auth.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/model/compat.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/model/constants.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/model/pricing.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/model/token.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/model/vendor.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/native_api/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/native_api/config.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/native_api/extractors/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/native_api/extractors/anthropic.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/native_api/extractors/gemini.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/native_api/extractors/openai.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/native_api/handler.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/native_api/operation.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/native_api/routes.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/native_api/usage_registry.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/pricing.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/routing/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/routing/circuit_breaker.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/routing/error_classifier.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/routing/executor.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/routing/model_mapper.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/routing/quota_guard.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/routing/rate_limit.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/routing/retry.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/routing/router.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/routing/session_manager.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/routing/session_policy.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/routing/tier.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/routing/usage_parser.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/routing/usage_recorder.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/server/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/server/app.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/server/factory.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/server/responses.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/streaming/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/streaming/anthropic_compat.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/alibaba.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/anthropic.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/antigravity.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/base.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/copilot.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/copilot_models.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/copilot_token_manager.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/copilot_urls.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/doubao.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/kimi.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/minimax.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/mixins.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/native_anthropic.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/token_manager.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/src/coding/proxy/vendors/xiaomi.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/e2e/__init__.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/e2e/conftest.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/e2e/test_e2e_http.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/e2e/test_e2e_token.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/e2e/test_e2e_vendor.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_antigravity.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_app_routes.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_auto_login.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_banner.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_circuit_breaker.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_cli_usage.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_compat.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_config_init.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_config_loader.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_convert_request.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_convert_response.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_convert_sse.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_copilot.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_copilot_convert_request.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_copilot_convert_response.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_copilot_models.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_copilot_urls.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_currency.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_error_classifier.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_logging_dual_write.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_mixins.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_model_auth.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_model_compat.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_model_constants.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_model_mapper.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_model_pricing.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_model_token.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_model_vendor.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_native_api_base_url_override.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_native_api_extractors.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_native_api_handler.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_native_api_operation.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_native_api_routes.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_native_vendors.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_parse_usage.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_parse_usage_gemini.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_pricing.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_quota_guard.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_rate_limit.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_router_chain.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_router_executor.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_runtime_reauth.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_schema.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_session_aware.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_streaming_anthropic_compat.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_tier.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_tiers_config.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_time_range.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_token_logger.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_token_logger_native_columns.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_token_manager.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_types.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_vendor_channels.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_vendor_streaming.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_vendors.py +0 -0
- {coding_proxy-0.4.1a11 → coding_proxy-0.4.1a12}/tests/test_zhipu.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coding-proxy
|
|
3
|
-
Version: 0.4.1a11
|
|
3
|
+
Version: 0.4.1a12
|
|
4
4
|
Summary: A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao...
|
|
5
5
|
Project-URL: Source Code, https://github.com/ThreeFish-AI/coding-proxy
|
|
6
6
|
Project-URL: User Guide, https://github.com/ThreeFish-AI/coding-proxy/blob/master/docs/user-guide.md
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "coding-proxy"
|
|
3
|
-
version = "0.4.1a11"
|
|
3
|
+
version = "0.4.1a12"
|
|
4
4
|
description = "A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao..."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
@@ -640,6 +640,43 @@ _DASHBOARD_HTML = """<!DOCTYPE html>
|
|
|
640
640
|
padding: 1px 6px;
|
|
641
641
|
border-radius: 3px;
|
|
642
642
|
}
|
|
643
|
+
.mc-limit-editable {
|
|
644
|
+
cursor: pointer;
|
|
645
|
+
border-bottom: 1px dashed rgba(74,222,128,.4);
|
|
646
|
+
transition: border-color .2s, color .2s;
|
|
647
|
+
}
|
|
648
|
+
.mc-limit-editable:hover {
|
|
649
|
+
border-bottom-color: #4ade80;
|
|
650
|
+
color: #4ade80;
|
|
651
|
+
}
|
|
652
|
+
.mc-limit-input {
|
|
653
|
+
width: 36px;
|
|
654
|
+
background: var(--bg-primary);
|
|
655
|
+
border: 1px solid var(--accent-blue);
|
|
656
|
+
border-radius: 3px;
|
|
657
|
+
color: var(--text-primary);
|
|
658
|
+
font-size: 10px;
|
|
659
|
+
font-family: 'JetBrains Mono', monospace;
|
|
660
|
+
text-align: center;
|
|
661
|
+
padding: 0 2px;
|
|
662
|
+
outline: none;
|
|
663
|
+
-moz-appearance: textfield;
|
|
664
|
+
}
|
|
665
|
+
.mc-limit-input::-webkit-outer-spin-button,
|
|
666
|
+
.mc-limit-input::-webkit-inner-spin-button {
|
|
667
|
+
-webkit-appearance: none;
|
|
668
|
+
margin: 0;
|
|
669
|
+
}
|
|
670
|
+
.mc-limit-flash-ok { animation: mc-flash-ok .6s ease; }
|
|
671
|
+
.mc-limit-flash-err { animation: mc-flash-err .6s ease; }
|
|
672
|
+
@keyframes mc-flash-ok {
|
|
673
|
+
0%,100% { color: inherit; }
|
|
674
|
+
40% { color: #4ade80; }
|
|
675
|
+
}
|
|
676
|
+
@keyframes mc-flash-err {
|
|
677
|
+
0%,100% { color: inherit; }
|
|
678
|
+
40% { color: #f87171; }
|
|
679
|
+
}
|
|
643
680
|
</style>
|
|
644
681
|
</head>
|
|
645
682
|
<body>
|
|
@@ -1268,7 +1305,8 @@ function updateModelCalling(status) {
|
|
|
1268
1305
|
+ '<span class="mc-model-name">' + escapeHtml(m.vendor + '/' + m.model) + '</span>'
|
|
1269
1306
|
+ '<div class="mc-bar-wrap"><div class="mc-bar-fill ' + barClass + '" style="width:' + pct + '%"></div></div>'
|
|
1270
1307
|
+ '<div class="mc-stats">'
|
|
1271
|
-
+ '<span class="mc-badge mc-badge-active">' + m.in_use
|
|
1308
|
+
+ '<span class="mc-badge mc-badge-active">' + m.in_use
|
|
1309
|
+
+ '/<span class="mc-limit-editable" data-tier="' + escapeHtml(m.vendor) + '" data-model="' + escapeHtml(m.model) + '" data-limit="' + m.limit + '" title="点击修改并行度">' + m.limit + '</span></span>'
|
|
1272
1310
|
+ (m.pending > 0 ? '<span class="mc-badge mc-badge-pending">⏳ ' + m.pending + '</span>' : '')
|
|
1273
1311
|
+ '</div>'
|
|
1274
1312
|
+ '</div>';
|
|
@@ -1293,6 +1331,79 @@ function stopModelCallingPoll() {
|
|
|
1293
1331
|
if (_mcTimer) { clearInterval(_mcTimer); _mcTimer = null; }
|
|
1294
1332
|
}
|
|
1295
1333
|
|
|
1334
|
+
// ── 并行度运行时编辑 ──────────────────────────────────────
|
|
1335
|
+
var _mcEditing = false;
|
|
1336
|
+
document.addEventListener('click', function(e) {
|
|
1337
|
+
if (_mcEditing) return;
|
|
1338
|
+
var el = e.target.closest('.mc-limit-editable');
|
|
1339
|
+
if (!el) return;
|
|
1340
|
+
e.preventDefault();
|
|
1341
|
+
_mcEditing = true;
|
|
1342
|
+
var oldVal = el.getAttribute('data-limit');
|
|
1343
|
+
var tier = el.getAttribute('data-tier');
|
|
1344
|
+
var model = el.getAttribute('data-model');
|
|
1345
|
+
var input = document.createElement('input');
|
|
1346
|
+
input.type = 'number';
|
|
1347
|
+
input.className = 'mc-limit-input';
|
|
1348
|
+
input.min = '1';
|
|
1349
|
+
input.max = '20';
|
|
1350
|
+
input.value = oldVal;
|
|
1351
|
+
el.style.display = 'none';
|
|
1352
|
+
el.parentNode.insertBefore(input, el.nextSibling);
|
|
1353
|
+
input.focus();
|
|
1354
|
+
input.select();
|
|
1355
|
+
|
|
1356
|
+
var _cancelled = false;
|
|
1357
|
+
|
|
1358
|
+
function restore() {
|
|
1359
|
+
_mcEditing = false;
|
|
1360
|
+
if (input.parentNode) input.parentNode.removeChild(input);
|
|
1361
|
+
el.style.display = '';
|
|
1362
|
+
}
|
|
1363
|
+
|
|
1364
|
+
function flash(cls) {
|
|
1365
|
+
el.classList.add(cls);
|
|
1366
|
+
setTimeout(function() { el.classList.remove(cls); }, 600);
|
|
1367
|
+
}
|
|
1368
|
+
|
|
1369
|
+
input.addEventListener('keydown', function(ev) {
|
|
1370
|
+
if (ev.key === 'Escape') { _cancelled = true; restore(); return; }
|
|
1371
|
+
if (ev.key !== 'Enter') return;
|
|
1372
|
+
ev.preventDefault();
|
|
1373
|
+
submit();
|
|
1374
|
+
});
|
|
1375
|
+
|
|
1376
|
+
input.addEventListener('blur', function() {
|
|
1377
|
+
setTimeout(function() { if (!_cancelled) submit(); }, 50);
|
|
1378
|
+
});
|
|
1379
|
+
|
|
1380
|
+
function submit() {
|
|
1381
|
+
if (_cancelled) return;
|
|
1382
|
+
var v = parseInt(input.value, 10);
|
|
1383
|
+
if (isNaN(v) || v < 1 || v > 20) { restore(); flash('mc-limit-flash-err'); return; }
|
|
1384
|
+
if (String(v) === oldVal) { restore(); return; }
|
|
1385
|
+
fetch('/api/concurrency', {
|
|
1386
|
+
method: 'PUT',
|
|
1387
|
+
headers: {'Content-Type': 'application/json'},
|
|
1388
|
+
body: JSON.stringify({tier: tier, model: model, limit: v})
|
|
1389
|
+
}).then(function(res) {
|
|
1390
|
+
if (res.ok) {
|
|
1391
|
+
return res.json().then(function() {
|
|
1392
|
+
el.textContent = v;
|
|
1393
|
+
el.setAttribute('data-limit', v);
|
|
1394
|
+
flash('mc-limit-flash-ok');
|
|
1395
|
+
});
|
|
1396
|
+
} else {
|
|
1397
|
+
flash('mc-limit-flash-err');
|
|
1398
|
+
}
|
|
1399
|
+
}).catch(function() {
|
|
1400
|
+
flash('mc-limit-flash-err');
|
|
1401
|
+
}).finally(function() {
|
|
1402
|
+
restore();
|
|
1403
|
+
});
|
|
1404
|
+
}
|
|
1405
|
+
});
|
|
1406
|
+
|
|
1296
1407
|
// ── 按 tiers 顺序排序 vendor 列表 ─────────────────────────
|
|
1297
1408
|
function sortByTierOrder(vendors, tierOrder) {
|
|
1298
1409
|
if (!tierOrder || !tierOrder.length) return vendors.sort();
|
|
@@ -225,6 +225,61 @@ def register_status_route(app: Any, router: Any) -> None:
|
|
|
225
225
|
return result
|
|
226
226
|
|
|
227
227
|
|
|
228
|
+
def register_concurrency_route(app: Any, router: Any) -> None:
|
|
229
|
+
"""注册运行时并发限制调整路由."""
|
|
230
|
+
|
|
231
|
+
@app.put("/api/concurrency")
|
|
232
|
+
async def update_concurrency(request: Request) -> Response:
|
|
233
|
+
try:
|
|
234
|
+
body = await request.json()
|
|
235
|
+
except Exception:
|
|
236
|
+
return json_error_response(
|
|
237
|
+
400, error_type="invalid_request_error", message="body must be JSON"
|
|
238
|
+
)
|
|
239
|
+
tier_name = body.get("tier")
|
|
240
|
+
model = body.get("model")
|
|
241
|
+
limit = body.get("limit")
|
|
242
|
+
if not tier_name or not model or limit is None:
|
|
243
|
+
return json_error_response(
|
|
244
|
+
400,
|
|
245
|
+
error_type="invalid_request_error",
|
|
246
|
+
message="requires tier, model, limit",
|
|
247
|
+
)
|
|
248
|
+
if not isinstance(limit, int) or limit < 1 or limit > 20:
|
|
249
|
+
return json_error_response(
|
|
250
|
+
400,
|
|
251
|
+
error_type="invalid_request_error",
|
|
252
|
+
message="limit must be an integer between 1 and 20",
|
|
253
|
+
)
|
|
254
|
+
for tier in router.tiers:
|
|
255
|
+
if tier.name == tier_name:
|
|
256
|
+
vendor = tier.vendor
|
|
257
|
+
update_fn = getattr(vendor, "update_concurrency", None)
|
|
258
|
+
if update_fn is None:
|
|
259
|
+
return json_error_response(
|
|
260
|
+
400,
|
|
261
|
+
error_type="invalid_request_error",
|
|
262
|
+
message=f"vendor '{tier_name}' does not support concurrency",
|
|
263
|
+
)
|
|
264
|
+
try:
|
|
265
|
+
update_fn(model, limit)
|
|
266
|
+
except (ValueError, AttributeError) as exc:
|
|
267
|
+
return json_error_response(
|
|
268
|
+
400, error_type="invalid_request_error", message=str(exc)
|
|
269
|
+
)
|
|
270
|
+
return Response(
|
|
271
|
+
content=json.dumps(
|
|
272
|
+
{"ok": True, "tier": tier_name, "model": model, "limit": limit},
|
|
273
|
+
ensure_ascii=False,
|
|
274
|
+
).encode(),
|
|
275
|
+
status_code=200,
|
|
276
|
+
media_type="application/json",
|
|
277
|
+
)
|
|
278
|
+
return json_error_response(
|
|
279
|
+
404, error_type="not_found", message=f"tier '{tier_name}' not found"
|
|
280
|
+
)
|
|
281
|
+
|
|
282
|
+
|
|
228
283
|
def register_copilot_routes(app: Any, router: Any) -> None:
|
|
229
284
|
"""注册 Copilot 诊断与模型探测路由."""
|
|
230
285
|
from .factory import _find_copilot_vendor
|
|
@@ -457,6 +512,7 @@ def register_all_routes(
|
|
|
457
512
|
register_core_routes(app, router)
|
|
458
513
|
register_health_routes(app)
|
|
459
514
|
register_status_route(app, router)
|
|
515
|
+
register_concurrency_route(app, router)
|
|
460
516
|
register_copilot_routes(app, router)
|
|
461
517
|
register_admin_routes(app, router)
|
|
462
518
|
register_session_vendor_routes(app, router)
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""每模型并发限制器 — 支持运行时动态调整的公平排队.
|
|
2
|
+
|
|
3
|
+
为每个映射后的模型(如 ``glm-5v-turbo``)独立维护一个 ``_ConcurrencySlot`,
|
|
4
|
+
确保同一时间点该模型的并行请求数不超过配置的上限。当所有槽位被占满时,
|
|
5
|
+
新请求按 FIFO 顺序排队等待,直到有槽位释放。
|
|
6
|
+
|
|
7
|
+
设计要点:
|
|
8
|
+
- **惰性创建**:仅在首次请求到达时才为该模型创建 Slot,避免冷启动开销
|
|
9
|
+
- **FIFO 公平**:``asyncio.Event`` + while 循环天然满足 FIFO 排队语义
|
|
10
|
+
- **动态调整**:支持运行时修改 per-model limit,无需重启进程
|
|
11
|
+
- **按映射后模型名键控**:与上游真实承载能力对齐,而非按客户端请求名
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import logging
|
|
18
|
+
|
|
19
|
+
from ..config.vendors import ZhipuConcurrencyConfig
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class _ConcurrencySlot:
|
|
25
|
+
"""支持动态 limit 的并发槽位.
|
|
26
|
+
|
|
27
|
+
使用 ``asyncio.Event`` 作为等待/通知原语,在 ``acquire`` 中 await 等待,
|
|
28
|
+
在 ``release`` / ``set_limit`` 中唤醒。``set_limit`` 修改上限后立即唤醒
|
|
29
|
+
所有等待者,由它们重新判断是否可获得槽位。
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
def __init__(self, limit: int) -> None:
|
|
33
|
+
self._limit = limit
|
|
34
|
+
self._in_use: int = 0
|
|
35
|
+
self._pending: int = 0
|
|
36
|
+
self._wake = asyncio.Event()
|
|
37
|
+
self._wake.set()
|
|
38
|
+
|
|
39
|
+
async def acquire(self) -> _ConcurrencySlot:
|
|
40
|
+
"""获取一个并发槽位,必要时阻塞排队.
|
|
41
|
+
|
|
42
|
+
返回 ``self``,调用方在请求完成后调用 ``release()``。
|
|
43
|
+
"""
|
|
44
|
+
# Fast path
|
|
45
|
+
if self._in_use < self._limit:
|
|
46
|
+
self._in_use += 1
|
|
47
|
+
return self
|
|
48
|
+
# Slow path — 等待槽位释放
|
|
49
|
+
self._pending += 1
|
|
50
|
+
try:
|
|
51
|
+
while True:
|
|
52
|
+
self._wake.clear()
|
|
53
|
+
await self._wake.wait()
|
|
54
|
+
if self._in_use < self._limit:
|
|
55
|
+
self._in_use += 1
|
|
56
|
+
return self
|
|
57
|
+
finally:
|
|
58
|
+
self._pending -= 1
|
|
59
|
+
|
|
60
|
+
def release(self) -> None:
|
|
61
|
+
"""释放一个并发槽位."""
|
|
62
|
+
self._in_use = max(0, self._in_use - 1)
|
|
63
|
+
self._wake.set()
|
|
64
|
+
|
|
65
|
+
def set_limit(self, new_limit: int) -> None:
|
|
66
|
+
"""动态调整并发上限.
|
|
67
|
+
|
|
68
|
+
增大 limit 时立即唤醒等待者;缩小时已持有的槽位不受影响,
|
|
69
|
+
新 limit 在后续 acquire 中自然生效。
|
|
70
|
+
"""
|
|
71
|
+
self._limit = new_limit
|
|
72
|
+
self._wake.set()
|
|
73
|
+
|
|
74
|
+
@property
|
|
75
|
+
def limit(self) -> int:
|
|
76
|
+
return self._limit
|
|
77
|
+
|
|
78
|
+
@property
|
|
79
|
+
def in_use(self) -> int:
|
|
80
|
+
return self._in_use
|
|
81
|
+
|
|
82
|
+
@property
|
|
83
|
+
def available(self) -> int:
|
|
84
|
+
return max(0, self._limit - self._in_use)
|
|
85
|
+
|
|
86
|
+
@property
|
|
87
|
+
def pending(self) -> int:
|
|
88
|
+
return self._pending
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class ModelConcurrencyLimiter:
|
|
92
|
+
"""按模型名提供独立并发槽位的限制器.
|
|
93
|
+
|
|
94
|
+
用法::
|
|
95
|
+
|
|
96
|
+
limiter = ModelConcurrencyLimiter(config)
|
|
97
|
+
slot = await limiter.acquire("glm-5v-turbo")
|
|
98
|
+
try:
|
|
99
|
+
... # 执行请求
|
|
100
|
+
finally:
|
|
101
|
+
slot.release()
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
def __init__(self, config: ZhipuConcurrencyConfig) -> None:
|
|
105
|
+
self._config = config
|
|
106
|
+
self._slots: dict[str, _ConcurrencySlot] = {}
|
|
107
|
+
|
|
108
|
+
def _get_or_create_slot(self, model: str) -> _ConcurrencySlot:
|
|
109
|
+
"""获取(或惰性创建)指定模型的并发槽位."""
|
|
110
|
+
slot = self._slots.get(model)
|
|
111
|
+
if slot is None:
|
|
112
|
+
limit = self._config.get_limit(model)
|
|
113
|
+
slot = _ConcurrencySlot(limit)
|
|
114
|
+
self._slots[model] = slot
|
|
115
|
+
logger.debug(
|
|
116
|
+
"ModelConcurrencyLimiter: created slot model=%s limit=%d",
|
|
117
|
+
model,
|
|
118
|
+
limit,
|
|
119
|
+
)
|
|
120
|
+
return slot
|
|
121
|
+
|
|
122
|
+
async def acquire(self, model: str) -> _ConcurrencySlot:
|
|
123
|
+
"""获取指定模型的并发槽位,必要时阻塞排队.
|
|
124
|
+
|
|
125
|
+
返回已获取的 Slot 实例,调用方负责在请求完成后调用 ``release()``。
|
|
126
|
+
"""
|
|
127
|
+
slot = self._get_or_create_slot(model)
|
|
128
|
+
await slot.acquire()
|
|
129
|
+
return slot
|
|
130
|
+
|
|
131
|
+
def set_limit(self, model: str, new_limit: int) -> None:
|
|
132
|
+
"""运行时修改指定模型的并发上限.
|
|
133
|
+
|
|
134
|
+
同时更新 config.models 以确保后续惰性创建使用新值。
|
|
135
|
+
"""
|
|
136
|
+
slot = self._slots.get(model)
|
|
137
|
+
if slot is None:
|
|
138
|
+
slot = _ConcurrencySlot(new_limit)
|
|
139
|
+
self._slots[model] = slot
|
|
140
|
+
else:
|
|
141
|
+
slot.set_limit(new_limit)
|
|
142
|
+
self._config.models[model] = new_limit
|
|
143
|
+
logger.info(
|
|
144
|
+
"ModelConcurrencyLimiter: updated limit model=%s new_limit=%d",
|
|
145
|
+
model,
|
|
146
|
+
new_limit,
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
def get_diagnostics(self) -> dict[str, dict[str, int]]:
|
|
150
|
+
"""返回每个模型的并发状态快照(用于可观测性)."""
|
|
151
|
+
snapshot: dict[str, dict[str, int]] = {}
|
|
152
|
+
for model, slot in self._slots.items():
|
|
153
|
+
snapshot[model] = {
|
|
154
|
+
"limit": slot.limit,
|
|
155
|
+
"in_use": slot.in_use,
|
|
156
|
+
"available": slot.available,
|
|
157
|
+
"pending": slot.pending,
|
|
158
|
+
}
|
|
159
|
+
return snapshot
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
__all__ = ["ModelConcurrencyLimiter"]
|
|
@@ -261,6 +261,13 @@ class ZhipuVendor(NativeAnthropicVendor):
|
|
|
261
261
|
diagnostics["concurrency"] = self._concurrency_limiter.get_diagnostics()
|
|
262
262
|
return diagnostics
|
|
263
263
|
|
|
264
|
+
def update_concurrency(self, model: str, limit: int) -> None:
|
|
265
|
+
"""运行时更新指定模型的并发限制."""
|
|
266
|
+
if self._concurrency_limiter is None:
|
|
267
|
+
msg = "Concurrency limiter is not enabled for this vendor"
|
|
268
|
+
raise ValueError(msg)
|
|
269
|
+
self._concurrency_limiter.set_limit(model, limit)
|
|
270
|
+
|
|
264
271
|
# ── 延迟计算 ────────────────────────────────────────────
|
|
265
272
|
|
|
266
273
|
def _compute_retry_delay_from_headers(
|
|
@@ -141,12 +141,12 @@ class TestModelConcurrencyLimiter:
|
|
|
141
141
|
@pytest.mark.asyncio
|
|
142
142
|
async def test_lazy_semaphore_creation(self) -> None:
|
|
143
143
|
limiter = ModelConcurrencyLimiter(ZhipuConcurrencyConfig(default=2))
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
# 不同模型独立
|
|
147
|
-
assert
|
|
148
|
-
# 相同模型复用
|
|
149
|
-
assert limiter.
|
|
144
|
+
slot_a = limiter._get_or_create_slot("model-a")
|
|
145
|
+
slot_b = limiter._get_or_create_slot("model-b")
|
|
146
|
+
# 不同模型独立 slot
|
|
147
|
+
assert slot_a is not slot_b
|
|
148
|
+
# 相同模型复用 slot
|
|
149
|
+
assert limiter._get_or_create_slot("model-a") is slot_a
|
|
150
150
|
|
|
151
151
|
@pytest.mark.asyncio
|
|
152
152
|
async def test_acquire_blocks_when_full(self) -> None:
|
|
@@ -184,8 +184,8 @@ class TestModelConcurrencyLimiter:
|
|
|
184
184
|
|
|
185
185
|
def test_diagnostics_snapshot(self) -> None:
|
|
186
186
|
limiter = ModelConcurrencyLimiter(ZhipuConcurrencyConfig(default=3))
|
|
187
|
-
# 触发
|
|
188
|
-
limiter.
|
|
187
|
+
# 触发 slot 创建
|
|
188
|
+
limiter._get_or_create_slot("glm-5.1")
|
|
189
189
|
snap = limiter.get_diagnostics()
|
|
190
190
|
assert "glm-5.1" in snap
|
|
191
191
|
assert snap["glm-5.1"]["limit"] == 3
|
|
@@ -459,10 +459,10 @@ class TestZhipuVendorStreamConcurrency:
|
|
|
459
459
|
chunks.append(chunk)
|
|
460
460
|
assert len(chunks) == 2
|
|
461
461
|
|
|
462
|
-
# 确认
|
|
462
|
+
# 确认 slot 当前完全可用
|
|
463
463
|
assert vendor._concurrency_limiter is not None
|
|
464
|
-
|
|
465
|
-
assert
|
|
464
|
+
slot = vendor._concurrency_limiter._get_or_create_slot("glm-5.1")
|
|
465
|
+
assert slot.available == 1
|
|
466
466
|
|
|
467
467
|
@pytest.mark.asyncio
|
|
468
468
|
async def test_stream_releases_slot_on_error(self) -> None:
|
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
"""每模型并发限制器 — 基于 asyncio.Semaphore 的公平排队.
|
|
2
|
-
|
|
3
|
-
为每个映射后的模型(如 ``glm-5v-turbo``)独立维护一个 ``asyncio.Semaphore``,
|
|
4
|
-
确保同一时间点该模型的并行请求数不超过配置的上限。当所有槽位被占满时,
|
|
5
|
-
新请求按 FIFO 顺序排队等待,直到有槽位释放。
|
|
6
|
-
|
|
7
|
-
设计要点:
|
|
8
|
-
- **惰性创建**:仅在首次请求到达时才为该模型创建 Semaphore,避免冷启动开销
|
|
9
|
-
- **FIFO 公平**:``asyncio.Semaphore`` 内部使用 FIFO 队列,天然满足排队语义
|
|
10
|
-
- **按映射后模型名键控**:与上游真实承载能力对齐,而非按客户端请求名(如 ``claude-sonnet-*``)
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
from __future__ import annotations
|
|
14
|
-
|
|
15
|
-
import asyncio
|
|
16
|
-
import logging
|
|
17
|
-
|
|
18
|
-
from ..config.vendors import ZhipuConcurrencyConfig
|
|
19
|
-
|
|
20
|
-
logger = logging.getLogger(__name__)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class ModelConcurrencyLimiter:
|
|
24
|
-
"""按模型名提供独立并发槽位的限制器.
|
|
25
|
-
|
|
26
|
-
用法::
|
|
27
|
-
|
|
28
|
-
limiter = ModelConcurrencyLimiter(config)
|
|
29
|
-
sem = await limiter.acquire("glm-5v-turbo")
|
|
30
|
-
try:
|
|
31
|
-
... # 执行请求
|
|
32
|
-
finally:
|
|
33
|
-
sem.release()
|
|
34
|
-
"""
|
|
35
|
-
|
|
36
|
-
def __init__(self, config: ZhipuConcurrencyConfig) -> None:
|
|
37
|
-
self._config = config
|
|
38
|
-
self._semaphores: dict[str, asyncio.Semaphore] = {}
|
|
39
|
-
|
|
40
|
-
def _get_semaphore(self, model: str) -> asyncio.Semaphore:
|
|
41
|
-
"""获取(或惰性创建)指定模型的信号量."""
|
|
42
|
-
sem = self._semaphores.get(model)
|
|
43
|
-
if sem is None:
|
|
44
|
-
limit = self._config.get_limit(model)
|
|
45
|
-
sem = asyncio.Semaphore(limit)
|
|
46
|
-
self._semaphores[model] = sem
|
|
47
|
-
logger.debug(
|
|
48
|
-
"ModelConcurrencyLimiter: created semaphore model=%s limit=%d",
|
|
49
|
-
model,
|
|
50
|
-
limit,
|
|
51
|
-
)
|
|
52
|
-
return sem
|
|
53
|
-
|
|
54
|
-
async def acquire(self, model: str) -> asyncio.Semaphore:
|
|
55
|
-
"""获取指定模型的并发槽位,必要时阻塞排队.
|
|
56
|
-
|
|
57
|
-
返回已获取的 Semaphore 实例,调用方负责在请求完成后调用 ``release()``。
|
|
58
|
-
"""
|
|
59
|
-
sem = self._get_semaphore(model)
|
|
60
|
-
await sem.acquire()
|
|
61
|
-
return sem
|
|
62
|
-
|
|
63
|
-
def get_diagnostics(self) -> dict[str, dict[str, int]]:
|
|
64
|
-
"""返回每个模型的并发状态快照(用于可观测性)."""
|
|
65
|
-
snapshot: dict[str, dict[str, int]] = {}
|
|
66
|
-
for model, sem in self._semaphores.items():
|
|
67
|
-
limit = self._config.get_limit(model)
|
|
68
|
-
# asyncio.Semaphore 内部 _value 表示剩余可用槽位
|
|
69
|
-
available = sem._value # noqa: SLF001 — 公开 API 未暴露
|
|
70
|
-
in_use = max(limit - available, 0)
|
|
71
|
-
# _waiters 为正在排队等待的协程集合,无等待者时为 None
|
|
72
|
-
waiters = getattr(sem, "_waiters", None) # noqa: SLF001
|
|
73
|
-
pending = len(waiters) if waiters else 0
|
|
74
|
-
snapshot[model] = {
|
|
75
|
-
"limit": limit,
|
|
76
|
-
"in_use": in_use,
|
|
77
|
-
"available": max(available, 0),
|
|
78
|
-
"pending": pending,
|
|
79
|
-
}
|
|
80
|
-
return snapshot
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
__all__ = ["ModelConcurrencyLimiter"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|