coding-proxy 0.4.1a12__tar.gz → 0.5.1a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/AGENTS.md +3 -2
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/CHANGELOG.md +28 -1
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/PKG-INFO +1 -1
- coding_proxy-0.5.1a1/assets/model-calling-v0.5.0.png +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/pyproject.toml +1 -1
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/executor.py +14 -10
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/server/dashboard.py +36 -15
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/server/routes.py +6 -7
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/base.py +25 -0
- coding_proxy-0.5.1a1/src/coding/proxy/vendors/concurrency.py +251 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/zhipu.py +42 -102
- coding_proxy-0.5.1a1/tests/test_concurrency_monitor.py +158 -0
- coding_proxy-0.5.1a1/tests/test_executor_in_flight_tracking.py +233 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_router_executor.py +5 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_zhipu_concurrency.py +164 -72
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/uv.lock +1 -1
- coding_proxy-0.4.1a12/src/coding/proxy/vendors/concurrency.py +0 -162
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/.github/workflows/ci.yml +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/.github/workflows/coverage.yml +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/.github/workflows/release.yml +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/.gitignore +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/.pre-commit-config.yaml +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/CLAUDE.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/LICENSE +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/README.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/assets/dashboard-v0.4.0.png +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/assets/session-v0.4.0.png +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/agents/browser-validation.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/agents/issue.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/agents/knowledge-map.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/agents/reference-specifications.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/arch/config-reference.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/arch/convert.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/arch/design-patterns.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/arch/routing.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/arch/testing.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/arch/vendors.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/framework.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/guide/api-reference.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/guide/cli-reference.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/guide/dashboard.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/guide/monitoring.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/guide/quickstart.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/guide/vendors.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/ops/ci-cd.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/user-guide.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/docs/zh-CN/README.md +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/__main__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/auth/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/auth/providers/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/auth/providers/base.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/auth/providers/github.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/auth/providers/google.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/auth/runtime.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/auth/store.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/cli/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/cli/auth_commands.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/cli/banner.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/compat/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/compat/canonical.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/compat/session_store.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/config/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/config/auth_schema.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/config/config.default.yaml +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/config/loader.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/config/resiliency.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/config/routing.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/config/schema.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/config/server.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/config/session_policy.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/config/vendors.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/convert/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/convert/anthropic_to_gemini.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/convert/anthropic_to_openai.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/convert/gemini_sse_adapter.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/convert/gemini_to_anthropic.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/convert/openai_to_anthropic.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/convert/vendor_channels.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/logging/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/logging/db.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/logging/formatters.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/logging/stats.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/model/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/model/auth.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/model/compat.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/model/constants.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/model/pricing.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/model/token.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/model/vendor.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/config.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/extractors/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/extractors/anthropic.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/extractors/gemini.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/extractors/openai.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/handler.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/operation.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/routes.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/usage_registry.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/pricing.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/circuit_breaker.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/error_classifier.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/model_mapper.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/quota_guard.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/rate_limit.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/retry.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/router.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/session_manager.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/session_policy.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/tier.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/usage_parser.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/usage_recorder.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/server/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/server/app.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/server/factory.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/server/responses.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/streaming/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/streaming/anthropic_compat.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/alibaba.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/anthropic.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/antigravity.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/copilot.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/copilot_models.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/copilot_token_manager.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/copilot_urls.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/doubao.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/kimi.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/minimax.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/mixins.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/native_anthropic.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/token_manager.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/xiaomi.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/e2e/__init__.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/e2e/conftest.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/e2e/test_e2e_http.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/e2e/test_e2e_token.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/e2e/test_e2e_vendor.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_antigravity.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_app_routes.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_auto_login.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_banner.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_circuit_breaker.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_cli_usage.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_compat.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_config_init.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_config_loader.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_convert_request.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_convert_response.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_convert_sse.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_copilot.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_copilot_convert_request.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_copilot_convert_response.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_copilot_models.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_copilot_urls.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_currency.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_error_classifier.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_logging_dual_write.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_mixins.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_model_auth.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_model_compat.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_model_constants.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_model_mapper.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_model_pricing.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_model_token.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_model_vendor.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_native_api_base_url_override.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_native_api_extractors.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_native_api_handler.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_native_api_operation.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_native_api_routes.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_native_vendors.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_parse_usage.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_parse_usage_gemini.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_pricing.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_quota_guard.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_rate_limit.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_router_chain.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_runtime_reauth.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_schema.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_session_aware.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_streaming_anthropic_compat.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_tier.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_tiers_config.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_time_range.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_token_logger.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_token_logger_native_columns.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_token_manager.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_types.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_vendor_channels.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_vendor_streaming.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_vendors.py +0 -0
- {coding_proxy-0.4.1a12 → coding_proxy-0.5.1a1}/tests/test_zhipu.py +0 -0
|
@@ -44,14 +44,15 @@
|
|
|
44
44
|
1. **Python**: 严禁使用 pip/poetry,**必须**统一使用 `uv` 进行包管理与脚本执行(如 `uv run`);
|
|
45
45
|
2. **JavaScript/TypeScript**: 严禁使用 npm/yarn,**必须**统一使用 `pnpm` 进行包管理与脚本执行;
|
|
46
46
|
- **Database Management**: 谨慎操作,数据迁移、测试等操作严禁将现有数据删除,谨慎操作数据迁移的回滚,防止数据被清理。
|
|
47
|
+
- **In-depth and close to the facts**:系统且全面地进行问题的分析,深入贴近事实,如有疑问,需先发问,不要乱做决定。
|
|
47
48
|
- **Browser Validation Protocol (浏览器验证准则)**:Agent 不得自行完成、绕过或模拟任何 OAuth / SSO 认证流程,所有登录态均来源于用户已认证的 Chrome 主 profile(真实用户登录态)。完整协议(连通性自检、凭证管理、E2E 集成、实机回归等)详见 [浏览器验证协议](./docs/agents/browser-validation.md);
|
|
48
49
|
1. **安全红线**:禁止在 Sandbox 浏览器中跳转 Google 同意屏;禁止以模拟用户或第三方账号替代真实登录态;禁止要求用户在 chat 中粘贴密码、Cookie 或验证码;
|
|
49
50
|
- **Knowledge Map (知识索引)**:项目所有文档索引统一维护在 [知识索引](./docs/agents/knowledge-map.md),并在文档目录变更时即时同步更新;
|
|
50
51
|
- **Documentation Standards (文档规范)**:
|
|
51
|
-
1. **Visual Documentation (图文并茂)**: 对于复杂逻辑,优先 **Mermaid Visualization Norms (Mermaid 可视化规范)
|
|
52
|
+
1. **Visual Documentation (图文并茂)**: 对于复杂逻辑,优先 **Mermaid Visualization Norms (Mermaid 可视化规范)**,构建“图文并茂”的直观文档;
|
|
52
53
|
- **色彩语义与兼容性**:为图表节点配置具备语义辨识度的色彩,并确保在深色模式(Dark Mode)下具有极高的对比度与清晰度;
|
|
53
54
|
- **逻辑模块化解构**:针对业务跨度较大的架构流程,强制采用 `subgraph` 容器进行层级解构与边界划分,以增强图表的自解说(Self-explaining)能力;
|
|
54
55
|
2. **语言叙事**:用语精准,叙事完备,行文专业,聚焦核心,篇幅精炼,形象具体,体现真实作用与用户吸引性,字数恰当;
|
|
55
|
-
3. **Direct Hyperlinking (直接跳转)**: 在文档中提及 Repo 内其他资源(文档/代码)时,**必须**构建可跳转的相对路径链接(如 `[Doc Name](./path.md)
|
|
56
|
+
3. **Direct Hyperlinking (直接跳转)**: 在文档中提及 Repo 内其他资源(文档/代码)时,**必须**构建可跳转的相对路径链接(如 `[Doc Name](./path.md)`),严禁使用“死文本”引用,以降低信息检索熵;
|
|
56
57
|
4. **实操截图**:文档需要引入必要的浏览器实操截图时,需自行通过默认浏览器打开相关页面,通过实操现场截图并保留到文档路径进行文档引用;
|
|
57
58
|
- **Reference Specifications (IEEE)**:为保障工程决策的可追溯性与学术严谨性,核心引用需遵循 [reference-specifications.md](docs/agents/reference-specifications.md)IEEE 标准引用格式;
|
|
@@ -4,7 +4,34 @@
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
-
- feat(
|
|
7
|
+
- feat(dashboard): Model Calling 实时监控扩展至全 vendor / 全 model(仅 CC 场景),其他 vendor 在 monitor 模式下仅计数不限流,Zhipu 保留 limited 模式 + FIFO 排队;
|
|
8
|
+
- feat(concurrency): 新增 `peak_pending_recent` 最近 10s 排队峰值追踪,瞬时排队释放后前端仍可见"曾排队 N" 余晖徽章;
|
|
9
|
+
- perf(dashboard): Model Calling 轮询间隔由 5000ms 缩短至 1500ms,提升瞬时排队可观测性;
|
|
10
|
+
- refactor(vendors): `ModelConcurrencyLimiter` 重构为 `ModelConcurrencyController`,统一 monitor / limited 双模式抽象(保留旧名别名);并发控制由 vendor 内部迁移至 executor 层 `track_in_flight` 包裹,行为对所有 vendor 一致;
|
|
11
|
+
|
|
12
|
+
## [v0.5.0](https://github.com/ThreeFish-AI/coding-proxy/releases/tag/v0.5.0) - 2026-05-27
|
|
13
|
+
|
|
14
|
+
> [!IMPORTANT]
|
|
15
|
+
>
|
|
16
|
+
> **🚀 Model Calling 实时状态!**
|
|
17
|
+
>
|
|
18
|
+
> 模型并发与排队深度一目了然,运行时动态调整每个模型并行度,预防 vendor 侧的 429 幺蛾子。
|
|
19
|
+
|
|
20
|
+

|
|
21
|
+
|
|
22
|
+
### ✨ 核心亮点
|
|
23
|
+
|
|
24
|
+
- feat(concurrency): 新增 Model Calling 实时状态模块,可视化每模型并发与排队深度,支持运行时动态修改每模型并行度 (#250) (#251)
|
|
25
|
+
- feat(zhipu): 新增每模型并发限制,默认 3 个并行请求 FIFO 排队 (#248)
|
|
26
|
+
- feat(zhipu): 为 429 Rate Limit 添加指数退避重试挽回机制 (#242)
|
|
27
|
+
|
|
28
|
+
### 🔧 更多特性
|
|
29
|
+
|
|
30
|
+
- fix(antigravity): 修复 v1internal 模式检测逻辑并新增 E2E 测试; (#234)
|
|
31
|
+
- fix(routes): 修复 count_tokens 路由对 target_vendor.name 的错误属性访问; (#235)
|
|
32
|
+
- fix(vendor-channels): 修复 zhipu→anthropic 通道 tool_use/tool_result 配对漏洞; (#236)
|
|
33
|
+
- fix(native-api): 修复 Gemini :verb 路径中 %3A URL 编码导致上游 400 的兼容问题; (#237)
|
|
34
|
+
- fix(zhipu): 诊断首选 tier 语义拒绝降级问题,增强可观测性并提取跨供应商清洗共享函数 (#243)
|
|
8
35
|
|
|
9
36
|
## [v0.4.0](https://github.com/ThreeFish-AI/coding-proxy/releases/tag/v0.4.0) — 2026-05-01
|
|
10
37
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coding-proxy
|
|
3
|
-
Version: 0.4.1a12
|
|
3
|
+
Version: 0.5.1a1
|
|
4
4
|
Summary: A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao...
|
|
5
5
|
Project-URL: Source Code, https://github.com/ThreeFish-AI/coding-proxy
|
|
6
6
|
Project-URL: User Guide, https://github.com/ThreeFish-AI/coding-proxy/blob/master/docs/user-guide.md
|
|
Binary file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "coding-proxy"
|
|
3
|
-
version = "0.4.1a12"
|
|
3
|
+
version = "0.5.1a1"
|
|
4
4
|
description = "A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao..."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
@@ -689,15 +689,17 @@ class _RouteExecutor:
|
|
|
689
689
|
tier.name, failed_tier_name, session_record, body
|
|
690
690
|
)
|
|
691
691
|
body_for_tier = self._prepare_body_for_tier(body, tier, source_vendor)
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
692
|
+
_mapped_model = tier.vendor.map_model(body.get("model", ""))
|
|
693
|
+
async with tier.vendor.track_in_flight(_mapped_model):
|
|
694
|
+
async for chunk in tier.vendor.send_message_stream(
|
|
695
|
+
body_for_tier, headers
|
|
696
|
+
):
|
|
697
|
+
parse_usage_from_chunk(
|
|
698
|
+
chunk,
|
|
699
|
+
usage,
|
|
700
|
+
vendor_label=_VENDOR_PROTOCOL_LABEL_MAP.get(tier.name),
|
|
701
|
+
)
|
|
702
|
+
yield chunk, tier.name
|
|
701
703
|
|
|
702
704
|
info = self._recorder.build_usage_info(usage)
|
|
703
705
|
if has_missing_input_usage_signals(info):
|
|
@@ -863,7 +865,9 @@ class _RouteExecutor:
|
|
|
863
865
|
tier.name, failed_tier_name, session_record, body
|
|
864
866
|
)
|
|
865
867
|
body_for_tier = self._prepare_body_for_tier(body, tier, source_vendor)
|
|
866
|
-
|
|
868
|
+
_mapped_model = tier.vendor.map_model(body.get("model", ""))
|
|
869
|
+
async with tier.vendor.track_in_flight(_mapped_model):
|
|
870
|
+
resp = await tier.vendor.send_message(body_for_tier, headers)
|
|
867
871
|
|
|
868
872
|
if resp.status_code < 400:
|
|
869
873
|
duration = int((time.monotonic() - start) * 1000)
|
|
@@ -629,6 +629,10 @@ _DASHBOARD_HTML = """<!DOCTYPE html>
|
|
|
629
629
|
background: rgba(251,146,60,.15);
|
|
630
630
|
color: #fb923c;
|
|
631
631
|
}
|
|
632
|
+
.mc-badge-peak {
|
|
633
|
+
background: rgba(148,163,184,.12);
|
|
634
|
+
color: #94a3b8;
|
|
635
|
+
}
|
|
632
636
|
.mc-badge-active {
|
|
633
637
|
background: rgba(74,222,128,.12);
|
|
634
638
|
color: #4ade80;
|
|
@@ -1282,10 +1286,12 @@ function updateModelCalling(status) {
|
|
|
1282
1286
|
models.push({
|
|
1283
1287
|
vendor: tier.name,
|
|
1284
1288
|
model: model,
|
|
1285
|
-
|
|
1289
|
+
mode: d.mode || 'limited',
|
|
1290
|
+
limit: d.limit,
|
|
1286
1291
|
in_use: d.in_use || 0,
|
|
1287
|
-
available: d.available
|
|
1292
|
+
available: d.available,
|
|
1288
1293
|
pending: d.pending || 0,
|
|
1294
|
+
peak_pending_recent: d.peak_pending_recent || 0,
|
|
1289
1295
|
});
|
|
1290
1296
|
}
|
|
1291
1297
|
}
|
|
@@ -1298,18 +1304,33 @@ function updateModelCalling(status) {
|
|
|
1298
1304
|
var html = '<div class="mc-grid">';
|
|
1299
1305
|
for (var k = 0; k < models.length; k++) {
|
|
1300
1306
|
var m = models[k];
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
+
|
|
1307
|
+
|
|
1308
|
+
if (m.mode === 'monitor') {
|
|
1309
|
+
// monitor 模式:纯计数徽章,无 limit/进度条
|
|
1310
|
+
html += '<div class="mc-model-row">'
|
|
1311
|
+
+ '<span class="mc-model-name">' + escapeHtml(m.vendor + '/' + m.model) + '</span>'
|
|
1312
|
+
+ '<div class="mc-bar-wrap"></div>'
|
|
1313
|
+
+ '<div class="mc-stats">'
|
|
1314
|
+
+ '<span class="mc-badge mc-badge-active">' + m.in_use + '</span>'
|
|
1315
|
+
+ '</div>'
|
|
1316
|
+
+ '</div>';
|
|
1317
|
+
} else {
|
|
1318
|
+
// limited 模式:保留现有渲染(进度条 + limit 编辑)
|
|
1319
|
+
var limit = m.limit || 0;
|
|
1320
|
+
var pct = limit > 0 ? Math.round((m.in_use / limit) * 100) : 0;
|
|
1321
|
+
var barClass = pct <= 50 ? 'mc-low' : (pct <= 80 ? 'mc-mid' : 'mc-high');
|
|
1322
|
+
|
|
1323
|
+
html += '<div class="mc-model-row">'
|
|
1324
|
+
+ '<span class="mc-model-name">' + escapeHtml(m.vendor + '/' + m.model) + '</span>'
|
|
1325
|
+
+ '<div class="mc-bar-wrap"><div class="mc-bar-fill ' + barClass + '" style="width:' + pct + '%"></div></div>'
|
|
1326
|
+
+ '<div class="mc-stats">'
|
|
1327
|
+
+ '<span class="mc-badge mc-badge-active">' + m.in_use
|
|
1328
|
+
+ '/<span class="mc-limit-editable" data-tier="' + escapeHtml(m.vendor) + '" data-model="' + escapeHtml(m.model) + '" data-limit="' + limit + '" title="点击修改并行度">' + limit + '</span></span>'
|
|
1329
|
+
+ (m.pending > 0 ? '<span class="mc-badge mc-badge-pending">⏳ ' + m.pending + '</span>' : '')
|
|
1330
|
+
+ (m.pending === 0 && m.peak_pending_recent > 0 ? '<span class="mc-badge mc-badge-peak">🕘 曾排队 ' + m.peak_pending_recent + '</span>' : '')
|
|
1331
|
+
+ '</div>'
|
|
1332
|
+
+ '</div>';
|
|
1333
|
+
}
|
|
1313
1334
|
}
|
|
1314
1335
|
html += '</div>';
|
|
1315
1336
|
wrap.innerHTML = html;
|
|
@@ -1325,7 +1346,7 @@ function startModelCallingPoll() {
|
|
|
1325
1346
|
}).catch(function() {});
|
|
1326
1347
|
}
|
|
1327
1348
|
tick();
|
|
1328
|
-
_mcTimer = setInterval(tick, 5000);
|
|
1349
|
+
_mcTimer = setInterval(tick, 1500);
|
|
1329
1350
|
}
|
|
1330
1351
|
function stopModelCallingPoll() {
|
|
1331
1352
|
if (_mcTimer) { clearInterval(_mcTimer); _mcTimer = null; }
|
|
@@ -254,16 +254,15 @@ def register_concurrency_route(app: Any, router: Any) -> None:
|
|
|
254
254
|
for tier in router.tiers:
|
|
255
255
|
if tier.name == tier_name:
|
|
256
256
|
vendor = tier.vendor
|
|
257
|
-
|
|
258
|
-
|
|
257
|
+
try:
|
|
258
|
+
vendor.update_concurrency(model, limit)
|
|
259
|
+
except ValueError as exc:
|
|
259
260
|
return json_error_response(
|
|
260
|
-
|
|
261
|
+
422,
|
|
261
262
|
error_type="invalid_request_error",
|
|
262
|
-
message=
|
|
263
|
+
message=str(exc),
|
|
263
264
|
)
|
|
264
|
-
|
|
265
|
-
update_fn(model, limit)
|
|
266
|
-
except (ValueError, AttributeError) as exc:
|
|
265
|
+
except AttributeError as exc:
|
|
267
266
|
return json_error_response(
|
|
268
267
|
400, error_type="invalid_request_error", message=str(exc)
|
|
269
268
|
)
|
|
@@ -44,6 +44,7 @@ from ..compat.canonical import (
|
|
|
44
44
|
)
|
|
45
45
|
from ..compat.session_store import CompatSessionRecord
|
|
46
46
|
from ..config.schema import FailoverConfig
|
|
47
|
+
from .concurrency import ModelConcurrencyController
|
|
47
48
|
|
|
48
49
|
logger = logging.getLogger(__name__)
|
|
49
50
|
|
|
@@ -63,6 +64,8 @@ class BaseVendor(ABC):
|
|
|
63
64
|
self._client: httpx.AsyncClient | None = None
|
|
64
65
|
self._compat_trace: CompatibilityTrace | None = None
|
|
65
66
|
self._compat_session_record: CompatSessionRecord | None = None
|
|
67
|
+
# 默认 monitor 模式(仅计数不限流);子类可覆盖为 limited 模式
|
|
68
|
+
self._concurrency_controller = ModelConcurrencyController(None)
|
|
66
69
|
|
|
67
70
|
def _get_client(self) -> httpx.AsyncClient:
|
|
68
71
|
if self._client is None or self._client.is_closed:
|
|
@@ -246,8 +249,30 @@ class BaseVendor(ABC):
|
|
|
246
249
|
diagnostics: dict[str, Any] = {}
|
|
247
250
|
if self._compat_trace is not None:
|
|
248
251
|
diagnostics["compat"] = self._compat_trace.to_dict()
|
|
252
|
+
concurrency = self._concurrency_controller.get_diagnostics()
|
|
253
|
+
if concurrency:
|
|
254
|
+
diagnostics["concurrency"] = concurrency
|
|
249
255
|
return diagnostics
|
|
250
256
|
|
|
257
|
+
def track_in_flight(self, mapped_model: str):
|
|
258
|
+
"""返回用于追踪在途请求的异步上下文管理器.
|
|
259
|
+
|
|
260
|
+
空 model name 时返回 no-op context(防御性处理)。
|
|
261
|
+
"""
|
|
262
|
+
if not mapped_model:
|
|
263
|
+
from contextlib import nullcontext
|
|
264
|
+
|
|
265
|
+
return nullcontext()
|
|
266
|
+
return self._concurrency_controller.track(mapped_model)
|
|
267
|
+
|
|
268
|
+
def update_concurrency(self, model: str, limit: int) -> None:
|
|
269
|
+
"""运行时更新指定模型的并发限制.
|
|
270
|
+
|
|
271
|
+
默认实现委托给 ``_concurrency_controller.set_limit``。
|
|
272
|
+
monitor 模式下抛 ``ValueError``。
|
|
273
|
+
"""
|
|
274
|
+
self._concurrency_controller.set_limit(model, limit)
|
|
275
|
+
|
|
251
276
|
def should_trigger_failover(
|
|
252
277
|
self, status_code: int, body: dict[str, Any] | None
|
|
253
278
|
) -> bool:
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"""统一并发控制器 — 支持监控 (monitor) 与限流 (limited) 双模式.
|
|
2
|
+
|
|
3
|
+
为每个映射后的模型(如 ``glm-5v-turbo``)独立维护一个 ``_ConcurrencySlot``,
|
|
4
|
+
根据模式提供不同语义:
|
|
5
|
+
|
|
6
|
+
**monitor 模式** (config=None)
|
|
7
|
+
- 仅计数 ``in_use``,不做排队与限流
|
|
8
|
+
- ``pending`` 恒为 0,``available`` / ``limit`` 为 None
|
|
9
|
+
- 所有 vendor 默认使用此模式
|
|
10
|
+
|
|
11
|
+
**limited 模式** (config 非 None)
|
|
12
|
+
- ``in_use`` 不超过 ``limit`` 时立即获取,超限时 FIFO 排队
|
|
13
|
+
- ``pending`` 反映当前排队数,``peak_pending_recent`` 记录最近 10s 峰值
|
|
14
|
+
- 由 ZhipuVendor 等需限流的 vendor 启用
|
|
15
|
+
|
|
16
|
+
设计要点:
|
|
17
|
+
- **惰性创建**:仅在首次请求到达时才为该模型创建 Slot,避免冷启动开销
|
|
18
|
+
- **FIFO 公平**:``asyncio.Event`` + while 循环天然满足 FIFO 排队语义(limited 模式)
|
|
19
|
+
- **动态调整**:支持运行时修改 per-model limit,无需重启进程
|
|
20
|
+
- **按映射后模型名键控**:与上游真实承载能力对齐,而非按客户端请求名
|
|
21
|
+
- **峰值余晖**:记录 ``peak_pending_recent`` 使瞬时排队可观测
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import asyncio
|
|
27
|
+
import logging
|
|
28
|
+
import time
|
|
29
|
+
from collections import deque
|
|
30
|
+
from contextlib import asynccontextmanager
|
|
31
|
+
from typing import Any, Literal
|
|
32
|
+
|
|
33
|
+
from ..config.vendors import ZhipuConcurrencyConfig
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
# peak_pending_recent 滑窗宽度(秒)
|
|
38
|
+
_PEAK_WINDOW_SECONDS = 10.0
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class _ConcurrencySlot:
    """Concurrency slot for one model, usable in monitor or limited mode.

    ``limit=None`` (monitor): ``acquire`` only increments ``in_use``; nothing
    ever queues.
    ``limit>0`` (limited): once ``in_use`` reaches ``limit``, further
    ``acquire`` calls wait until a slot is released or the limit is changed.
    """

    def __init__(self, limit: int | None) -> None:
        """Create a slot.

        Args:
            limit: Maximum number of concurrent holders, or ``None`` for
                monitor mode (count only, never wait).
        """
        self._limit = limit
        self._in_use: int = 0
        self._pending: int = 0
        # Waiters block on this event; release() and set_limit() set it so
        # that every waiter re-checks capacity.
        self._wake = asyncio.Event()
        self._wake.set()
        # (monotonic timestamp, pending value) samples behind
        # peak_pending_recent.
        self._peak_samples: deque[tuple[float, int]] = deque()

    async def acquire(self) -> None:
        """Take one slot, waiting while the slot is at capacity.

        Monitor mode (``limit is None``) increments ``in_use`` and returns.
        Limited mode returns immediately while ``in_use < limit``; otherwise
        the caller is counted in ``pending`` until it obtains a slot or the
        wait is interrupted (for example by cancellation).
        """
        # Monitor mode: count only.
        if self._limit is None:
            self._in_use += 1
            return

        # Limited mode, fast path: capacity is available right now.
        if self._in_use < self._limit:
            self._in_use += 1
            return

        # Limited mode, slow path: wait for a release or a limit change.
        self._pending += 1
        self._observe_peak()
        try:
            while True:
                # No await sits between the capacity check and clear(), so a
                # release cannot happen in between and be missed.
                self._wake.clear()
                await self._wake.wait()
                # NOTE(review): set() wakes every waiter, and the fast path
                # above does not look at ``pending``. A caller arriving right
                # after a release can therefore take the freed slot before a
                # woken waiter runs; ordering is not strictly FIFO.
                if self._in_use < self._limit:
                    self._in_use += 1
                    return
        finally:
            # Runs on success and on cancellation, keeping pending accurate.
            self._pending -= 1

    def release(self) -> None:
        """Give one slot back and, in limited mode, wake the waiters."""
        # Clamped at zero, so an unmatched release cannot drive the counter
        # negative.
        self._in_use = max(0, self._in_use - 1)
        if self._limit is not None:
            self._wake.set()

    def set_limit(self, new_limit: int) -> None:
        """Change the concurrency limit at runtime.

        Args:
            new_limit: New maximum number of concurrent holders; must be >= 1.

        Raises:
            ValueError: If the slot is in monitor mode, or if ``new_limit`` is
                smaller than 1. A non-positive limit would make every later
                ``acquire`` wait forever, because ``in_use < limit`` could
                never hold.
        """
        if self._limit is None:
            msg = "Cannot set limit on monitor-only slot"
            raise ValueError(msg)
        if new_limit < 1:
            msg = f"concurrency limit must be >= 1, got {new_limit!r}"
            raise ValueError(msg)
        self._limit = new_limit
        # Wake waiters so they can use any capacity a higher limit opened up.
        self._wake.set()

    def _drop_expired_peaks(self, now: float) -> None:
        """Discard samples older than the peak window relative to *now*."""
        cutoff = now - _PEAK_WINDOW_SECONDS
        samples = self._peak_samples
        while samples and samples[0][0] < cutoff:
            samples.popleft()

    def _observe_peak(self) -> None:
        """Record the current ``pending`` value as a peak sample."""
        now = time.monotonic()
        # Prune here as well as on read: otherwise the deque grows without
        # bound whenever requests queue but diagnostics are never polled.
        self._drop_expired_peaks(now)
        self._peak_samples.append((now, self._pending))

    def _get_peak_pending_recent(self) -> int:
        """Return the highest ``pending`` sample inside the recent window.

        Returns 0 when no sample falls inside the window.
        """
        self._drop_expired_peaks(time.monotonic())
        return max((value for _, value in self._peak_samples), default=0)

    @property
    def limit(self) -> int | None:
        """Configured limit, or ``None`` in monitor mode."""
        return self._limit

    @property
    def in_use(self) -> int:
        """Number of slots currently held."""
        return self._in_use

    @property
    def available(self) -> int | None:
        """Free slots (never negative), or ``None`` in monitor mode."""
        if self._limit is None:
            return None
        return max(0, self._limit - self._in_use)

    @property
    def pending(self) -> int:
        """Number of callers currently waiting inside ``acquire``."""
        return self._pending

    @property
    def peak_pending_recent(self) -> int:
        """Highest ``pending`` value sampled within the recent window."""
        return self._get_peak_pending_recent()
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class ModelConcurrencyController:
    """Controller that keeps an independent concurrency slot per model name.

    Usage::

        # monitor mode (default)
        ctrl = ModelConcurrencyController(None)
        async with ctrl.track("model-a"):
            ...  # run the request

        # limited mode (Zhipu etc.)
        ctrl = ModelConcurrencyController(config)
        async with ctrl.track("glm-5v-turbo"):
            ...  # waits while the model's slot is full
    """

    def __init__(self, config: ZhipuConcurrencyConfig | None) -> None:
        """Create a controller.

        Args:
            config: Concurrency configuration; ``None`` selects monitor mode.
        """
        self._config = config
        # Slots are created lazily, keyed by model name.
        self._slots: dict[str, _ConcurrencySlot] = {}

    @property
    def mode(self) -> Literal["monitor", "limited"]:
        """Current mode: ``"limited"`` when a config was given, else ``"monitor"``."""
        return "limited" if self._config is not None else "monitor"

    def _get_or_create_slot(self, model: str) -> _ConcurrencySlot:
        """Return the slot for *model*, creating it on first use."""
        slot = self._slots.get(model)
        if slot is not None:
            return slot

        if self._config is not None:
            limit = self._config.get_limit(model)
            slot = _ConcurrencySlot(limit)
            self._slots[model] = slot
            logger.debug(
                "ModelConcurrencyController: created slot mode=limited "
                "model=%s limit=%d",
                model,
                limit,
            )
        else:
            slot = _ConcurrencySlot(None)
            self._slots[model] = slot
            logger.debug(
                "ModelConcurrencyController: created slot mode=monitor model=%s",
                model,
            )
        return slot

    @asynccontextmanager
    async def track(self, model: str):
        """Async context manager: acquire a slot, run the body, release.

        Usage::

            async with controller.track("glm-5v-turbo"):
                await vendor.send_message(...)
        """
        slot = self._get_or_create_slot(model)
        # acquire() sits outside the try: if it is interrupted, no slot was
        # taken and nothing must be released.
        await slot.acquire()
        try:
            yield
        finally:
            slot.release()

    def set_limit(self, model: str, new_limit: int) -> None:
        """Change the concurrency limit of *model* at runtime.

        Args:
            model: Model name whose slot is updated (created if missing).
            new_limit: New maximum number of concurrent requests; must be >= 1.

        Raises:
            ValueError: If the controller is in monitor mode, or if
                ``new_limit`` is smaller than 1.
        """
        if self._config is None:
            msg = f"vendor is monitor-only; cannot update limit for model '{model}'"
            raise ValueError(msg)
        # Validate before touching any state: a non-positive limit would leave
        # every later request for this model waiting forever.
        if new_limit < 1:
            msg = (
                f"concurrency limit for model '{model}' must be >= 1, "
                f"got {new_limit!r}"
            )
            raise ValueError(msg)

        slot = self._slots.get(model)
        if slot is None:
            slot = _ConcurrencySlot(new_limit)
            self._slots[model] = slot
        else:
            slot.set_limit(new_limit)
        # Keep the config in step with the live slot.
        self._config.models[model] = new_limit
        logger.info(
            "ModelConcurrencyController: updated limit model=%s new_limit=%d",
            model,
            new_limit,
        )

    def get_diagnostics(self) -> dict[str, dict[str, Any]]:
        """Return a per-model snapshot of concurrency state for observability.

        Each entry has the keys ``mode``, ``in_use``, ``pending``,
        ``peak_pending_recent``, ``limit`` and ``available``.
        """
        mode = self.mode
        snapshot: dict[str, dict[str, Any]] = {}
        for model, slot in self._slots.items():
            snapshot[model] = {
                "mode": mode,
                "in_use": slot.in_use,
                "pending": slot.pending,
                "peak_pending_recent": slot.peak_pending_recent,
                # Both are already None for monitor slots, so no mode branch
                # is needed.
                "limit": slot.limit,
                "available": slot.available,
            }
        return snapshot
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
# Backward-compatible alias: the old name refers to the same class object.
ModelConcurrencyLimiter = ModelConcurrencyController

# Public names of this module.
__all__ = ["ModelConcurrencyController", "ModelConcurrencyLimiter"]
|