coding-proxy 0.5.0__tar.gz → 0.5.1a1__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (197)
  1. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/CHANGELOG.md +5 -0
  2. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/PKG-INFO +1 -1
  3. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/pyproject.toml +1 -1
  4. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/executor.py +14 -10
  5. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/server/dashboard.py +36 -15
  6. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/server/routes.py +6 -7
  7. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/base.py +25 -0
  8. coding_proxy-0.5.1a1/src/coding/proxy/vendors/concurrency.py +251 -0
  9. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/zhipu.py +42 -102
  10. coding_proxy-0.5.1a1/tests/test_concurrency_monitor.py +158 -0
  11. coding_proxy-0.5.1a1/tests/test_executor_in_flight_tracking.py +233 -0
  12. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_router_executor.py +5 -0
  13. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_zhipu_concurrency.py +164 -72
  14. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/uv.lock +1 -1
  15. coding_proxy-0.5.0/src/coding/proxy/vendors/concurrency.py +0 -162
  16. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/.github/workflows/ci.yml +0 -0
  17. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/.github/workflows/coverage.yml +0 -0
  18. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/.github/workflows/release.yml +0 -0
  19. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/.gitignore +0 -0
  20. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/.pre-commit-config.yaml +0 -0
  21. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/AGENTS.md +0 -0
  22. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/CLAUDE.md +0 -0
  23. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/LICENSE +0 -0
  24. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/README.md +0 -0
  25. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/assets/dashboard-v0.4.0.png +0 -0
  26. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/assets/model-calling-v0.5.0.png +0 -0
  27. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/assets/session-v0.4.0.png +0 -0
  28. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/agents/browser-validation.md +0 -0
  29. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/agents/issue.md +0 -0
  30. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/agents/knowledge-map.md +0 -0
  31. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/agents/reference-specifications.md +0 -0
  32. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/arch/config-reference.md +0 -0
  33. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/arch/convert.md +0 -0
  34. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/arch/design-patterns.md +0 -0
  35. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/arch/routing.md +0 -0
  36. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/arch/testing.md +0 -0
  37. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/arch/vendors.md +0 -0
  38. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/framework.md +0 -0
  39. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/guide/api-reference.md +0 -0
  40. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/guide/cli-reference.md +0 -0
  41. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/guide/dashboard.md +0 -0
  42. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/guide/monitoring.md +0 -0
  43. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/guide/quickstart.md +0 -0
  44. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/guide/vendors.md +0 -0
  45. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/ops/ci-cd.md +0 -0
  46. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/user-guide.md +0 -0
  47. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/docs/zh-CN/README.md +0 -0
  48. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/__init__.py +0 -0
  49. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/__init__.py +0 -0
  50. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/__main__.py +0 -0
  51. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/auth/__init__.py +0 -0
  52. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/auth/providers/__init__.py +0 -0
  53. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/auth/providers/base.py +0 -0
  54. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/auth/providers/github.py +0 -0
  55. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/auth/providers/google.py +0 -0
  56. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/auth/runtime.py +0 -0
  57. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/auth/store.py +0 -0
  58. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/cli/__init__.py +0 -0
  59. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/cli/auth_commands.py +0 -0
  60. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/cli/banner.py +0 -0
  61. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/compat/__init__.py +0 -0
  62. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/compat/canonical.py +0 -0
  63. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/compat/session_store.py +0 -0
  64. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/config/__init__.py +0 -0
  65. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/config/auth_schema.py +0 -0
  66. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/config/config.default.yaml +0 -0
  67. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/config/loader.py +0 -0
  68. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/config/resiliency.py +0 -0
  69. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/config/routing.py +0 -0
  70. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/config/schema.py +0 -0
  71. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/config/server.py +0 -0
  72. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/config/session_policy.py +0 -0
  73. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/config/vendors.py +0 -0
  74. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/convert/__init__.py +0 -0
  75. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/convert/anthropic_to_gemini.py +0 -0
  76. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/convert/anthropic_to_openai.py +0 -0
  77. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/convert/gemini_sse_adapter.py +0 -0
  78. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/convert/gemini_to_anthropic.py +0 -0
  79. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/convert/openai_to_anthropic.py +0 -0
  80. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/convert/vendor_channels.py +0 -0
  81. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/logging/__init__.py +0 -0
  82. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/logging/db.py +0 -0
  83. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/logging/formatters.py +0 -0
  84. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/logging/stats.py +0 -0
  85. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/model/__init__.py +0 -0
  86. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/model/auth.py +0 -0
  87. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/model/compat.py +0 -0
  88. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/model/constants.py +0 -0
  89. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/model/pricing.py +0 -0
  90. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/model/token.py +0 -0
  91. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/model/vendor.py +0 -0
  92. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/__init__.py +0 -0
  93. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/config.py +0 -0
  94. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/extractors/__init__.py +0 -0
  95. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/extractors/anthropic.py +0 -0
  96. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/extractors/gemini.py +0 -0
  97. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/extractors/openai.py +0 -0
  98. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/handler.py +0 -0
  99. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/operation.py +0 -0
  100. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/routes.py +0 -0
  101. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/native_api/usage_registry.py +0 -0
  102. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/pricing.py +0 -0
  103. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/__init__.py +0 -0
  104. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/circuit_breaker.py +0 -0
  105. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/error_classifier.py +0 -0
  106. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/model_mapper.py +0 -0
  107. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/quota_guard.py +0 -0
  108. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/rate_limit.py +0 -0
  109. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/retry.py +0 -0
  110. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/router.py +0 -0
  111. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/session_manager.py +0 -0
  112. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/session_policy.py +0 -0
  113. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/tier.py +0 -0
  114. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/usage_parser.py +0 -0
  115. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/routing/usage_recorder.py +0 -0
  116. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/server/__init__.py +0 -0
  117. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/server/app.py +0 -0
  118. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/server/factory.py +0 -0
  119. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/server/responses.py +0 -0
  120. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/streaming/__init__.py +0 -0
  121. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/streaming/anthropic_compat.py +0 -0
  122. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/__init__.py +0 -0
  123. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/alibaba.py +0 -0
  124. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/anthropic.py +0 -0
  125. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/antigravity.py +0 -0
  126. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/copilot.py +0 -0
  127. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/copilot_models.py +0 -0
  128. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/copilot_token_manager.py +0 -0
  129. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/copilot_urls.py +0 -0
  130. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/doubao.py +0 -0
  131. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/kimi.py +0 -0
  132. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/minimax.py +0 -0
  133. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/mixins.py +0 -0
  134. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/native_anthropic.py +0 -0
  135. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/token_manager.py +0 -0
  136. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/src/coding/proxy/vendors/xiaomi.py +0 -0
  137. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/__init__.py +0 -0
  138. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/e2e/__init__.py +0 -0
  139. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/e2e/conftest.py +0 -0
  140. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/e2e/test_e2e_http.py +0 -0
  141. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/e2e/test_e2e_token.py +0 -0
  142. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/e2e/test_e2e_vendor.py +0 -0
  143. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_antigravity.py +0 -0
  144. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_app_routes.py +0 -0
  145. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_auto_login.py +0 -0
  146. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_banner.py +0 -0
  147. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_circuit_breaker.py +0 -0
  148. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_cli_usage.py +0 -0
  149. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_compat.py +0 -0
  150. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_config_init.py +0 -0
  151. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_config_loader.py +0 -0
  152. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_convert_request.py +0 -0
  153. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_convert_response.py +0 -0
  154. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_convert_sse.py +0 -0
  155. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_copilot.py +0 -0
  156. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_copilot_convert_request.py +0 -0
  157. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_copilot_convert_response.py +0 -0
  158. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_copilot_models.py +0 -0
  159. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_copilot_urls.py +0 -0
  160. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_currency.py +0 -0
  161. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_error_classifier.py +0 -0
  162. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_logging_dual_write.py +0 -0
  163. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_mixins.py +0 -0
  164. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_model_auth.py +0 -0
  165. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_model_compat.py +0 -0
  166. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_model_constants.py +0 -0
  167. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_model_mapper.py +0 -0
  168. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_model_pricing.py +0 -0
  169. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_model_token.py +0 -0
  170. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_model_vendor.py +0 -0
  171. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_native_api_base_url_override.py +0 -0
  172. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_native_api_extractors.py +0 -0
  173. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_native_api_handler.py +0 -0
  174. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_native_api_operation.py +0 -0
  175. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_native_api_routes.py +0 -0
  176. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_native_vendors.py +0 -0
  177. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_parse_usage.py +0 -0
  178. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_parse_usage_gemini.py +0 -0
  179. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_pricing.py +0 -0
  180. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_quota_guard.py +0 -0
  181. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_rate_limit.py +0 -0
  182. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_router_chain.py +0 -0
  183. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_runtime_reauth.py +0 -0
  184. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_schema.py +0 -0
  185. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_session_aware.py +0 -0
  186. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_streaming_anthropic_compat.py +0 -0
  187. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_tier.py +0 -0
  188. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_tiers_config.py +0 -0
  189. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_time_range.py +0 -0
  190. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_token_logger.py +0 -0
  191. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_token_logger_native_columns.py +0 -0
  192. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_token_manager.py +0 -0
  193. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_types.py +0 -0
  194. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_vendor_channels.py +0 -0
  195. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_vendor_streaming.py +0 -0
  196. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_vendors.py +0 -0
  197. {coding_proxy-0.5.0 → coding_proxy-0.5.1a1}/tests/test_zhipu.py +0 -0
@@ -4,6 +4,11 @@
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ - feat(dashboard): Model Calling 实时监控扩展至全 vendor / 全 model(仅 CC 场景),其他 vendor 在 monitor 模式下仅计数不限流,Zhipu 保留 limited 模式 + FIFO 排队;
8
+ - feat(concurrency): 新增 `peak_pending_recent` 最近 10s 排队峰值追踪,瞬时排队释放后前端仍可见"曾排队 N" 余晖徽章;
9
+ - perf(dashboard): Model Calling 轮询间隔由 5000ms 缩短至 1500ms,提升瞬时排队可观测性;
10
+ - refactor(vendors): `ModelConcurrencyLimiter` 重构为 `ModelConcurrencyController`,统一 monitor / limited 双模式抽象(保留旧名别名);并发控制由 vendor 内部迁移至 executor 层 `track_in_flight` 包裹,行为对所有 vendor 一致;
11
+
7
12
  ## [v0.5.0](https://github.com/ThreeFish-AI/coding-proxy/releases/tag/v0.5.0) - 2026-05-27
8
13
 
9
14
  > [!IMPORTANT]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coding-proxy
3
- Version: 0.5.0
3
+ Version: 0.5.1a1
4
4
  Summary: A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao...
5
5
  Project-URL: Source Code, https://github.com/ThreeFish-AI/coding-proxy
6
6
  Project-URL: User Guide, https://github.com/ThreeFish-AI/coding-proxy/blob/master/docs/user-guide.md
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "coding-proxy"
3
- version = "0.5.0"
3
+ version = "0.5.1a1"
4
4
  description = "A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao..."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -689,15 +689,17 @@ class _RouteExecutor:
689
689
  tier.name, failed_tier_name, session_record, body
690
690
  )
691
691
  body_for_tier = self._prepare_body_for_tier(body, tier, source_vendor)
692
- async for chunk in tier.vendor.send_message_stream(
693
- body_for_tier, headers
694
- ):
695
- parse_usage_from_chunk(
696
- chunk,
697
- usage,
698
- vendor_label=_VENDOR_PROTOCOL_LABEL_MAP.get(tier.name),
699
- )
700
- yield chunk, tier.name
692
+ _mapped_model = tier.vendor.map_model(body.get("model", ""))
693
+ async with tier.vendor.track_in_flight(_mapped_model):
694
+ async for chunk in tier.vendor.send_message_stream(
695
+ body_for_tier, headers
696
+ ):
697
+ parse_usage_from_chunk(
698
+ chunk,
699
+ usage,
700
+ vendor_label=_VENDOR_PROTOCOL_LABEL_MAP.get(tier.name),
701
+ )
702
+ yield chunk, tier.name
701
703
 
702
704
  info = self._recorder.build_usage_info(usage)
703
705
  if has_missing_input_usage_signals(info):
@@ -863,7 +865,9 @@ class _RouteExecutor:
863
865
  tier.name, failed_tier_name, session_record, body
864
866
  )
865
867
  body_for_tier = self._prepare_body_for_tier(body, tier, source_vendor)
866
- resp = await tier.vendor.send_message(body_for_tier, headers)
868
+ _mapped_model = tier.vendor.map_model(body.get("model", ""))
869
+ async with tier.vendor.track_in_flight(_mapped_model):
870
+ resp = await tier.vendor.send_message(body_for_tier, headers)
867
871
 
868
872
  if resp.status_code < 400:
869
873
  duration = int((time.monotonic() - start) * 1000)
@@ -629,6 +629,10 @@ _DASHBOARD_HTML = """<!DOCTYPE html>
629
629
  background: rgba(251,146,60,.15);
630
630
  color: #fb923c;
631
631
  }
632
+ .mc-badge-peak {
633
+ background: rgba(148,163,184,.12);
634
+ color: #94a3b8;
635
+ }
632
636
  .mc-badge-active {
633
637
  background: rgba(74,222,128,.12);
634
638
  color: #4ade80;
@@ -1282,10 +1286,12 @@ function updateModelCalling(status) {
1282
1286
  models.push({
1283
1287
  vendor: tier.name,
1284
1288
  model: model,
1285
- limit: d.limit || 0,
1289
+ mode: d.mode || 'limited',
1290
+ limit: d.limit,
1286
1291
  in_use: d.in_use || 0,
1287
- available: d.available || 0,
1292
+ available: d.available,
1288
1293
  pending: d.pending || 0,
1294
+ peak_pending_recent: d.peak_pending_recent || 0,
1289
1295
  });
1290
1296
  }
1291
1297
  }
@@ -1298,18 +1304,33 @@ function updateModelCalling(status) {
1298
1304
  var html = '<div class="mc-grid">';
1299
1305
  for (var k = 0; k < models.length; k++) {
1300
1306
  var m = models[k];
1301
- var pct = m.limit > 0 ? Math.round((m.in_use / m.limit) * 100) : 0;
1302
- var barClass = pct <= 50 ? 'mc-low' : (pct <= 80 ? 'mc-mid' : 'mc-high');
1303
-
1304
- html += '<div class="mc-model-row">'
1305
- + '<span class="mc-model-name">' + escapeHtml(m.vendor + '/' + m.model) + '</span>'
1306
- + '<div class="mc-bar-wrap"><div class="mc-bar-fill ' + barClass + '" style="width:' + pct + '%"></div></div>'
1307
- + '<div class="mc-stats">'
1308
- + '<span class="mc-badge mc-badge-active">' + m.in_use
1309
- + '/<span class="mc-limit-editable" data-tier="' + escapeHtml(m.vendor) + '" data-model="' + escapeHtml(m.model) + '" data-limit="' + m.limit + '" title="点击修改并行度">' + m.limit + '</span></span>'
1310
- + (m.pending > 0 ? '<span class="mc-badge mc-badge-pending">⏳ ' + m.pending + '</span>' : '')
1311
- + '</div>'
1312
- + '</div>';
1307
+
1308
+ if (m.mode === 'monitor') {
1309
+ // monitor 模式:纯计数徽章,无 limit/进度条
1310
+ html += '<div class="mc-model-row">'
1311
+ + '<span class="mc-model-name">' + escapeHtml(m.vendor + '/' + m.model) + '</span>'
1312
+ + '<div class="mc-bar-wrap"></div>'
1313
+ + '<div class="mc-stats">'
1314
+ + '<span class="mc-badge mc-badge-active">' + m.in_use + '</span>'
1315
+ + '</div>'
1316
+ + '</div>';
1317
+ } else {
1318
+ // limited 模式:保留现有渲染(进度条 + limit 编辑)
1319
+ var limit = m.limit || 0;
1320
+ var pct = limit > 0 ? Math.round((m.in_use / limit) * 100) : 0;
1321
+ var barClass = pct <= 50 ? 'mc-low' : (pct <= 80 ? 'mc-mid' : 'mc-high');
1322
+
1323
+ html += '<div class="mc-model-row">'
1324
+ + '<span class="mc-model-name">' + escapeHtml(m.vendor + '/' + m.model) + '</span>'
1325
+ + '<div class="mc-bar-wrap"><div class="mc-bar-fill ' + barClass + '" style="width:' + pct + '%"></div></div>'
1326
+ + '<div class="mc-stats">'
1327
+ + '<span class="mc-badge mc-badge-active">' + m.in_use
1328
+ + '/<span class="mc-limit-editable" data-tier="' + escapeHtml(m.vendor) + '" data-model="' + escapeHtml(m.model) + '" data-limit="' + limit + '" title="点击修改并行度">' + limit + '</span></span>'
1329
+ + (m.pending > 0 ? '<span class="mc-badge mc-badge-pending">⏳ ' + m.pending + '</span>' : '')
1330
+ + (m.pending === 0 && m.peak_pending_recent > 0 ? '<span class="mc-badge mc-badge-peak">🕘 曾排队 ' + m.peak_pending_recent + '</span>' : '')
1331
+ + '</div>'
1332
+ + '</div>';
1333
+ }
1313
1334
  }
1314
1335
  html += '</div>';
1315
1336
  wrap.innerHTML = html;
@@ -1325,7 +1346,7 @@ function startModelCallingPoll() {
1325
1346
  }).catch(function() {});
1326
1347
  }
1327
1348
  tick();
1328
- _mcTimer = setInterval(tick, 5000);
1349
+ _mcTimer = setInterval(tick, 1500);
1329
1350
  }
1330
1351
  function stopModelCallingPoll() {
1331
1352
  if (_mcTimer) { clearInterval(_mcTimer); _mcTimer = null; }
@@ -254,16 +254,15 @@ def register_concurrency_route(app: Any, router: Any) -> None:
254
254
  for tier in router.tiers:
255
255
  if tier.name == tier_name:
256
256
  vendor = tier.vendor
257
- update_fn = getattr(vendor, "update_concurrency", None)
258
- if update_fn is None:
257
+ try:
258
+ vendor.update_concurrency(model, limit)
259
+ except ValueError as exc:
259
260
  return json_error_response(
260
- 400,
261
+ 422,
261
262
  error_type="invalid_request_error",
262
- message=f"vendor '{tier_name}' does not support concurrency",
263
+ message=str(exc),
263
264
  )
264
- try:
265
- update_fn(model, limit)
266
- except (ValueError, AttributeError) as exc:
265
+ except AttributeError as exc:
267
266
  return json_error_response(
268
267
  400, error_type="invalid_request_error", message=str(exc)
269
268
  )
@@ -44,6 +44,7 @@ from ..compat.canonical import (
44
44
  )
45
45
  from ..compat.session_store import CompatSessionRecord
46
46
  from ..config.schema import FailoverConfig
47
+ from .concurrency import ModelConcurrencyController
47
48
 
48
49
  logger = logging.getLogger(__name__)
49
50
 
@@ -63,6 +64,8 @@ class BaseVendor(ABC):
63
64
  self._client: httpx.AsyncClient | None = None
64
65
  self._compat_trace: CompatibilityTrace | None = None
65
66
  self._compat_session_record: CompatSessionRecord | None = None
67
+ # 默认 monitor 模式(仅计数不限流);子类可覆盖为 limited 模式
68
+ self._concurrency_controller = ModelConcurrencyController(None)
66
69
 
67
70
  def _get_client(self) -> httpx.AsyncClient:
68
71
  if self._client is None or self._client.is_closed:
@@ -246,8 +249,30 @@ class BaseVendor(ABC):
246
249
  diagnostics: dict[str, Any] = {}
247
250
  if self._compat_trace is not None:
248
251
  diagnostics["compat"] = self._compat_trace.to_dict()
252
+ concurrency = self._concurrency_controller.get_diagnostics()
253
+ if concurrency:
254
+ diagnostics["concurrency"] = concurrency
249
255
  return diagnostics
250
256
 
257
+ def track_in_flight(self, mapped_model: str):
258
+ """返回用于追踪在途请求的异步上下文管理器.
259
+
260
+ 空 model name 时返回 no-op context(防御性处理)。
261
+ """
262
+ if not mapped_model:
263
+ from contextlib import nullcontext
264
+
265
+ return nullcontext()
266
+ return self._concurrency_controller.track(mapped_model)
267
+
268
+ def update_concurrency(self, model: str, limit: int) -> None:
269
+ """运行时更新指定模型的并发限制.
270
+
271
+ 默认实现委托给 ``_concurrency_controller.set_limit``。
272
+ monitor 模式下抛 ``ValueError``。
273
+ """
274
+ self._concurrency_controller.set_limit(model, limit)
275
+
251
276
  def should_trigger_failover(
252
277
  self, status_code: int, body: dict[str, Any] | None
253
278
  ) -> bool:
@@ -0,0 +1,251 @@
1
+ """统一并发控制器 — 支持监控 (monitor) 与限流 (limited) 双模式.
2
+
3
+ 为每个映射后的模型(如 ``glm-5v-turbo``)独立维护一个 ``_ConcurrencySlot``,
4
+ 根据模式提供不同语义:
5
+
6
+ **monitor 模式** (config=None)
7
+ - 仅计数 ``in_use``,不做排队与限流
8
+ - ``pending`` 恒为 0,``available`` / ``limit`` 为 None
9
+ - 所有 vendor 默认使用此模式
10
+
11
+ **limited 模式** (config 非 None)
12
+ - ``in_use`` 不超过 ``limit`` 时立即获取,超限时 FIFO 排队
13
+ - ``pending`` 反映当前排队数,``peak_pending_recent`` 记录最近 10s 峰值
14
+ - 由 ZhipuVendor 等需限流的 vendor 启用
15
+
16
+ 设计要点:
17
+ - **惰性创建**:仅在首次请求到达时才为该模型创建 Slot,避免冷启动开销
18
+ - **FIFO 公平**:``asyncio.Event`` + while 循环天然满足 FIFO 排队语义(limited 模式)
19
+ - **动态调整**:支持运行时修改 per-model limit,无需重启进程
20
+ - **按映射后模型名键控**:与上游真实承载能力对齐,而非按客户端请求名
21
+ - **峰值余晖**:记录 ``peak_pending_recent`` 使瞬时排队可观测
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import asyncio
27
+ import logging
28
+ import time
29
+ from collections import deque
30
+ from contextlib import asynccontextmanager
31
+ from typing import Any, Literal
32
+
33
+ from ..config.vendors import ZhipuConcurrencyConfig
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+ # peak_pending_recent 滑窗宽度(秒)
38
+ _PEAK_WINDOW_SECONDS = 10.0
39
+
40
+
41
+ class _ConcurrencySlot:
42
+ """支持双模式的并发槽位.
43
+
44
+ ``limit=None`` (monitor) 时 acquire 走 fast path,仅计数。
45
+ ``limit>0`` (limited) 时在满槽位后 FIFO 排队等待。
46
+ """
47
+
48
+ def __init__(self, limit: int | None) -> None:
49
+ self._limit = limit
50
+ self._in_use: int = 0
51
+ self._pending: int = 0
52
+ self._wake = asyncio.Event()
53
+ self._wake.set()
54
+ # peak_pending_recent 追踪:存储 (timestamp, pending_value) 元组
55
+ self._peak_samples: deque[tuple[float, int]] = deque()
56
+
57
+ async def acquire(self) -> None:
58
+ """获取一个并发槽位.
59
+
60
+ monitor 模式 (limit=None):仅 in_use++,永不排队。
61
+ limited 模式 (limit>0):满槽时阻塞等待。
62
+ """
63
+ # monitor 模式:仅计数
64
+ if self._limit is None:
65
+ self._in_use += 1
66
+ return
67
+
68
+ # limited — fast path
69
+ if self._in_use < self._limit:
70
+ self._in_use += 1
71
+ return
72
+
73
+ # limited — slow path: FIFO 排队
74
+ self._pending += 1
75
+ self._observe_peak()
76
+ try:
77
+ while True:
78
+ self._wake.clear()
79
+ await self._wake.wait()
80
+ if self._in_use < self._limit:
81
+ self._in_use += 1
82
+ return
83
+ finally:
84
+ self._pending -= 1
85
+
86
+ def release(self) -> None:
87
+ """释放一个并发槽位."""
88
+ self._in_use = max(0, self._in_use - 1)
89
+ if self._limit is not None:
90
+ self._wake.set()
91
+
92
+ def set_limit(self, new_limit: int) -> None:
93
+ """动态调整并发上限.
94
+
95
+ 仅 limited 模式有效;monitor 模式调用抛 ValueError。
96
+ """
97
+ if self._limit is None:
98
+ msg = "Cannot set limit on monitor-only slot"
99
+ raise ValueError(msg)
100
+ self._limit = new_limit
101
+ self._wake.set()
102
+
103
+ def _observe_peak(self) -> None:
104
+ """记录当前 pending 值作为峰值采样点."""
105
+ now = time.monotonic()
106
+ self._peak_samples.append((now, self._pending))
107
+
108
+ def _get_peak_pending_recent(self) -> int:
109
+ """获取最近窗口内的 peak pending 值."""
110
+ cutoff = time.monotonic() - _PEAK_WINDOW_SECONDS
111
+ # 剔除过期采样
112
+ while self._peak_samples and self._peak_samples[0][0] < cutoff:
113
+ self._peak_samples.popleft()
114
+ if not self._peak_samples:
115
+ return 0
116
+ return max(v for _, v in self._peak_samples)
117
+
118
+ @property
119
+ def limit(self) -> int | None:
120
+ return self._limit
121
+
122
+ @property
123
+ def in_use(self) -> int:
124
+ return self._in_use
125
+
126
+ @property
127
+ def available(self) -> int | None:
128
+ if self._limit is None:
129
+ return None
130
+ return max(0, self._limit - self._in_use)
131
+
132
+ @property
133
+ def pending(self) -> int:
134
+ return self._pending
135
+
136
+ @property
137
+ def peak_pending_recent(self) -> int:
138
+ return self._get_peak_pending_recent()
139
+
140
+
141
+ class ModelConcurrencyController:
142
+ """按模型名提供独立并发槽位的控制器.
143
+
144
+ 用法::
145
+
146
+ # monitor 模式(默认)
147
+ ctrl = ModelConcurrencyController(None)
148
+ async with ctrl.track("model-a"):
149
+ ... # 执行请求
150
+
151
+ # limited 模式(Zhipu 等)
152
+ ctrl = ModelConcurrencyController(config)
153
+ async with ctrl.track("glm-5v-turbo"):
154
+ ... # 满槽时排队等待
155
+ """
156
+
157
+ def __init__(self, config: ZhipuConcurrencyConfig | None) -> None:
158
+ self._config = config
159
+ self._slots: dict[str, _ConcurrencySlot] = {}
160
+
161
+ @property
162
+ def mode(self) -> Literal["monitor", "limited"]:
163
+ """当前控制器模式."""
164
+ return "limited" if self._config is not None else "monitor"
165
+
166
+ def _get_or_create_slot(self, model: str) -> _ConcurrencySlot:
167
+ """获取(或惰性创建)指定模型的并发槽位."""
168
+ slot = self._slots.get(model)
169
+ if slot is None:
170
+ if self._config is not None:
171
+ limit = self._config.get_limit(model)
172
+ else:
173
+ limit = None
174
+ slot = _ConcurrencySlot(limit)
175
+ self._slots[model] = slot
176
+ if self._config is not None:
177
+ logger.debug(
178
+ "ModelConcurrencyController: created slot mode=limited "
179
+ "model=%s limit=%d",
180
+ model,
181
+ limit,
182
+ )
183
+ else:
184
+ logger.debug(
185
+ "ModelConcurrencyController: created slot mode=monitor model=%s",
186
+ model,
187
+ )
188
+ return slot
189
+
190
+ @asynccontextmanager
191
+ async def track(self, model: str):
192
+ """异步上下文管理器:获取 → 执行 → 释放.
193
+
194
+ 用法::
195
+
196
+ async with controller.track("glm-5v-turbo"):
197
+ await vendor.send_message(...)
198
+ """
199
+ slot = self._get_or_create_slot(model)
200
+ await slot.acquire()
201
+ try:
202
+ yield
203
+ finally:
204
+ slot.release()
205
+
206
+ def set_limit(self, model: str, new_limit: int) -> None:
207
+ """运行时修改指定模型的并发上限.
208
+
209
+ 仅 limited 模式支持;monitor 模式抛 ValueError。
210
+ """
211
+ if self._config is None:
212
+ msg = f"vendor is monitor-only; cannot update limit for model '{model}'"
213
+ raise ValueError(msg)
214
+ slot = self._slots.get(model)
215
+ if slot is None:
216
+ slot = _ConcurrencySlot(new_limit)
217
+ self._slots[model] = slot
218
+ else:
219
+ slot.set_limit(new_limit)
220
+ self._config.models[model] = new_limit
221
+ logger.info(
222
+ "ModelConcurrencyController: updated limit model=%s new_limit=%d",
223
+ model,
224
+ new_limit,
225
+ )
226
+
227
+ def get_diagnostics(self) -> dict[str, dict[str, Any]]:
228
+ """返回每个模型的并发状态快照(用于可观测性)."""
229
+ snapshot: dict[str, dict[str, Any]] = {}
230
+ mode = self.mode
231
+ for model, slot in self._slots.items():
232
+ entry: dict[str, Any] = {
233
+ "mode": mode,
234
+ "in_use": slot.in_use,
235
+ "pending": slot.pending,
236
+ "peak_pending_recent": slot.peak_pending_recent,
237
+ }
238
+ if mode == "limited":
239
+ entry["limit"] = slot.limit
240
+ entry["available"] = slot.available
241
+ else:
242
+ entry["limit"] = None
243
+ entry["available"] = None
244
+ snapshot[model] = entry
245
+ return snapshot
246
+
247
+
248
+ # 向后兼容别名
249
+ ModelConcurrencyLimiter = ModelConcurrencyController
250
+
251
+ __all__ = ["ModelConcurrencyController", "ModelConcurrencyLimiter"]
@@ -17,6 +17,11 @@ Anthropic Messages API 协议,本模块做以下适配:
17
17
  - max_attempt = 5(1 初始 + 4 重试)
18
18
  - 指数退避 + Full Jitter(1s → 2s → 4s → 8s)
19
19
  - 优先尊重 server retry-after header
20
+
21
+ 并发限流由 BaseVendor._concurrency_controller 统一管控
22
+ (limited 模式),在 executor 层通过 ``track_in_flight`` 触发,
23
+ slot 跨 429 重试自然持有(executor 的 async with 包裹整个
24
+ send_message/send_message_stream 调用链)。
20
25
  """
21
26
 
22
27
  from __future__ import annotations
@@ -37,7 +42,7 @@ from ..routing.rate_limit import (
37
42
  )
38
43
  from ..routing.retry import RetryConfig, calculate_delay
39
44
  from .base import VendorResponse
40
- from .concurrency import ModelConcurrencyLimiter
45
+ from .concurrency import ModelConcurrencyController
41
46
  from .native_anthropic import NativeAnthropicVendor
42
47
 
43
48
  logger = logging.getLogger(__name__)
@@ -59,6 +64,7 @@ class ZhipuVendor(NativeAnthropicVendor):
59
64
  仅替换模型名和认证头,其余原样透传。
60
65
 
61
66
  429 Rate Limit 时自动重试(指数退避),降低 failover 频率。
67
+ 并发限流由 BaseVendor._concurrency_controller 统一管控。
62
68
  """
63
69
 
64
70
  _vendor_name = "zhipu"
@@ -72,12 +78,8 @@ class ZhipuVendor(NativeAnthropicVendor):
72
78
  ) -> None:
73
79
  super().__init__(config, model_mapper, failover_config)
74
80
  self._rl_retry = _RATE_LIMIT_RETRY
75
- # 每模型并发限制器(config.concurrency None 时禁用)
76
- self._concurrency_limiter: ModelConcurrencyLimiter | None = (
77
- ModelConcurrencyLimiter(config.concurrency)
78
- if config.concurrency is not None
79
- else None
80
- )
81
+ # 覆盖 BaseVendor 默认的 monitor 模式为 limited 模式
82
+ self._concurrency_controller = ModelConcurrencyController(config.concurrency)
81
83
 
82
84
  # ── 首选 tier 参数兼容转换 ────────────────────────────────
83
85
 
@@ -129,24 +131,7 @@ class ZhipuVendor(NativeAnthropicVendor):
129
131
  request_body: dict[str, Any],
130
132
  headers: dict[str, str],
131
133
  ) -> VendorResponse:
132
- """非流式请求,429 时自动重试.
133
-
134
- 在 429 重试循环外层套上每模型并发槽位获取,确保同一时间点同一模型的
135
- 在途请求数不超过配置上限;超过时新请求 FIFO 排队等待。
136
- """
137
- sem = await self._maybe_acquire_concurrency_slot(request_body)
138
- try:
139
- return await self._send_message_with_retry(request_body, headers)
140
- finally:
141
- if sem is not None:
142
- sem.release()
143
-
144
- async def _send_message_with_retry(
145
- self,
146
- request_body: dict[str, Any],
147
- headers: dict[str, str],
148
- ) -> VendorResponse:
149
- """原 send_message 主体逻辑(不含并发控制)."""
134
+ """非流式请求,429 时自动重试."""
150
135
  max_attempts = self._rl_retry.max_attempts
151
136
 
152
137
  for attempt in range(max_attempts):
@@ -186,87 +171,42 @@ class ZhipuVendor(NativeAnthropicVendor):
186
171
  安全性:429 在 BaseVendor.send_message_stream 中于
187
172
  status code 检查阶段即 raise(在任何 chunk yield 之前),
188
173
  因此重试不会导致已发出数据不一致。
189
-
190
- 在 429 重试循环外层套上每模型并发槽位获取,确保流式请求与非流式请求
191
- 共用同一信号量,统一限制同一模型的总在途并发数。
192
174
  """
193
- sem = await self._maybe_acquire_concurrency_slot(request_body)
194
175
  max_attempts = self._rl_retry.max_attempts
195
176
 
196
- try:
197
- for attempt in range(max_attempts):
198
- try:
199
- # 429 status code 检查阶段即 raise(在任何 chunk 之前),
200
- # 因此 __anext__ 安全:要么拿到首个 chunk,要么抛异常。
201
- ait = super().send_message_stream(request_body, headers)
202
- head = await ait.__anext__()
203
- except StopAsyncIteration:
204
- return
205
- except httpx.HTTPStatusError as exc:
206
- if exc.response is None or exc.response.status_code != 429:
207
- raise
208
- if attempt == max_attempts - 1:
209
- logger.warning(
210
- "Zhipu 429 stream rate limit exhausted after %d attempts",
211
- max_attempts,
212
- )
213
- raise
214
-
215
- delay = self._compute_retry_delay_from_response(
216
- exc.response, attempt
217
- )
218
- logger.info(
219
- "Zhipu 429 stream rate limit, retry %d/%d in %.1fms",
220
- attempt + 1,
221
- max_attempts - 1,
222
- delay,
223
- )
224
- await asyncio.sleep(delay / 1000.0)
225
- continue
226
-
227
- # yield 在 try/except 之外,避免捕获外部 athrow 的异常
228
- yield head
229
- async for chunk in ait:
230
- yield chunk
177
+ for attempt in range(max_attempts):
178
+ try:
179
+ # 429 在 status code 检查阶段即 raise(在任何 chunk 之前),
180
+ # 因此 __anext__ 安全:要么拿到首个 chunk,要么抛异常。
181
+ ait = super().send_message_stream(request_body, headers)
182
+ head = await ait.__anext__()
183
+ except StopAsyncIteration:
231
184
  return
232
- finally:
233
- if sem is not None:
234
- sem.release()
235
-
236
- # ── 并发控制 ────────────────────────────────────────────
237
-
238
- async def _maybe_acquire_concurrency_slot(
239
- self,
240
- request_body: dict[str, Any],
241
- ) -> asyncio.Semaphore | None:
242
- """按映射后模型名获取并发槽位;未配置 concurrency 时返回 None.
243
-
244
- ``map_model()`` 是纯同步字典查找,在 Semaphore 等待前调用是安全的,
245
- 且能确保排队键与上游真实承载模型对齐。
246
- """
247
- if self._concurrency_limiter is None:
248
- return None
249
- raw_model = request_body.get("model", "") if request_body else ""
250
- mapped_model = self.map_model(raw_model) if raw_model else ""
251
- if not mapped_model:
252
- return None
253
- return await self._concurrency_limiter.acquire(mapped_model)
254
-
255
- # ── 诊断信息 ─────────────────────────────────────────────
256
-
257
- def get_diagnostics(self) -> dict[str, Any]:
258
- """返回供应商运行时诊断信息,包含每模型并发状态."""
259
- diagnostics = super().get_diagnostics()
260
- if self._concurrency_limiter is not None:
261
- diagnostics["concurrency"] = self._concurrency_limiter.get_diagnostics()
262
- return diagnostics
263
-
264
- def update_concurrency(self, model: str, limit: int) -> None:
265
- """运行时更新指定模型的并发限制."""
266
- if self._concurrency_limiter is None:
267
- msg = "Concurrency limiter is not enabled for this vendor"
268
- raise ValueError(msg)
269
- self._concurrency_limiter.set_limit(model, limit)
185
+ except httpx.HTTPStatusError as exc:
186
+ if exc.response is None or exc.response.status_code != 429:
187
+ raise
188
+ if attempt == max_attempts - 1:
189
+ logger.warning(
190
+ "Zhipu 429 stream rate limit exhausted after %d attempts",
191
+ max_attempts,
192
+ )
193
+ raise
194
+
195
+ delay = self._compute_retry_delay_from_response(exc.response, attempt)
196
+ logger.info(
197
+ "Zhipu 429 stream rate limit, retry %d/%d in %.1fms",
198
+ attempt + 1,
199
+ max_attempts - 1,
200
+ delay,
201
+ )
202
+ await asyncio.sleep(delay / 1000.0)
203
+ continue
204
+
205
+ # yield 在 try/except 之外,避免捕获外部 athrow 的异常
206
+ yield head
207
+ async for chunk in ait:
208
+ yield chunk
209
+ return
270
210
 
271
211
  # ── 延迟计算 ────────────────────────────────────────────
272
212