coding-proxy 0.4.1a8__tar.gz → 0.4.1a10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/CHANGELOG.md +2 -0
  2. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/PKG-INFO +1 -1
  3. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/agents/issue.md +44 -0
  4. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/arch/config-reference.md +34 -6
  5. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/pyproject.toml +1 -1
  6. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/config/config.default.yaml +8 -0
  7. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/config/routing.py +10 -3
  8. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/config/schema.py +2 -0
  9. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/config/vendors.py +17 -1
  10. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/executor.py +119 -0
  11. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/server/factory.py +10 -6
  12. coding_proxy-0.4.1a10/src/coding/proxy/vendors/concurrency.py +78 -0
  13. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/zhipu.py +86 -33
  14. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_router_executor.py +213 -0
  15. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_schema.py +2 -1
  16. coding_proxy-0.4.1a10/tests/test_zhipu_concurrency.py +557 -0
  17. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/uv.lock +1 -1
  18. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/.github/workflows/ci.yml +0 -0
  19. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/.github/workflows/coverage.yml +0 -0
  20. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/.github/workflows/release.yml +0 -0
  21. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/.gitignore +0 -0
  22. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/.pre-commit-config.yaml +0 -0
  23. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/AGENTS.md +0 -0
  24. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/CLAUDE.md +0 -0
  25. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/LICENSE +0 -0
  26. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/README.md +0 -0
  27. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/assets/dashboard-v0.4.0.png +0 -0
  28. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/assets/session-v0.4.0.png +0 -0
  29. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/agents/browser-validation.md +0 -0
  30. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/agents/knowledge-map.md +0 -0
  31. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/agents/reference-specifications.md +0 -0
  32. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/arch/convert.md +0 -0
  33. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/arch/design-patterns.md +0 -0
  34. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/arch/routing.md +0 -0
  35. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/arch/testing.md +0 -0
  36. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/arch/vendors.md +0 -0
  37. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/framework.md +0 -0
  38. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/guide/api-reference.md +0 -0
  39. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/guide/cli-reference.md +0 -0
  40. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/guide/dashboard.md +0 -0
  41. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/guide/monitoring.md +0 -0
  42. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/guide/quickstart.md +0 -0
  43. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/guide/vendors.md +0 -0
  44. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/ops/ci-cd.md +0 -0
  45. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/user-guide.md +0 -0
  46. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/docs/zh-CN/README.md +0 -0
  47. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/__init__.py +0 -0
  48. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/__init__.py +0 -0
  49. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/__main__.py +0 -0
  50. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/auth/__init__.py +0 -0
  51. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/auth/providers/__init__.py +0 -0
  52. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/auth/providers/base.py +0 -0
  53. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/auth/providers/github.py +0 -0
  54. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/auth/providers/google.py +0 -0
  55. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/auth/runtime.py +0 -0
  56. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/auth/store.py +0 -0
  57. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/cli/__init__.py +0 -0
  58. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/cli/auth_commands.py +0 -0
  59. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/cli/banner.py +0 -0
  60. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/compat/__init__.py +0 -0
  61. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/compat/canonical.py +0 -0
  62. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/compat/session_store.py +0 -0
  63. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/config/__init__.py +0 -0
  64. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/config/auth_schema.py +0 -0
  65. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/config/loader.py +0 -0
  66. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/config/resiliency.py +0 -0
  67. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/config/server.py +0 -0
  68. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/config/session_policy.py +0 -0
  69. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/convert/__init__.py +0 -0
  70. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/convert/anthropic_to_gemini.py +0 -0
  71. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/convert/anthropic_to_openai.py +0 -0
  72. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/convert/gemini_sse_adapter.py +0 -0
  73. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/convert/gemini_to_anthropic.py +0 -0
  74. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/convert/openai_to_anthropic.py +0 -0
  75. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/convert/vendor_channels.py +0 -0
  76. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/logging/__init__.py +0 -0
  77. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/logging/db.py +0 -0
  78. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/logging/formatters.py +0 -0
  79. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/logging/stats.py +0 -0
  80. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/model/__init__.py +0 -0
  81. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/model/auth.py +0 -0
  82. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/model/compat.py +0 -0
  83. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/model/constants.py +0 -0
  84. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/model/pricing.py +0 -0
  85. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/model/token.py +0 -0
  86. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/model/vendor.py +0 -0
  87. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/__init__.py +0 -0
  88. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/config.py +0 -0
  89. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/extractors/__init__.py +0 -0
  90. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/extractors/anthropic.py +0 -0
  91. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/extractors/gemini.py +0 -0
  92. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/extractors/openai.py +0 -0
  93. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/handler.py +0 -0
  94. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/operation.py +0 -0
  95. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/routes.py +0 -0
  96. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/native_api/usage_registry.py +0 -0
  97. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/pricing.py +0 -0
  98. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/__init__.py +0 -0
  99. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/circuit_breaker.py +0 -0
  100. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/error_classifier.py +0 -0
  101. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/model_mapper.py +0 -0
  102. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/quota_guard.py +0 -0
  103. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/rate_limit.py +0 -0
  104. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/retry.py +0 -0
  105. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/router.py +0 -0
  106. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/session_manager.py +0 -0
  107. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/session_policy.py +0 -0
  108. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/tier.py +0 -0
  109. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/usage_parser.py +0 -0
  110. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/routing/usage_recorder.py +0 -0
  111. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/server/__init__.py +0 -0
  112. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/server/app.py +0 -0
  113. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/server/dashboard.py +0 -0
  114. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/server/responses.py +0 -0
  115. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/server/routes.py +0 -0
  116. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/streaming/__init__.py +0 -0
  117. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/streaming/anthropic_compat.py +0 -0
  118. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/__init__.py +0 -0
  119. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/alibaba.py +0 -0
  120. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/anthropic.py +0 -0
  121. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/antigravity.py +0 -0
  122. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/base.py +0 -0
  123. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/copilot.py +0 -0
  124. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/copilot_models.py +0 -0
  125. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/copilot_token_manager.py +0 -0
  126. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/copilot_urls.py +0 -0
  127. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/doubao.py +0 -0
  128. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/kimi.py +0 -0
  129. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/minimax.py +0 -0
  130. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/mixins.py +0 -0
  131. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/native_anthropic.py +0 -0
  132. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/token_manager.py +0 -0
  133. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/src/coding/proxy/vendors/xiaomi.py +0 -0
  134. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/__init__.py +0 -0
  135. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/e2e/__init__.py +0 -0
  136. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/e2e/conftest.py +0 -0
  137. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/e2e/test_e2e_http.py +0 -0
  138. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/e2e/test_e2e_token.py +0 -0
  139. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/e2e/test_e2e_vendor.py +0 -0
  140. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_antigravity.py +0 -0
  141. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_app_routes.py +0 -0
  142. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_auto_login.py +0 -0
  143. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_banner.py +0 -0
  144. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_circuit_breaker.py +0 -0
  145. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_cli_usage.py +0 -0
  146. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_compat.py +0 -0
  147. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_config_init.py +0 -0
  148. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_config_loader.py +0 -0
  149. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_convert_request.py +0 -0
  150. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_convert_response.py +0 -0
  151. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_convert_sse.py +0 -0
  152. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_copilot.py +0 -0
  153. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_copilot_convert_request.py +0 -0
  154. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_copilot_convert_response.py +0 -0
  155. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_copilot_models.py +0 -0
  156. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_copilot_urls.py +0 -0
  157. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_currency.py +0 -0
  158. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_error_classifier.py +0 -0
  159. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_logging_dual_write.py +0 -0
  160. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_mixins.py +0 -0
  161. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_model_auth.py +0 -0
  162. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_model_compat.py +0 -0
  163. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_model_constants.py +0 -0
  164. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_model_mapper.py +0 -0
  165. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_model_pricing.py +0 -0
  166. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_model_token.py +0 -0
  167. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_model_vendor.py +0 -0
  168. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_native_api_base_url_override.py +0 -0
  169. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_native_api_extractors.py +0 -0
  170. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_native_api_handler.py +0 -0
  171. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_native_api_operation.py +0 -0
  172. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_native_api_routes.py +0 -0
  173. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_native_vendors.py +0 -0
  174. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_parse_usage.py +0 -0
  175. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_parse_usage_gemini.py +0 -0
  176. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_pricing.py +0 -0
  177. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_quota_guard.py +0 -0
  178. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_rate_limit.py +0 -0
  179. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_router_chain.py +0 -0
  180. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_runtime_reauth.py +0 -0
  181. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_session_aware.py +0 -0
  182. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_streaming_anthropic_compat.py +0 -0
  183. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_tier.py +0 -0
  184. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_tiers_config.py +0 -0
  185. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_time_range.py +0 -0
  186. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_token_logger.py +0 -0
  187. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_token_logger_native_columns.py +0 -0
  188. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_token_manager.py +0 -0
  189. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_types.py +0 -0
  190. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_vendor_channels.py +0 -0
  191. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_vendor_streaming.py +0 -0
  192. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_vendors.py +0 -0
  193. {coding_proxy-0.4.1a8 → coding_proxy-0.4.1a10}/tests/test_zhipu.py +0 -0
@@ -4,6 +4,8 @@
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ - feat(zhipu): 新增每模型并发限制(默认 3,可通过 `vendors[zhipu].concurrency` 配置),基于 `asyncio.Semaphore` 实现 FIFO 公平排队,流式与非流式共用同一槽位,与 429 重试机制兼容。
8
+
7
9
  ## [v0.4.0](https://github.com/ThreeFish-AI/coding-proxy/releases/tag/v0.4.0) — 2026-05-01
8
10
 
9
11
  > [!IMPORTANT]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coding-proxy
3
- Version: 0.4.1a8
3
+ Version: 0.4.1a10
4
4
  Summary: A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao...
5
5
  Project-URL: Source Code, https://github.com/ThreeFish-AI/coding-proxy
6
6
  Project-URL: User Guide, https://github.com/ThreeFish-AI/coding-proxy/blob/master/docs/user-guide.md
@@ -230,3 +230,47 @@ SUM(input_tokens + output_tokens
230
230
 
231
231
  - 历次 PR 中 cache token 字段的引入是渐进式的(schema 已有四列、`log()` 入参齐全、Overview 已全口径消费),但部分聚合视图的口径升级被遗漏;任何向 `usage_log` 增列后,**必须**审计所有 `SUM(input_tokens` / `SUM(output_tokens` 出现处的聚合表达式是否需要同步更新。
232
232
  - 跨标签页同一指标(如"总 Tokens")的口径一致性,建议在添加新视图时主动与 Overview 现有口径做交叉核对,必要时在 SQL 注释中标注口径来源,便于后续 review。
233
+
234
+ ---
235
+
236
+ ## Zhipu vendor 间歇性 `[1210][API 调用参数有误]` 拒绝(诊断阶段)
237
+
238
+ **问题描述**
239
+
240
+ Zhipu vendor 作为首选 tier 时,处理 `claude-haiku-* → glm-5-turbo` 的部分请求被上游直接拒绝:
241
+
242
+ ```
243
+ WARNING Tier zhipu semantic rejection
244
+ (type=invalid_request_error,
245
+ msg=[1210][API 调用参数有误,请检查文档。][...])
246
+ [model=claude-haiku-4-5-20251001, messages=1], trying next tier without recording failure
247
+ INFO Tier anthropic message succeeded (took over from failed tier: zhipu)
248
+ ```
249
+
250
+ 失败请求统一表现为 `duration<1s + tokens=[0 0 0 0]`,被 zhipu 在入口校验阶段直接拒绝、未消耗任何 token。两次观察窗口失败率分别为 4%(2026-05-23 22:24,glm-4.7 旧映射)与 27%(2026-05-25 17:26+,glm-5-turbo 当前映射),均触发降级至 anthropic / copilot。
251
+
252
+ **表因**
253
+
254
+ `is_semantic_rejection` 检测到 zhipu 返回 `invalid_request_error + 1210` 含「API 调用参数有误」中文标记,判定为语义拒绝,不记录失败并直接转入下一层 tier(对应日志 `trying next tier without recording failure`)。1210 是智谱官方错误码,[官方文档](https://docs.bigmodel.cn/cn/api/api-code) 定义为「参数格式/类型不符规范」(区别于 1213「必需字段缺失」、1214「字段参数非法」)。
255
+
256
+ **根因(仍在收集证据)**
257
+
258
+ PR #244 的初版诊断字段仅覆盖 `thinking / thinking_blocks / cache_control / model / messages`,但 2026-05-25 17:26 后的诊断日志显示失败请求**均不含**上述任何字段。说明真正祸根在更细粒度的参数(system / tools / max_tokens / sampling / metadata / content_types / body_size 等)。
259
+
260
+ **处理方式(分阶段)**
261
+
262
+ - **Step 1(PR #244,已合并)**:在 `executor.py::_build_semantic_rejection_diagnostic` 中输出 thinking / cache_control 相关字段 — 但证据反转,覆盖不足以定位真因。
263
+ - **Step 1 v2(本次)**:扩展诊断函数覆盖 `system_kind|blocks(+cc)` / `tools` / `tool_choice` / 采样参数 / `stream` / `metadata_keys` / `content_types` / `body_bytes` 等维度。所有项「仅存在时输出」以控制日志噪声。配套 14 个单元测试(`TestBuildSemanticRejectionDiagnostic`)覆盖各字段组合。
264
+ - **Step 2(待定)**:依据扩展诊断日志的新证据,定位具体祸根参数后再施修复(候选路径:`ZhipuVendor._prepare_request` 参数剥离 / 调用现有 `normalize_for_zhipu` / pre-validation 警告)。
265
+
266
+ **后续防范**
267
+
268
+ - **「无证据,不下结论」**:当初版诊断字段无法覆盖根因时,禁止反复猜测,应优先扩展诊断维度抓取更多线索。本次先扩展再修复的迭代节奏可作为同类「黑盒 API 报错」问题的范式。
269
+ - **诊断字段设计原则**:所有诊断项应「仅存在时输出」,避免常态化噪声;输出格式紧凑(`key=val`)便于日志检索;参数值用 `!r:.N` 截断防止巨型对象灌入日志。
270
+ - **错误码差异化**:智谱 12xx 系列错误码语义并不等价(1210 ≠ 1213 ≠ 1214),未来面对类似 `[code][message]` 形式的供应商错误时,应优先查阅其官方错误码字典,避免基于错误消息字面意思的误判。
271
+
272
+ **同类问题影响与处理注意事项**
273
+
274
+ - 其他薄透传 vendor(minimax / kimi / doubao / alibaba / xiaomi)共用 `NativeAnthropicVendor._prepare_request`,若它们也开始报「参数错误」类语义拒绝,可复用本次扩展的诊断函数定位差异。
275
+ - 若证据指向 `tools` 字段(如工具 schema 不兼容)、`metadata` 字段(如自定义键被 zhipu 拒收)等具体路径,修复时应优先复用 `convert/vendor_channels.py` 中已有的 `normalize_for_zhipu` / `strip_thinking_blocks` 工具,避免在 vendor 内部重复实现剥离逻辑。
276
+ - 部署 Step 1 v2 后,建议观察至少 48 小时收集足够样本(>20 次失败),通过失败/成功请求形态对比统计找出**唯一差异维度**,再进入 Step 2。
@@ -89,12 +89,13 @@ flowchart TD
89
89
 
90
90
  ## 5. VendorConfig 弹性字段
91
91
 
92
- | 字段 | 类型 | 默认值 | 说明 |
93
- | -------------------- | -------------- | -------------------- | --------------------------- |
94
- | `circuit_breaker` | config \| None | `None` | 熔断器配置(None = 终端层) |
95
- | `retry` | config | `RetryConfig()` | 重试策略配置 |
96
- | `quota_guard` | config | `QuotaGuardConfig()` | 日度配额守卫配置 |
97
- | `weekly_quota_guard` | config | `QuotaGuardConfig()` | 周度配额守卫配置 |
92
+ | 字段 | 类型 | 默认值 | 说明 |
93
+ | -------------------- | -------------- | -------------------- | ----------------------------------- |
94
+ | `circuit_breaker` | config \| None | `None` | 熔断器配置(None = 终端层) |
95
+ | `retry` | config | `RetryConfig()` | 重试策略配置 |
96
+ | `quota_guard` | config | `QuotaGuardConfig()` | 日度配额守卫配置 |
97
+ | `weekly_quota_guard` | config | `QuotaGuardConfig()` | 周度配额守卫配置 |
98
+ | `concurrency` | config \| None | `None` | `[zhipu]` 每模型并发限制(详见 5.5) |
98
99
 
99
100
  <a id="elastic-params"></a>
100
101
 
@@ -143,6 +144,33 @@ flowchart TD
143
144
  | `error_types` | list[str] | `["rate_limit_error", "overloaded_error", "api_error"]` |
144
145
  | `error_message_patterns` | list[str] | `["quota", "limit exceeded", "usage cap", "capacity", "internal network failure"]` |
145
146
 
147
+ ### 5.5 ZhipuConcurrencyConfig — Zhipu 每模型并发参数
148
+
149
+ 仅对 `vendor: zhipu` 生效,基于 `asyncio.Semaphore` 实现 FIFO 公平排队。
150
+
151
+ | 字段 | 类型 | 默认值 | 说明 |
152
+ | --------- | -------------- | ------ | -------------------------------------------------------------------------------- |
153
+ | `default` | int | `3` | 全局默认并行度(适用于所有未在 `models` 中显式覆盖的模型);取值范围 `[1, 20]` |
154
+ | `models` | map[str → int] | `{}` | 按映射后模型名(如 `glm-5v-turbo` / `glm-5.1` / `glm-4.5-air`)自定义并行度上限 |
155
+
156
+ YAML 示例:
157
+
158
+ ```yaml
159
+ - vendor: zhipu
160
+ concurrency:
161
+ default: 3
162
+ models:
163
+ glm-5v-turbo: 5
164
+ glm-5.1: 2
165
+ ```
166
+
167
+ 行为语义:
168
+
169
+ - 信号量按**映射后模型名**键控,与上游真实承载模型对齐;流式与非流式请求共用同一槽位。
170
+ - 槽位满时新请求按 FIFO 顺序排队,直到任一在途请求释放槽位才被唤醒。
171
+ - 429 重试期间持续占用槽位(重试视为同一请求的延续)。
172
+ - 顶层 `concurrency` 字段缺省为 `None` → 工厂不向 `ZhipuConfig` 转发该字段,由 `ZhipuConfig` 的默认工厂回退为 `default=3`;`ZhipuConfig.concurrency` 为非可选字段(不接受 `null`),因此当前版本无法通过配置完全关闭限流,如需放宽可调大 `default`(上限 20)或在 `models` 中按模型覆盖。
173
+
146
174
  ---
147
175
 
148
176
  ## 6. 供应商专属字段
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "coding-proxy"
3
- version = "0.4.1a8"
3
+ version = "0.4.1a10"
4
4
  description = "A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao..."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -119,6 +119,14 @@ vendors:
119
119
  window_hours: 24.0
120
120
  threshold_percent: 95.0
121
121
  probe_interval_seconds: 300
122
+ # 每模型并发限制:默认 3 个并行请求;超出则按 FIFO 排队等待
123
+ # 可通过 models 字段覆盖单个模型的限制(如 glm-5.1: 5)
124
+ concurrency:
125
+ default: 3
126
+ # models:
127
+ # glm-5v-turbo: 3
128
+ # glm-5.1: 3
129
+ # glm-4.5-air: 3
122
130
 
123
131
  # Vendor 4: MiniMax(默认禁用,需手动启用并添加到 tiers)
124
132
  - vendor: minimax
@@ -9,6 +9,7 @@ from typing import Annotated, Any, Literal
9
9
  from pydantic import BaseModel, BeforeValidator, Field, PrivateAttr, model_validator
10
10
 
11
11
  from .resiliency import CircuitBreakerConfig, QuotaGuardConfig, RetryConfig
12
+ from .vendors import ZhipuConcurrencyConfig
12
13
 
13
14
  # ── 价格字段解析($ / ¥ 前缀支持) ──────────────────────────
14
15
 
@@ -64,13 +65,13 @@ _NATIVE_ANTHROPIC_FIELDS: frozenset[str] = frozenset(
64
65
  "api_key",
65
66
  }
66
67
  )
67
- # 向后兼容别名
68
- _ZHIPU_FIELDS = _NATIVE_ANTHROPIC_FIELDS
68
+ # Zhipu 独占字段:在通用 api_key 基础上增加每模型并发限制
69
+ _ZHIPU_FIELDS: frozenset[str] = _NATIVE_ANTHROPIC_FIELDS | frozenset({"concurrency"})
69
70
 
70
71
  _VENDOR_EXCLUSIVE_FIELDS: dict[str, frozenset[str]] = {
71
72
  "copilot": _COPILOT_FIELDS,
72
73
  "antigravity": _ANTIGRAVITY_FIELDS,
73
- "zhipu": _NATIVE_ANTHROPIC_FIELDS,
74
+ "zhipu": _ZHIPU_FIELDS,
74
75
  "minimax": _NATIVE_ANTHROPIC_FIELDS,
75
76
  "kimi": _NATIVE_ANTHROPIC_FIELDS,
76
77
  "doubao": _NATIVE_ANTHROPIC_FIELDS,
@@ -285,6 +286,12 @@ class VendorConfig(BaseModel):
285
286
  quota_guard: QuotaGuardConfig = Field(default_factory=QuotaGuardConfig)
286
287
  weekly_quota_guard: QuotaGuardConfig = Field(default_factory=QuotaGuardConfig)
287
288
 
289
+ # ── Zhipu 专属:每模型并发限制 ───────────────────────────
290
+ concurrency: ZhipuConcurrencyConfig | None = Field(
291
+ default=None,
292
+ description="[zhipu] 每模型并发限制;None 表示不限并发",
293
+ )
294
+
288
295
  @model_validator(mode="after")
289
296
  def _warn_irrelevant_fields(self) -> VendorConfig:
290
297
  """对非当前 vendor 类型的非空专属字段发出 warning."""
@@ -54,6 +54,7 @@ from .vendors import ( # noqa: F401
54
54
  KimiConfig,
55
55
  MinimaxConfig,
56
56
  XiaomiConfig,
57
+ ZhipuConcurrencyConfig,
57
58
  ZhipuConfig,
58
59
  )
59
60
 
@@ -318,6 +319,7 @@ __all__ = [
318
319
  "CopilotConfig",
319
320
  "AntigravityConfig",
320
321
  "ZhipuConfig",
322
+ "ZhipuConcurrencyConfig",
321
323
  # resiliency
322
324
  "CircuitBreakerConfig",
323
325
  "RetryConfig",
@@ -2,7 +2,21 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from pydantic import BaseModel
5
+ from pydantic import BaseModel, Field
6
+
7
+
8
+ class ZhipuConcurrencyConfig(BaseModel):
9
+ """Zhipu 每模型并发限制配置."""
10
+
11
+ default: int = Field(default=3, ge=1, le=20, description="全局默认并行度")
12
+ models: dict[str, int] = Field(
13
+ default_factory=dict,
14
+ description="按映射后模型名自定义并行度(覆盖 default)",
15
+ )
16
+
17
+ def get_limit(self, model: str) -> int:
18
+ """获取指定模型的并行度限制."""
19
+ return self.models.get(model, self.default)
6
20
 
7
21
 
8
22
  class AnthropicConfig(BaseModel):
@@ -48,6 +62,7 @@ class ZhipuConfig(BaseModel):
48
62
  base_url: str = "https://open.bigmodel.cn/api/anthropic"
49
63
  api_key: str = ""
50
64
  timeout_ms: int = 3000000
65
+ concurrency: ZhipuConcurrencyConfig = Field(default_factory=ZhipuConcurrencyConfig)
51
66
 
52
67
 
53
68
  class MinimaxConfig(BaseModel):
@@ -100,6 +115,7 @@ __all__ = [
100
115
  "CopilotConfig",
101
116
  "AntigravityConfig",
102
117
  "ZhipuConfig",
118
+ "ZhipuConcurrencyConfig",
103
119
  "MinimaxConfig",
104
120
  "KimiConfig",
105
121
  "DoubaoConfig",
@@ -6,6 +6,7 @@
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
+ import json
9
10
  import logging
10
11
  import re
11
12
  import time
@@ -175,6 +176,124 @@ def _build_semantic_rejection_diagnostic(body: dict[str, Any]) -> str:
175
176
  return f" [{', '.join(parts)}]" if parts else ""
176
177
 
177
178
 
179
+ def _build_semantic_rejection_diagnostic(body: dict[str, Any]) -> str:
180
+ """构建语义拒绝的请求体诊断上下文.
181
+
182
+ 在 semantic rejection 日志中附加请求体的可疑参数快照,
183
+ 用于定位供应商参数校验失败的具体祸根参数。
184
+
185
+ 覆盖范围:
186
+ * 模型 / messages 数(baseline)
187
+ * thinking 系列顶层参数 + history thinking_blocks 数
188
+ * system 形态(string / blocks,含 cache_control 计数)
189
+ * tools 数量 + tool_choice 形态
190
+ * 采样参数(max_tokens / temperature / top_p / top_k / stop_sequences)
191
+ * stream / metadata 形态
192
+ * cache_control 存在性
193
+ * messages.content 类型分布
194
+ * 请求体大小估算(json.dumps 字节数)
195
+ """
196
+ parts: list[str] = []
197
+
198
+ # ── 模型 + 消息数(baseline,始终输出)──
199
+ parts.append(f"model={body.get('model', 'N/A')}")
200
+ parts.append(f"messages={len(body.get('messages', []))}")
201
+
202
+ # ── 顶层 thinking 系列参数 ──
203
+ for key in ("thinking", "extended_thinking", "reasoning_effort"):
204
+ if key in body:
205
+ val = body[key]
206
+ parts.append(f"{key}={val!r:.80}")
207
+
208
+ # ── system 形态 ──
209
+ system = body.get("system")
210
+ if isinstance(system, str):
211
+ parts.append(f"system_kind=string(len={len(system)})")
212
+ elif isinstance(system, list):
213
+ cc_count = sum(
214
+ 1 for item in system if isinstance(item, dict) and "cache_control" in item
215
+ )
216
+ if cc_count:
217
+ parts.append(f"system_blocks={len(system)},cc={cc_count}")
218
+ else:
219
+ parts.append(f"system_blocks={len(system)}")
220
+
221
+ # ── tools 与 tool_choice ──
222
+ tools = body.get("tools")
223
+ if isinstance(tools, list):
224
+ parts.append(f"tools={len(tools)}")
225
+ tool_choice = body.get("tool_choice")
226
+ if tool_choice is not None:
227
+ parts.append(f"tool_choice={tool_choice!r:.60}")
228
+
229
+ # ── 采样参数(仅存在时输出)──
230
+ for key in ("max_tokens", "temperature", "top_p", "top_k"):
231
+ if key in body:
232
+ parts.append(f"{key}={body[key]!r:.40}")
233
+ stop_sequences = body.get("stop_sequences")
234
+ if isinstance(stop_sequences, list) and stop_sequences:
235
+ parts.append(f"stop_sequences={len(stop_sequences)}")
236
+
237
+ # ── stream / metadata ──
238
+ if "stream" in body:
239
+ parts.append(f"stream={body['stream']}")
240
+ metadata = body.get("metadata")
241
+ if isinstance(metadata, dict) and metadata:
242
+ parts.append(f"metadata_keys={len(metadata)}")
243
+
244
+ # ── 会话历史中的 thinking blocks 与 content_types 分布 ──
245
+ thinking_count = 0
246
+ content_type_counts: dict[str, int] = {}
247
+ for msg in body.get("messages", []):
248
+ content = msg.get("content")
249
+ if isinstance(content, str):
250
+ content_type_counts["string"] = content_type_counts.get("string", 0) + 1
251
+ continue
252
+ if not isinstance(content, list):
253
+ continue
254
+ for block in content:
255
+ if not isinstance(block, dict):
256
+ continue
257
+ btype = block.get("type")
258
+ if isinstance(btype, str):
259
+ content_type_counts[btype] = content_type_counts.get(btype, 0) + 1
260
+ if btype in ("thinking", "redacted_thinking"):
261
+ thinking_count += 1
262
+ if thinking_count:
263
+ parts.append(f"thinking_blocks_in_history={thinking_count}")
264
+ if content_type_counts:
265
+ type_repr = ",".join(f"{k}:{v}" for k, v in sorted(content_type_counts.items()))
266
+ parts.append(f"content_types={{{type_repr}}}")
267
+
268
+ # ── cache_control 存在检测(messages / tools,不含 system 因已单独统计)──
269
+ has_cc = False
270
+ sections: list[Any] = []
271
+ for m in body.get("messages", []):
272
+ if isinstance(m.get("content"), list):
273
+ sections.append(m["content"])
274
+ if isinstance(body.get("tools"), list):
275
+ sections.append(body["tools"])
276
+ for section in sections:
277
+ for item in section:
278
+ if isinstance(item, dict) and "cache_control" in item:
279
+ has_cc = True
280
+ break
281
+ if has_cc:
282
+ break
283
+ if has_cc:
284
+ parts.append("cache_control_fields=present")
285
+
286
+ # ── 请求体大小估算 ──
287
+ try:
288
+ body_bytes = len(json.dumps(body, ensure_ascii=False).encode("utf-8"))
289
+ parts.append(f"body_bytes={body_bytes}")
290
+ except (TypeError, ValueError):
291
+ # 极少数情况下 body 含非可序列化对象,跳过
292
+ pass
293
+
294
+ return f" [{', '.join(parts)}]" if parts else ""
295
+
296
+
178
297
  def _log_http_error_detail(
179
298
  tier_name: str,
180
299
  exc: Exception,
@@ -156,13 +156,17 @@ def _create_vendor_from_config(
156
156
  cfg = _resolve_antigravity_credentials(cfg, token_store)
157
157
  return AntigravityVendor(cfg, failover_cfg, mapper)
158
158
  case "zhipu":
159
- cfg = ZhipuConfig(
160
- enabled=vendor_cfg.enabled,
161
- base_url=vendor_cfg.base_url
159
+ zhipu_kwargs: dict[str, Any] = {
160
+ "enabled": vendor_cfg.enabled,
161
+ "base_url": vendor_cfg.base_url
162
162
  or "https://open.bigmodel.cn/api/anthropic",
163
- api_key=vendor_cfg.api_key,
164
- timeout_ms=vendor_cfg.timeout_ms,
165
- )
163
+ "api_key": vendor_cfg.api_key,
164
+ "timeout_ms": vendor_cfg.timeout_ms,
165
+ }
166
+ # 仅当显式配置了 concurrency 时转发,否则使用 ZhipuConfig 默认值
167
+ if vendor_cfg.concurrency is not None:
168
+ zhipu_kwargs["concurrency"] = vendor_cfg.concurrency
169
+ cfg = ZhipuConfig(**zhipu_kwargs)
166
170
  return ZhipuVendor(cfg, mapper, failover_cfg)
167
171
  case "minimax":
168
172
  cfg = MinimaxConfig(
@@ -0,0 +1,78 @@
1
+ """每模型并发限制器 — 基于 asyncio.Semaphore 的公平排队.
2
+
3
+ 为每个映射后的模型(如 ``glm-5v-turbo``)独立维护一个 ``asyncio.Semaphore``,
4
+ 确保同一时间点该模型的并行请求数不超过配置的上限。当所有槽位被占满时,
5
+ 新请求按 FIFO 顺序排队等待,直到有槽位释放。
6
+
7
+ 设计要点:
8
+ - **惰性创建**:仅在首次请求到达时才为该模型创建 Semaphore,避免冷启动开销
9
+ - **FIFO 公平**:``asyncio.Semaphore`` 内部使用 FIFO 队列,天然满足排队语义
10
+ - **按映射后模型名键控**:与上游真实承载能力对齐,而非按客户端请求名(如 ``claude-sonnet-*``)
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import asyncio
16
+ import logging
17
+
18
+ from ..config.vendors import ZhipuConcurrencyConfig
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
+ class ModelConcurrencyLimiter:
24
+ """按模型名提供独立并发槽位的限制器.
25
+
26
+ 用法::
27
+
28
+ limiter = ModelConcurrencyLimiter(config)
29
+ sem = await limiter.acquire("glm-5v-turbo")
30
+ try:
31
+ ... # 执行请求
32
+ finally:
33
+ sem.release()
34
+ """
35
+
36
+ def __init__(self, config: ZhipuConcurrencyConfig) -> None:
37
+ self._config = config
38
+ self._semaphores: dict[str, asyncio.Semaphore] = {}
39
+
40
+ def _get_semaphore(self, model: str) -> asyncio.Semaphore:
41
+ """获取(或惰性创建)指定模型的信号量."""
42
+ sem = self._semaphores.get(model)
43
+ if sem is None:
44
+ limit = self._config.get_limit(model)
45
+ sem = asyncio.Semaphore(limit)
46
+ self._semaphores[model] = sem
47
+ logger.debug(
48
+ "ModelConcurrencyLimiter: created semaphore model=%s limit=%d",
49
+ model,
50
+ limit,
51
+ )
52
+ return sem
53
+
54
+ async def acquire(self, model: str) -> asyncio.Semaphore:
55
+ """获取指定模型的并发槽位,必要时阻塞排队.
56
+
57
+ 返回已获取的 Semaphore 实例,调用方负责在请求完成后调用 ``release()``。
58
+ """
59
+ sem = self._get_semaphore(model)
60
+ await sem.acquire()
61
+ return sem
62
+
63
+ def get_diagnostics(self) -> dict[str, dict[str, int]]:
64
+ """返回每个模型的并发状态快照(用于可观测性)."""
65
+ snapshot: dict[str, dict[str, int]] = {}
66
+ for model, sem in self._semaphores.items():
67
+ limit = self._config.get_limit(model)
68
+ # asyncio.Semaphore 内部 _value 表示剩余可用槽位
69
+ available = sem._value # noqa: SLF001 — 公开 API 未暴露
70
+ snapshot[model] = {
71
+ "limit": limit,
72
+ "in_use": max(limit - available, 0),
73
+ "available": max(available, 0),
74
+ }
75
+ return snapshot
76
+
77
+
78
+ __all__ = ["ModelConcurrencyLimiter"]
@@ -34,6 +34,7 @@ from ..routing.rate_limit import (
34
34
  )
35
35
  from ..routing.retry import RetryConfig, calculate_delay
36
36
  from .base import VendorResponse
37
+ from .concurrency import ModelConcurrencyLimiter
37
38
  from .native_anthropic import NativeAnthropicVendor
38
39
 
39
40
  logger = logging.getLogger(__name__)
@@ -68,6 +69,12 @@ class ZhipuVendor(NativeAnthropicVendor):
68
69
  ) -> None:
69
70
  super().__init__(config, model_mapper, failover_config)
70
71
  self._rl_retry = _RATE_LIMIT_RETRY
72
+ # 每模型并发限制器(config.concurrency 为 None 时禁用)
73
+ self._concurrency_limiter: ModelConcurrencyLimiter | None = (
74
+ ModelConcurrencyLimiter(config.concurrency)
75
+ if config.concurrency is not None
76
+ else None
77
+ )
71
78
 
72
79
  # ── 非流式:429 重试 ────────────────────────────────────
73
80
 
@@ -76,7 +83,24 @@ class ZhipuVendor(NativeAnthropicVendor):
76
83
  request_body: dict[str, Any],
77
84
  headers: dict[str, str],
78
85
  ) -> VendorResponse:
79
- """非流式请求,429 时自动重试."""
86
+ """非流式请求,429 时自动重试.
87
+
88
+ 在 429 重试循环外层套上每模型并发槽位获取,确保同一时间点同一模型的
89
+ 在途请求数不超过配置上限;超过时新请求 FIFO 排队等待。
90
+ """
91
+ sem = await self._maybe_acquire_concurrency_slot(request_body)
92
+ try:
93
+ return await self._send_message_with_retry(request_body, headers)
94
+ finally:
95
+ if sem is not None:
96
+ sem.release()
97
+
98
+ async def _send_message_with_retry(
99
+ self,
100
+ request_body: dict[str, Any],
101
+ headers: dict[str, str],
102
+ ) -> VendorResponse:
103
+ """原 send_message 主体逻辑(不含并发控制)."""
80
104
  max_attempts = self._rl_retry.max_attempts
81
105
 
82
106
  for attempt in range(max_attempts):
@@ -116,42 +140,71 @@ class ZhipuVendor(NativeAnthropicVendor):
116
140
  安全性:429 在 BaseVendor.send_message_stream 中于
117
141
  status code 检查阶段即 raise(在任何 chunk yield 之前),
118
142
  因此重试不会导致已发出数据不一致。
143
+
144
+ 在 429 重试循环外层套上每模型并发槽位获取,确保流式请求与非流式请求
145
+ 共用同一信号量,统一限制同一模型的总在途并发数。
119
146
  """
147
+ sem = await self._maybe_acquire_concurrency_slot(request_body)
120
148
  max_attempts = self._rl_retry.max_attempts
121
149
 
122
- for attempt in range(max_attempts):
123
- try:
124
- # 429 在 status code 检查阶段即 raise(在任何 chunk 之前),
125
- # 因此 __anext__ 安全:要么拿到首个 chunk,要么抛异常。
126
- ait = super().send_message_stream(request_body, headers)
127
- head = await ait.__anext__()
128
- except StopAsyncIteration:
129
- return
130
- except httpx.HTTPStatusError as exc:
131
- if exc.response is None or exc.response.status_code != 429:
132
- raise
133
- if attempt == max_attempts - 1:
134
- logger.warning(
135
- "Zhipu 429 stream rate limit exhausted after %d attempts",
136
- max_attempts,
150
+ try:
151
+ for attempt in range(max_attempts):
152
+ try:
153
+ # 429 在 status code 检查阶段即 raise(在任何 chunk 之前),
154
+ # 因此 __anext__ 安全:要么拿到首个 chunk,要么抛异常。
155
+ ait = super().send_message_stream(request_body, headers)
156
+ head = await ait.__anext__()
157
+ except StopAsyncIteration:
158
+ return
159
+ except httpx.HTTPStatusError as exc:
160
+ if exc.response is None or exc.response.status_code != 429:
161
+ raise
162
+ if attempt == max_attempts - 1:
163
+ logger.warning(
164
+ "Zhipu 429 stream rate limit exhausted after %d attempts",
165
+ max_attempts,
166
+ )
167
+ raise
168
+
169
+ delay = self._compute_retry_delay_from_response(
170
+ exc.response, attempt
137
171
  )
138
- raise
139
-
140
- delay = self._compute_retry_delay_from_response(exc.response, attempt)
141
- logger.info(
142
- "Zhipu 429 stream rate limit, retry %d/%d in %.1fms",
143
- attempt + 1,
144
- max_attempts - 1,
145
- delay,
146
- )
147
- await asyncio.sleep(delay / 1000.0)
148
- continue
149
-
150
- # yield 在 try/except 之外,避免捕获外部 athrow 的异常
151
- yield head
152
- async for chunk in ait:
153
- yield chunk
154
- return
172
+ logger.info(
173
+ "Zhipu 429 stream rate limit, retry %d/%d in %.1fms",
174
+ attempt + 1,
175
+ max_attempts - 1,
176
+ delay,
177
+ )
178
+ await asyncio.sleep(delay / 1000.0)
179
+ continue
180
+
181
+ # yield 在 try/except 之外,避免捕获外部 athrow 的异常
182
+ yield head
183
+ async for chunk in ait:
184
+ yield chunk
185
+ return
186
+ finally:
187
+ if sem is not None:
188
+ sem.release()
189
+
190
+ # ── 并发控制 ────────────────────────────────────────────
191
+
192
+ async def _maybe_acquire_concurrency_slot(
193
+ self,
194
+ request_body: dict[str, Any],
195
+ ) -> asyncio.Semaphore | None:
196
+ """按映射后模型名获取并发槽位;未配置 concurrency 时返回 None.
197
+
198
+ ``map_model()`` 是纯同步字典查找,在 Semaphore 等待前调用是安全的,
199
+ 且能确保排队键与上游真实承载模型对齐。
200
+ """
201
+ if self._concurrency_limiter is None:
202
+ return None
203
+ raw_model = request_body.get("model", "") if request_body else ""
204
+ mapped_model = self.map_model(raw_model) if raw_model else ""
205
+ if not mapped_model:
206
+ return None
207
+ return await self._concurrency_limiter.acquire(mapped_model)
155
208
 
156
209
  # ── 延迟计算 ────────────────────────────────────────────
157
210