coding-proxy 0.4.1a11__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/AGENTS.md +3 -2
  2. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/CHANGELOG.md +23 -1
  3. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/PKG-INFO +1 -1
  4. coding_proxy-0.5.0/assets/model-calling-v0.5.0.png +0 -0
  5. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/pyproject.toml +1 -1
  6. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/server/dashboard.py +112 -1
  7. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/server/routes.py +56 -0
  8. coding_proxy-0.5.0/src/coding/proxy/vendors/concurrency.py +162 -0
  9. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/zhipu.py +7 -0
  10. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_zhipu_concurrency.py +11 -11
  11. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/uv.lock +1 -1
  12. coding_proxy-0.4.1a11/src/coding/proxy/vendors/concurrency.py +0 -83
  13. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/.github/workflows/ci.yml +0 -0
  14. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/.github/workflows/coverage.yml +0 -0
  15. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/.github/workflows/release.yml +0 -0
  16. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/.gitignore +0 -0
  17. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/.pre-commit-config.yaml +0 -0
  18. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/CLAUDE.md +0 -0
  19. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/LICENSE +0 -0
  20. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/README.md +0 -0
  21. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/assets/dashboard-v0.4.0.png +0 -0
  22. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/assets/session-v0.4.0.png +0 -0
  23. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/agents/browser-validation.md +0 -0
  24. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/agents/issue.md +0 -0
  25. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/agents/knowledge-map.md +0 -0
  26. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/agents/reference-specifications.md +0 -0
  27. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/arch/config-reference.md +0 -0
  28. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/arch/convert.md +0 -0
  29. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/arch/design-patterns.md +0 -0
  30. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/arch/routing.md +0 -0
  31. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/arch/testing.md +0 -0
  32. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/arch/vendors.md +0 -0
  33. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/framework.md +0 -0
  34. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/guide/api-reference.md +0 -0
  35. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/guide/cli-reference.md +0 -0
  36. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/guide/dashboard.md +0 -0
  37. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/guide/monitoring.md +0 -0
  38. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/guide/quickstart.md +0 -0
  39. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/guide/vendors.md +0 -0
  40. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/ops/ci-cd.md +0 -0
  41. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/user-guide.md +0 -0
  42. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/docs/zh-CN/README.md +0 -0
  43. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/__init__.py +0 -0
  44. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/__init__.py +0 -0
  45. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/__main__.py +0 -0
  46. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/auth/__init__.py +0 -0
  47. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/auth/providers/__init__.py +0 -0
  48. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/auth/providers/base.py +0 -0
  49. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/auth/providers/github.py +0 -0
  50. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/auth/providers/google.py +0 -0
  51. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/auth/runtime.py +0 -0
  52. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/auth/store.py +0 -0
  53. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/cli/__init__.py +0 -0
  54. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/cli/auth_commands.py +0 -0
  55. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/cli/banner.py +0 -0
  56. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/compat/__init__.py +0 -0
  57. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/compat/canonical.py +0 -0
  58. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/compat/session_store.py +0 -0
  59. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/config/__init__.py +0 -0
  60. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/config/auth_schema.py +0 -0
  61. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/config/config.default.yaml +0 -0
  62. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/config/loader.py +0 -0
  63. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/config/resiliency.py +0 -0
  64. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/config/routing.py +0 -0
  65. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/config/schema.py +0 -0
  66. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/config/server.py +0 -0
  67. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/config/session_policy.py +0 -0
  68. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/config/vendors.py +0 -0
  69. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/convert/__init__.py +0 -0
  70. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/convert/anthropic_to_gemini.py +0 -0
  71. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/convert/anthropic_to_openai.py +0 -0
  72. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/convert/gemini_sse_adapter.py +0 -0
  73. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/convert/gemini_to_anthropic.py +0 -0
  74. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/convert/openai_to_anthropic.py +0 -0
  75. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/convert/vendor_channels.py +0 -0
  76. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/logging/__init__.py +0 -0
  77. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/logging/db.py +0 -0
  78. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/logging/formatters.py +0 -0
  79. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/logging/stats.py +0 -0
  80. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/model/__init__.py +0 -0
  81. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/model/auth.py +0 -0
  82. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/model/compat.py +0 -0
  83. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/model/constants.py +0 -0
  84. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/model/pricing.py +0 -0
  85. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/model/token.py +0 -0
  86. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/model/vendor.py +0 -0
  87. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/native_api/__init__.py +0 -0
  88. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/native_api/config.py +0 -0
  89. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/native_api/extractors/__init__.py +0 -0
  90. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/native_api/extractors/anthropic.py +0 -0
  91. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/native_api/extractors/gemini.py +0 -0
  92. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/native_api/extractors/openai.py +0 -0
  93. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/native_api/handler.py +0 -0
  94. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/native_api/operation.py +0 -0
  95. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/native_api/routes.py +0 -0
  96. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/native_api/usage_registry.py +0 -0
  97. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/pricing.py +0 -0
  98. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/routing/__init__.py +0 -0
  99. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/routing/circuit_breaker.py +0 -0
  100. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/routing/error_classifier.py +0 -0
  101. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/routing/executor.py +0 -0
  102. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/routing/model_mapper.py +0 -0
  103. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/routing/quota_guard.py +0 -0
  104. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/routing/rate_limit.py +0 -0
  105. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/routing/retry.py +0 -0
  106. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/routing/router.py +0 -0
  107. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/routing/session_manager.py +0 -0
  108. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/routing/session_policy.py +0 -0
  109. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/routing/tier.py +0 -0
  110. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/routing/usage_parser.py +0 -0
  111. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/routing/usage_recorder.py +0 -0
  112. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/server/__init__.py +0 -0
  113. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/server/app.py +0 -0
  114. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/server/factory.py +0 -0
  115. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/server/responses.py +0 -0
  116. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/streaming/__init__.py +0 -0
  117. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/streaming/anthropic_compat.py +0 -0
  118. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/__init__.py +0 -0
  119. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/alibaba.py +0 -0
  120. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/anthropic.py +0 -0
  121. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/antigravity.py +0 -0
  122. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/base.py +0 -0
  123. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/copilot.py +0 -0
  124. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/copilot_models.py +0 -0
  125. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/copilot_token_manager.py +0 -0
  126. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/copilot_urls.py +0 -0
  127. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/doubao.py +0 -0
  128. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/kimi.py +0 -0
  129. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/minimax.py +0 -0
  130. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/mixins.py +0 -0
  131. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/native_anthropic.py +0 -0
  132. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/token_manager.py +0 -0
  133. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/src/coding/proxy/vendors/xiaomi.py +0 -0
  134. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/__init__.py +0 -0
  135. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/e2e/__init__.py +0 -0
  136. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/e2e/conftest.py +0 -0
  137. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/e2e/test_e2e_http.py +0 -0
  138. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/e2e/test_e2e_token.py +0 -0
  139. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/e2e/test_e2e_vendor.py +0 -0
  140. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_antigravity.py +0 -0
  141. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_app_routes.py +0 -0
  142. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_auto_login.py +0 -0
  143. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_banner.py +0 -0
  144. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_circuit_breaker.py +0 -0
  145. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_cli_usage.py +0 -0
  146. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_compat.py +0 -0
  147. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_config_init.py +0 -0
  148. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_config_loader.py +0 -0
  149. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_convert_request.py +0 -0
  150. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_convert_response.py +0 -0
  151. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_convert_sse.py +0 -0
  152. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_copilot.py +0 -0
  153. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_copilot_convert_request.py +0 -0
  154. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_copilot_convert_response.py +0 -0
  155. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_copilot_models.py +0 -0
  156. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_copilot_urls.py +0 -0
  157. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_currency.py +0 -0
  158. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_error_classifier.py +0 -0
  159. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_logging_dual_write.py +0 -0
  160. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_mixins.py +0 -0
  161. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_model_auth.py +0 -0
  162. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_model_compat.py +0 -0
  163. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_model_constants.py +0 -0
  164. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_model_mapper.py +0 -0
  165. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_model_pricing.py +0 -0
  166. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_model_token.py +0 -0
  167. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_model_vendor.py +0 -0
  168. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_native_api_base_url_override.py +0 -0
  169. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_native_api_extractors.py +0 -0
  170. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_native_api_handler.py +0 -0
  171. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_native_api_operation.py +0 -0
  172. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_native_api_routes.py +0 -0
  173. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_native_vendors.py +0 -0
  174. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_parse_usage.py +0 -0
  175. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_parse_usage_gemini.py +0 -0
  176. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_pricing.py +0 -0
  177. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_quota_guard.py +0 -0
  178. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_rate_limit.py +0 -0
  179. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_router_chain.py +0 -0
  180. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_router_executor.py +0 -0
  181. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_runtime_reauth.py +0 -0
  182. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_schema.py +0 -0
  183. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_session_aware.py +0 -0
  184. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_streaming_anthropic_compat.py +0 -0
  185. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_tier.py +0 -0
  186. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_tiers_config.py +0 -0
  187. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_time_range.py +0 -0
  188. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_token_logger.py +0 -0
  189. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_token_logger_native_columns.py +0 -0
  190. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_token_manager.py +0 -0
  191. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_types.py +0 -0
  192. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_vendor_channels.py +0 -0
  193. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_vendor_streaming.py +0 -0
  194. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_vendors.py +0 -0
  195. {coding_proxy-0.4.1a11 → coding_proxy-0.5.0}/tests/test_zhipu.py +0 -0
@@ -44,14 +44,15 @@
44
44
  1. **Python**: 严禁使用 pip/poetry,**必须**统一使用 `uv` 进行包管理与脚本执行(如 `uv run`);
45
45
  2. **JavaScript/TypeScript**: 严禁使用 npm/yarn,**必须**统一使用 `pnpm` 进行包管理与脚本执行;
46
46
  - **Database Management**: 谨慎操作,数据迁移、测试等操作严禁将现有数据删除,谨慎操作数据迁移的回滚,防止数据被清理。
47
+ - **In-depth and close to the facts**:系统且全面地进行问题的分析,深入贴近事实,如有疑问,需先发问,不要乱做决定。
47
48
  - **Browser Validation Protocol (浏览器验证准则)**:Agent 不得自行完成、绕过或模拟任何 OAuth / SSO 认证流程,所有登录态均来源于用户已认证的 Chrome 主 profile(真实用户登录态)。完整协议(连通性自检、凭证管理、E2E 集成、实机回归等)详见 [浏览器验证协议](./docs/agents/browser-validation.md);
48
49
  1. **安全红线**:禁止在 Sandbox 浏览器中跳转 Google 同意屏;禁止以模拟用户或第三方账号替代真实登录态;禁止要求用户在 chat 中粘贴密码、Cookie 或验证码;
49
50
  - **Knowledge Map (知识索引)**:项目所有文档索引统一维护在 [知识索引](./docs/agents/knowledge-map.md),并在文档目录变更时即时同步更新;
50
51
  - **Documentation Standards (文档规范)**:
51
- 1. **Visual Documentation (图文并茂)**: 对于复杂逻辑,优先 **Mermaid Visualization Norms (Mermaid 可视化规范)**,构建“图文并茂”的直观文档;
52
+ 1. **Visual Documentation (图文并茂)**: 对于复杂逻辑,优先 **Mermaid Visualization Norms (Mermaid 可视化规范)**,构建“图文并茂”的直观文档;
52
53
  - **色彩语义与兼容性**:为图表节点配置具备语义辨识度的色彩,并确保在深色模式(Dark Mode)下具有极高的对比度与清晰度;
53
54
  - **逻辑模块化解构**:针对业务跨度较大的架构流程,强制采用 `subgraph` 容器进行层级解构与边界划分,以增强图表的自解说(Self-explaining)能力;
54
55
  2. **语言叙事**:用语精准,叙事完备,行文专业,聚焦核心,篇幅精炼,形象具体,体现真实作用与用户吸引性,字数恰当;
55
- 3. **Direct Hyperlinking (直接跳转)**: 在文档中提及 Repo 内其他资源(文档/代码)时,**必须**构建可跳转的相对路径链接(如 `[Doc Name](./path.md)`),严禁使用“死文本”引用,以降低信息检索熵;
56
+ 3. **Direct Hyperlinking (直接跳转)**: 在文档中提及 Repo 内其他资源(文档/代码)时,**必须**构建可跳转的相对路径链接(如 `[Doc Name](./path.md)`),严禁使用“死文本”引用,以降低信息检索熵;
56
57
  4. **实操截图**:文档需要引入必要的浏览器实操截图时,需自行通过默认浏览器打开相关页面,通过实操现场截图并保留到文档路径进行文档引用;
57
58
  - **Reference Specifications (IEEE)**:为保障工程决策的可追溯性与学术严谨性,核心引用需遵循 [reference-specifications.md](docs/agents/reference-specifications.md)IEEE 标准引用格式;
@@ -4,7 +4,29 @@
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
- - feat(zhipu): 新增每模型并发限制(默认 3,可通过 `vendors[zhipu].concurrency` 配置),基于 `asyncio.Semaphore` 实现 FIFO 公平排队,流式与非流式共用同一槽位,与 429 重试机制兼容。
7
+ ## [v0.5.0](https://github.com/ThreeFish-AI/coding-proxy/releases/tag/v0.5.0) - 2026-05-27
8
+
9
+ > [!IMPORTANT]
10
+ >
11
+ > **🚀 Model Calling 实时状态!**
12
+ >
13
+ > 模型并发与排队深度一目了然,运行时动态调整每个模型并行度,预防 vendor 侧的 429 幺蛾子。
14
+
15
+ ![model-calling](assets/model-calling-v0.5.0.png)
16
+
17
+ ### ✨ 核心亮点
18
+
19
+ - feat(concurrency): 新增 Model Calling 实时状态模块,可视化每模型并发与排队深度,支持运行时动态修改每模型并行度 (#250) (#251)
20
+ - feat(zhipu): 新增每模型并发限制,默认 3 个并行请求 FIFO 排队 (#248)
21
+ - feat(zhipu): 为 429 Rate Limit 添加指数退避重试挽回机制 (#242)
22
+
23
+ ### 🔧 更多特性
24
+
25
+ - fix(antigravity): 修复 v1internal 模式检测逻辑并新增 E2E 测试; (#234)
26
+ - fix(routes): 修复 count_tokens 路由对 target_vendor.name 的错误属性访问; (#235)
27
+ - fix(vendor-channels): 修复 zhipu→anthropic 通道 tool_use/tool_result 配对漏洞; (#236)
28
+ - fix(native-api): 修复 Gemini :verb 路径中 %3A URL 编码导致上游 400 的兼容问题; (#237)
29
+ - fix(zhipu): 诊断首选 tier 语义拒绝降级问题,增强可观测性并提取跨供应商清洗共享函数 (#243)
8
30
 
9
31
  ## [v0.4.0](https://github.com/ThreeFish-AI/coding-proxy/releases/tag/v0.4.0) — 2026-05-01
10
32
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coding-proxy
3
- Version: 0.4.1a11
3
+ Version: 0.5.0
4
4
  Summary: A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao...
5
5
  Project-URL: Source Code, https://github.com/ThreeFish-AI/coding-proxy
6
6
  Project-URL: User Guide, https://github.com/ThreeFish-AI/coding-proxy/blob/master/docs/user-guide.md
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "coding-proxy"
3
- version = "0.4.1a11"
3
+ version = "0.5.0"
4
4
  description = "A High-Availability, Transparent, and Smart Multi-Vendor Proxy for Claude Code. Support Claude Plans, GitHub Copilot, Google Antigravity, ZAI/GLM, MiniMax, Qwen, Xiaomi, Kimi, Doubao..."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -640,6 +640,43 @@ _DASHBOARD_HTML = """<!DOCTYPE html>
640
640
  padding: 1px 6px;
641
641
  border-radius: 3px;
642
642
  }
643
+ .mc-limit-editable {
644
+ cursor: pointer;
645
+ border-bottom: 1px dashed rgba(74,222,128,.4);
646
+ transition: border-color .2s, color .2s;
647
+ }
648
+ .mc-limit-editable:hover {
649
+ border-bottom-color: #4ade80;
650
+ color: #4ade80;
651
+ }
652
+ .mc-limit-input {
653
+ width: 36px;
654
+ background: var(--bg-primary);
655
+ border: 1px solid var(--accent-blue);
656
+ border-radius: 3px;
657
+ color: var(--text-primary);
658
+ font-size: 10px;
659
+ font-family: 'JetBrains Mono', monospace;
660
+ text-align: center;
661
+ padding: 0 2px;
662
+ outline: none;
663
+ -moz-appearance: textfield;
664
+ }
665
+ .mc-limit-input::-webkit-outer-spin-button,
666
+ .mc-limit-input::-webkit-inner-spin-button {
667
+ -webkit-appearance: none;
668
+ margin: 0;
669
+ }
670
+ .mc-limit-flash-ok { animation: mc-flash-ok .6s ease; }
671
+ .mc-limit-flash-err { animation: mc-flash-err .6s ease; }
672
+ @keyframes mc-flash-ok {
673
+ 0%,100% { color: inherit; }
674
+ 40% { color: #4ade80; }
675
+ }
676
+ @keyframes mc-flash-err {
677
+ 0%,100% { color: inherit; }
678
+ 40% { color: #f87171; }
679
+ }
643
680
  </style>
644
681
  </head>
645
682
  <body>
@@ -1268,7 +1305,8 @@ function updateModelCalling(status) {
1268
1305
  + '<span class="mc-model-name">' + escapeHtml(m.vendor + '/' + m.model) + '</span>'
1269
1306
  + '<div class="mc-bar-wrap"><div class="mc-bar-fill ' + barClass + '" style="width:' + pct + '%"></div></div>'
1270
1307
  + '<div class="mc-stats">'
1271
- + '<span class="mc-badge mc-badge-active">' + m.in_use + '/' + m.limit + '</span>'
1308
+ + '<span class="mc-badge mc-badge-active">' + m.in_use
1309
+ + '/<span class="mc-limit-editable" data-tier="' + escapeHtml(m.vendor) + '" data-model="' + escapeHtml(m.model) + '" data-limit="' + m.limit + '" title="点击修改并行度">' + m.limit + '</span></span>'
1272
1310
  + (m.pending > 0 ? '<span class="mc-badge mc-badge-pending">⏳ ' + m.pending + '</span>' : '')
1273
1311
  + '</div>'
1274
1312
  + '</div>';
@@ -1293,6 +1331,79 @@ function stopModelCallingPoll() {
1293
1331
  if (_mcTimer) { clearInterval(_mcTimer); _mcTimer = null; }
1294
1332
  }
1295
1333
 
1334
// ── Runtime editing of the per-model concurrency limit ──────────────
// Clicking a `.mc-limit-editable` badge swaps it for a numeric input.
// Enter or blur submits the new value via PUT /api/concurrency; Escape cancels.
var _mcEditing = false;
document.addEventListener('click', function(e) {
  if (_mcEditing) return;
  var el = e.target.closest('.mc-limit-editable');
  if (!el) return;
  e.preventDefault();
  _mcEditing = true;
  var oldVal = el.getAttribute('data-limit');
  var tier = el.getAttribute('data-tier');
  var model = el.getAttribute('data-model');
  var input = document.createElement('input');
  input.type = 'number';
  input.className = 'mc-limit-input';
  input.min = '1';
  input.max = '20';
  input.value = oldVal;
  el.style.display = 'none';
  el.parentNode.insertBefore(input, el.nextSibling);
  input.focus();
  input.select();

  // Latched as soon as the edit is cancelled OR submitted. Without it, Enter
  // followed by blur (or the blur fired when the input is removed) would run
  // submit() a second time and send a duplicate PUT.
  var _settled = false;

  // Remove the temporary input and show the badge again.
  function restore() {
    _mcEditing = false;
    if (input.parentNode) input.parentNode.removeChild(input);
    el.style.display = '';
  }

  // Briefly apply a success / error animation class to the badge.
  function flash(cls) {
    el.classList.add(cls);
    setTimeout(function() { el.classList.remove(cls); }, 600);
  }

  input.addEventListener('keydown', function(ev) {
    if (ev.key === 'Escape') { _settled = true; restore(); return; }
    if (ev.key !== 'Enter') return;
    ev.preventDefault();
    submit();
  });

  input.addEventListener('blur', function() {
    // Deferred so that a keydown handler running in the same tick wins.
    setTimeout(function() { if (!_settled) submit(); }, 50);
  });

  // Validate the input and push the new limit to the server (at most once).
  function submit() {
    if (_settled) return;
    _settled = true;
    var v = parseInt(input.value, 10);
    if (isNaN(v) || v < 1 || v > 20) { restore(); flash('mc-limit-flash-err'); return; }
    if (String(v) === oldVal) { restore(); return; }
    fetch('/api/concurrency', {
      method: 'PUT',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({tier: tier, model: model, limit: v})
    }).then(function(res) {
      if (res.ok) {
        return res.json().then(function() {
          el.textContent = v;
          el.setAttribute('data-limit', v);
          flash('mc-limit-flash-ok');
        });
      } else {
        flash('mc-limit-flash-err');
      }
    }).catch(function() {
      flash('mc-limit-flash-err');
    }).finally(function() {
      restore();
    });
  }
});
1406
+
1296
1407
  // ── 按 tiers 顺序排序 vendor 列表 ─────────────────────────
1297
1408
  function sortByTierOrder(vendors, tierOrder) {
1298
1409
  if (!tierOrder || !tierOrder.length) return vendors.sort();
@@ -225,6 +225,61 @@ def register_status_route(app: Any, router: Any) -> None:
225
225
  return result
226
226
 
227
227
 
228
def register_concurrency_route(app: Any, router: Any) -> None:
    """Register the route that adjusts per-model concurrency limits at runtime.

    ``PUT /api/concurrency`` expects a JSON object with three fields:

    - ``tier``: name of the tier whose vendor should be updated (string)
    - ``model``: model name whose limit is changed (string)
    - ``limit``: new limit, an integer in ``[1, 20]``

    Responses: 200 with ``{"ok": true, ...}`` on success, 400 for a malformed
    payload or a vendor without ``update_concurrency``, 404 for an unknown tier.
    """

    def _bad_request(message: str) -> Response:
        return json_error_response(
            400, error_type="invalid_request_error", message=message
        )

    @app.put("/api/concurrency")
    async def update_concurrency(request: Request) -> Response:
        try:
            body = await request.json()
        except Exception:
            return _bad_request("body must be JSON")
        # Valid JSON that is not an object (list, string, number, null) has no
        # ``.get``; reject it instead of failing with an AttributeError.
        if not isinstance(body, dict):
            return _bad_request("body must be a JSON object")

        tier_name = body.get("tier")
        model = body.get("model")
        limit = body.get("limit")
        if not tier_name or not model or limit is None:
            return _bad_request("requires tier, model, limit")
        # ``model`` is used as a dictionary key further down; a list or object
        # would be unhashable and crash the handler.
        if not isinstance(tier_name, str) or not isinstance(model, str):
            return _bad_request("tier and model must be strings")
        # ``bool`` is a subclass of ``int``; JSON ``true`` must not count as 1.
        if (
            isinstance(limit, bool)
            or not isinstance(limit, int)
            or not 1 <= limit <= 20
        ):
            return _bad_request("limit must be an integer between 1 and 20")

        tier = next((t for t in router.tiers if t.name == tier_name), None)
        if tier is None:
            return json_error_response(
                404, error_type="not_found", message=f"tier '{tier_name}' not found"
            )

        update_fn = getattr(tier.vendor, "update_concurrency", None)
        if update_fn is None:
            return _bad_request(f"vendor '{tier_name}' does not support concurrency")
        try:
            update_fn(model, limit)
        except (ValueError, AttributeError) as exc:
            return _bad_request(str(exc))

        payload = {"ok": True, "tier": tier_name, "model": model, "limit": limit}
        return Response(
            content=json.dumps(payload, ensure_ascii=False).encode(),
            status_code=200,
            media_type="application/json",
        )
281
+
282
+
228
283
  def register_copilot_routes(app: Any, router: Any) -> None:
229
284
  """注册 Copilot 诊断与模型探测路由."""
230
285
  from .factory import _find_copilot_vendor
@@ -457,6 +512,7 @@ def register_all_routes(
457
512
  register_core_routes(app, router)
458
513
  register_health_routes(app)
459
514
  register_status_route(app, router)
515
+ register_concurrency_route(app, router)
460
516
  register_copilot_routes(app, router)
461
517
  register_admin_routes(app, router)
462
518
  register_session_vendor_routes(app, router)
@@ -0,0 +1,162 @@
1
+ """每模型并发限制器 — 支持运行时动态调整的公平排队.
2
+
3
+ 为每个映射后的模型(如 ``glm-5v-turbo``)独立维护一个 ``_ConcurrencySlot``,
4
+ 确保同一时间点该模型的并行请求数不超过配置的上限。当所有槽位被占满时,
5
+ 新请求按 FIFO 顺序排队等待,直到有槽位释放。
6
+
7
+ 设计要点:
8
+ - **惰性创建**:仅在首次请求到达时才为该模型创建 Slot,避免冷启动开销
9
+ - **FIFO 公平**:``asyncio.Event`` + while 循环天然满足 FIFO 排队语义
10
+ - **动态调整**:支持运行时修改 per-model limit,无需重启进程
11
+ - **按映射后模型名键控**:与上游真实承载能力对齐,而非按客户端请求名
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import logging
18
+
19
+ from ..config.vendors import ZhipuConcurrencyConfig
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class _ConcurrencySlot:
    """Concurrency slot with a runtime-adjustable limit and FIFO hand-off.

    Callers that cannot get a slot immediately are queued as futures in
    arrival order. ``release`` and ``set_limit`` hand free capacity directly to
    the oldest queued callers (the slot is counted as in use at hand-off time),
    so a caller arriving later can never overtake one that is already waiting.
    """

    def __init__(self, limit: int) -> None:
        # Local import keeps this block independent of the module import list.
        from collections import deque

        self._limit = limit
        self._in_use: int = 0
        # Futures of queued acquirers, oldest first.
        self._waiters: deque[asyncio.Future[None]] = deque()

    async def acquire(self) -> "_ConcurrencySlot":
        """Acquire one slot, waiting in FIFO order when none is free.

        Returns ``self``; the caller must call ``release()`` when done.
        If the waiting task is cancelled, the caller leaves the queue and any
        slot that was already reserved for it is passed on.
        """
        # Fast path is only allowed when nobody is queued; otherwise a
        # newcomer would jump ahead of earlier waiters.
        if self._in_use < self._limit and not self.pending:
            self._in_use += 1
            return self

        waiter: asyncio.Future[None] = asyncio.get_running_loop().create_future()
        self._waiters.append(waiter)
        try:
            await waiter
        except asyncio.CancelledError:
            if waiter.done() and not waiter.cancelled():
                # The slot was handed to us just before the cancellation was
                # delivered: give it back so the next waiter can proceed.
                self.release()
            else:
                try:
                    self._waiters.remove(waiter)
                except ValueError:
                    pass  # already dropped by _wake_waiters
                self._wake_waiters()
            raise
        return self

    def release(self) -> None:
        """Release one slot and hand it to the oldest waiter, if any."""
        self._in_use = max(0, self._in_use - 1)
        self._wake_waiters()

    def set_limit(self, new_limit: int) -> None:
        """Change the limit at runtime.

        Raising the limit immediately serves queued callers. Lowering it does
        not revoke slots that are already held; the new limit takes effect as
        those slots are released.
        """
        self._limit = new_limit
        self._wake_waiters()

    def _wake_waiters(self) -> None:
        """Hand free capacity to queued callers in arrival order."""
        while self._waiters and self._in_use < self._limit:
            waiter = self._waiters.popleft()
            if waiter.done():
                continue  # cancelled while queued
            # Reserve the slot on behalf of the waiter before it resumes.
            self._in_use += 1
            waiter.set_result(None)

    @property
    def limit(self) -> int:
        return self._limit

    @property
    def in_use(self) -> int:
        return self._in_use

    @property
    def available(self) -> int:
        return max(0, self._limit - self._in_use)

    @property
    def pending(self) -> int:
        # Cancelled futures may linger briefly in the queue; do not count them.
        return sum(1 for waiter in self._waiters if not waiter.done())
89
+
90
+
91
class ModelConcurrencyLimiter:
    """Limiter that keeps one independent concurrency slot per model name.

    Usage::

        limiter = ModelConcurrencyLimiter(config)
        slot = await limiter.acquire("glm-5v-turbo")
        try:
            ...  # perform the request
        finally:
            slot.release()
    """

    def __init__(self, config: ZhipuConcurrencyConfig) -> None:
        self._config = config
        # Slots are created on first use, keyed by model name.
        self._slots: dict[str, _ConcurrencySlot] = {}

    def _get_or_create_slot(self, model: str) -> _ConcurrencySlot:
        """Return the slot for ``model``, creating it lazily on first use."""
        existing = self._slots.get(model)
        if existing is not None:
            return existing
        configured = self._config.get_limit(model)
        created = _ConcurrencySlot(configured)
        self._slots[model] = created
        logger.debug(
            "ModelConcurrencyLimiter: created slot model=%s limit=%d",
            model,
            configured,
        )
        return created

    async def acquire(self, model: str) -> _ConcurrencySlot:
        """Acquire a slot for ``model``, waiting if none is free.

        Returns the acquired slot; the caller must call ``release()`` on it
        once the request has finished.
        """
        granted = self._get_or_create_slot(model)
        await granted.acquire()
        return granted

    def set_limit(self, model: str, new_limit: int) -> None:
        """Change the concurrency limit of ``model`` at runtime.

        The value is also written to ``config.models`` (presumably so that
        ``get_limit`` reports it for later lookups - confirm against the
        config class).
        """
        if model in self._slots:
            self._slots[model].set_limit(new_limit)
        else:
            # No request has used this model yet: create its slot right away.
            self._slots[model] = _ConcurrencySlot(new_limit)
        self._config.models[model] = new_limit
        logger.info(
            "ModelConcurrencyLimiter: updated limit model=%s new_limit=%d",
            model,
            new_limit,
        )

    def get_diagnostics(self) -> dict[str, dict[str, int]]:
        """Return a per-model snapshot of limit / in_use / available / pending."""
        return {
            name: {
                "limit": state.limit,
                "in_use": state.in_use,
                "available": state.available,
                "pending": state.pending,
            }
            for name, state in self._slots.items()
        }
160
+
161
+
162
+ __all__ = ["ModelConcurrencyLimiter"]
@@ -261,6 +261,13 @@ class ZhipuVendor(NativeAnthropicVendor):
261
261
  diagnostics["concurrency"] = self._concurrency_limiter.get_diagnostics()
262
262
  return diagnostics
263
263
 
264
def update_concurrency(self, model: str, limit: int) -> None:
    """Update the concurrency limit of one model at runtime.

    Delegates to the vendor's concurrency limiter via ``set_limit``.

    Raises:
        ValueError: if this vendor has no concurrency limiter
            (``_concurrency_limiter`` is ``None``).
    """
    limiter = self._concurrency_limiter
    if limiter is not None:
        limiter.set_limit(model, limit)
        return

    msg = "Concurrency limiter is not enabled for this vendor"
    raise ValueError(msg)
270
+
264
271
  # ── 延迟计算 ────────────────────────────────────────────
265
272
 
266
273
  def _compute_retry_delay_from_headers(
@@ -141,12 +141,12 @@ class TestModelConcurrencyLimiter:
141
141
  @pytest.mark.asyncio
142
142
  async def test_lazy_semaphore_creation(self) -> None:
143
143
  limiter = ModelConcurrencyLimiter(ZhipuConcurrencyConfig(default=2))
144
- sem_a = limiter._get_semaphore("model-a")
145
- sem_b = limiter._get_semaphore("model-b")
146
- # 不同模型独立 semaphore
147
- assert sem_a is not sem_b
148
- # 相同模型复用 semaphore
149
- assert limiter._get_semaphore("model-a") is sem_a
144
+ slot_a = limiter._get_or_create_slot("model-a")
145
+ slot_b = limiter._get_or_create_slot("model-b")
146
+ # 不同模型独立 slot
147
+ assert slot_a is not slot_b
148
+ # 相同模型复用 slot
149
+ assert limiter._get_or_create_slot("model-a") is slot_a
150
150
 
151
151
  @pytest.mark.asyncio
152
152
  async def test_acquire_blocks_when_full(self) -> None:
@@ -184,8 +184,8 @@ class TestModelConcurrencyLimiter:
184
184
 
185
185
  def test_diagnostics_snapshot(self) -> None:
186
186
  limiter = ModelConcurrencyLimiter(ZhipuConcurrencyConfig(default=3))
187
- # 触发 semaphore 创建
188
- limiter._get_semaphore("glm-5.1")
187
+ # 触发 slot 创建
188
+ limiter._get_or_create_slot("glm-5.1")
189
189
  snap = limiter.get_diagnostics()
190
190
  assert "glm-5.1" in snap
191
191
  assert snap["glm-5.1"]["limit"] == 3
@@ -459,10 +459,10 @@ class TestZhipuVendorStreamConcurrency:
459
459
  chunks.append(chunk)
460
460
  assert len(chunks) == 2
461
461
 
462
- # 确认 semaphore 当前完全可用
462
+ # 确认 slot 当前完全可用
463
463
  assert vendor._concurrency_limiter is not None
464
- sem = vendor._concurrency_limiter._get_semaphore("glm-5.1")
465
- assert sem._value == 1 # noqa: SLF001
464
+ slot = vendor._concurrency_limiter._get_or_create_slot("glm-5.1")
465
+ assert slot.available == 1
466
466
 
467
467
  @pytest.mark.asyncio
468
468
  async def test_stream_releases_slot_on_error(self) -> None:
@@ -74,7 +74,7 @@ wheels = [
74
74
 
75
75
  [[package]]
76
76
  name = "coding-proxy"
77
- version = "0.4.1a11"
77
+ version = "0.5.0"
78
78
  source = { editable = "." }
79
79
  dependencies = [
80
80
  { name = "aiosqlite" },
@@ -1,83 +0,0 @@
1
- """每模型并发限制器 — 基于 asyncio.Semaphore 的公平排队.
2
-
3
- 为每个映射后的模型(如 ``glm-5v-turbo``)独立维护一个 ``asyncio.Semaphore``,
4
- 确保同一时间点该模型的并行请求数不超过配置的上限。当所有槽位被占满时,
5
- 新请求按 FIFO 顺序排队等待,直到有槽位释放。
6
-
7
- 设计要点:
8
- - **惰性创建**:仅在首次请求到达时才为该模型创建 Semaphore,避免冷启动开销
9
- - **FIFO 公平**:``asyncio.Semaphore`` 内部使用 FIFO 队列,天然满足排队语义
10
- - **按映射后模型名键控**:与上游真实承载能力对齐,而非按客户端请求名(如 ``claude-sonnet-*``)
11
- """
12
-
13
- from __future__ import annotations
14
-
15
- import asyncio
16
- import logging
17
-
18
- from ..config.vendors import ZhipuConcurrencyConfig
19
-
20
- logger = logging.getLogger(__name__)
21
-
22
-
23
class ModelConcurrencyLimiter:
    """Limiter that keeps one independent ``asyncio.Semaphore`` per model name.

    Usage::

        limiter = ModelConcurrencyLimiter(config)
        sem = await limiter.acquire("glm-5v-turbo")
        try:
            ...  # perform the request
        finally:
            sem.release()
    """

    def __init__(self, config: ZhipuConcurrencyConfig) -> None:
        # Semaphores are created on demand, so the registry starts out empty.
        self._semaphores: dict[str, asyncio.Semaphore] = {}
        self._config = config

    def _get_semaphore(self, model: str) -> asyncio.Semaphore:
        """Return the semaphore for ``model``, creating it on first use.

        A new semaphore is sized with ``config.get_limit(model)`` and cached so
        that later calls for the same model return the same object.
        """
        existing = self._semaphores.get(model)
        if existing is not None:
            return existing

        limit = self._config.get_limit(model)
        created = asyncio.Semaphore(limit)
        self._semaphores[model] = created
        logger.debug(
            "ModelConcurrencyLimiter: created semaphore model=%s limit=%d",
            model,
            limit,
        )
        return created

    async def acquire(self, model: str) -> asyncio.Semaphore:
        """Acquire one permit of the semaphore belonging to ``model``.

        Awaits ``Semaphore.acquire()``, so the caller may have to wait. The
        acquired semaphore is returned; the caller is responsible for calling
        ``release()`` on it once the request has finished.
        """
        acquired = self._get_semaphore(model)
        await acquired.acquire()
        return acquired

    def get_diagnostics(self) -> dict[str, dict[str, int]]:
        """Return a per-model snapshot of semaphore state (for observability).

        Only models whose semaphore has already been created are reported.
        """

        def _describe(model: str, sem: asyncio.Semaphore) -> dict[str, int]:
            # The limit is re-read from the config rather than stored per semaphore.
            limit = self._config.get_limit(model)
            # Semaphore._value holds the remaining permits; no public accessor exists.
            available = sem._value  # noqa: SLF001
            # Semaphore._waiters holds the queued waiters and may be None when idle.
            waiters = getattr(sem, "_waiters", None)  # noqa: SLF001
            return {
                "limit": limit,
                "in_use": max(limit - available, 0),
                "available": max(available, 0),
                "pending": len(waiters) if waiters else 0,
            }

        return {
            model: _describe(model, sem)
            for model, sem in self._semaphores.items()
        }
81
-
82
-
83
- __all__ = ["ModelConcurrencyLimiter"]
File without changes
File without changes
File without changes
File without changes