devcopilot 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. api/__init__.py +17 -0
  2. api/admin_config.py +1303 -0
  3. api/admin_routes.py +287 -0
  4. api/admin_static/admin.css +459 -0
  5. api/admin_static/admin.js +497 -0
  6. api/admin_static/index.html +77 -0
  7. api/admin_urls.py +34 -0
  8. api/app.py +194 -0
  9. api/command_utils.py +164 -0
  10. api/dependencies.py +144 -0
  11. api/detection.py +152 -0
  12. api/gateway_model_ids.py +54 -0
  13. api/model_catalog.py +133 -0
  14. api/model_router.py +125 -0
  15. api/models/__init__.py +45 -0
  16. api/models/anthropic.py +234 -0
  17. api/models/openai_responses.py +28 -0
  18. api/models/responses.py +60 -0
  19. api/optimization_handlers.py +154 -0
  20. api/request_pipeline.py +424 -0
  21. api/routes.py +156 -0
  22. api/runtime.py +334 -0
  23. api/validation_log.py +48 -0
  24. api/web_server_tools.py +22 -0
  25. api/web_tools/__init__.py +17 -0
  26. api/web_tools/constants.py +15 -0
  27. api/web_tools/egress.py +99 -0
  28. api/web_tools/outbound.py +278 -0
  29. api/web_tools/parsers.py +104 -0
  30. api/web_tools/request.py +87 -0
  31. api/web_tools/streaming.py +206 -0
  32. cli/__init__.py +5 -0
  33. cli/claude_env.py +12 -0
  34. cli/entrypoints.py +166 -0
  35. cli/env.example +209 -0
  36. cli/launchers/__init__.py +1 -0
  37. cli/launchers/claude.py +84 -0
  38. cli/launchers/codex.py +204 -0
  39. cli/launchers/codex_model_catalog.py +186 -0
  40. cli/launchers/common.py +93 -0
  41. cli/managed/__init__.py +6 -0
  42. cli/managed/claude.py +215 -0
  43. cli/managed/manager.py +157 -0
  44. cli/managed/session.py +260 -0
  45. cli/process_registry.py +78 -0
  46. config/__init__.py +5 -0
  47. config/constants.py +13 -0
  48. config/logging_config.py +159 -0
  49. config/nim.py +118 -0
  50. config/paths.py +91 -0
  51. config/provider_catalog.py +259 -0
  52. config/provider_ids.py +7 -0
  53. config/settings.py +538 -0
  54. core/__init__.py +1 -0
  55. core/anthropic/__init__.py +46 -0
  56. core/anthropic/content.py +31 -0
  57. core/anthropic/conversion.py +587 -0
  58. core/anthropic/emitted_sse_tracker.py +346 -0
  59. core/anthropic/errors.py +70 -0
  60. core/anthropic/native_messages_request.py +280 -0
  61. core/anthropic/native_sse_block_policy.py +313 -0
  62. core/anthropic/provider_stream_error.py +34 -0
  63. core/anthropic/server_tool_sse.py +14 -0
  64. core/anthropic/sse.py +440 -0
  65. core/anthropic/stream_contracts.py +205 -0
  66. core/anthropic/stream_recovery.py +346 -0
  67. core/anthropic/stream_recovery_session.py +133 -0
  68. core/anthropic/thinking.py +140 -0
  69. core/anthropic/tokens.py +117 -0
  70. core/anthropic/tools.py +212 -0
  71. core/anthropic/utils.py +9 -0
  72. core/openai_responses/__init__.py +5 -0
  73. core/openai_responses/adapter.py +31 -0
  74. core/openai_responses/anthropic_sse.py +59 -0
  75. core/openai_responses/errors.py +22 -0
  76. core/openai_responses/events.py +19 -0
  77. core/openai_responses/ids.py +21 -0
  78. core/openai_responses/input.py +258 -0
  79. core/openai_responses/items.py +37 -0
  80. core/openai_responses/reasoning.py +52 -0
  81. core/openai_responses/stream.py +25 -0
  82. core/openai_responses/stream_state.py +654 -0
  83. core/openai_responses/tools.py +374 -0
  84. core/openai_responses/usage.py +37 -0
  85. core/rate_limit.py +60 -0
  86. core/trace.py +216 -0
  87. devcopilot-0.2.0.dist-info/METADATA +687 -0
  88. devcopilot-0.2.0.dist-info/RECORD +189 -0
  89. devcopilot-0.2.0.dist-info/WHEEL +4 -0
  90. devcopilot-0.2.0.dist-info/entry_points.txt +6 -0
  91. devcopilot-0.2.0.dist-info/licenses/LICENSE +21 -0
  92. messaging/__init__.py +26 -0
  93. messaging/cli_event_constants.py +67 -0
  94. messaging/command_context.py +66 -0
  95. messaging/command_dispatcher.py +37 -0
  96. messaging/commands.py +275 -0
  97. messaging/event_parser.py +181 -0
  98. messaging/limiter.py +300 -0
  99. messaging/models.py +36 -0
  100. messaging/node_event_pipeline.py +127 -0
  101. messaging/node_runner.py +342 -0
  102. messaging/platforms/__init__.py +15 -0
  103. messaging/platforms/base.py +228 -0
  104. messaging/platforms/discord.py +567 -0
  105. messaging/platforms/factory.py +103 -0
  106. messaging/platforms/outbox.py +144 -0
  107. messaging/platforms/telegram.py +688 -0
  108. messaging/platforms/voice_flow.py +295 -0
  109. messaging/rendering/__init__.py +3 -0
  110. messaging/rendering/discord_markdown.py +318 -0
  111. messaging/rendering/markdown_tables.py +49 -0
  112. messaging/rendering/profiles.py +55 -0
  113. messaging/rendering/telegram_markdown.py +327 -0
  114. messaging/safe_diagnostics.py +17 -0
  115. messaging/session.py +334 -0
  116. messaging/transcript.py +581 -0
  117. messaging/transcription.py +164 -0
  118. messaging/trees/__init__.py +15 -0
  119. messaging/trees/data.py +482 -0
  120. messaging/trees/manager.py +433 -0
  121. messaging/trees/processor.py +179 -0
  122. messaging/trees/repository.py +177 -0
  123. messaging/turn_intake.py +235 -0
  124. messaging/ui_updates.py +101 -0
  125. messaging/voice.py +76 -0
  126. messaging/workflow.py +200 -0
  127. providers/__init__.py +31 -0
  128. providers/base.py +152 -0
  129. providers/cerebras/__init__.py +7 -0
  130. providers/cerebras/client.py +31 -0
  131. providers/cerebras/request.py +55 -0
  132. providers/codestral/__init__.py +7 -0
  133. providers/codestral/client.py +34 -0
  134. providers/deepseek/__init__.py +11 -0
  135. providers/deepseek/client.py +51 -0
  136. providers/deepseek/request.py +475 -0
  137. providers/defaults.py +41 -0
  138. providers/error_mapping.py +309 -0
  139. providers/exceptions.py +113 -0
  140. providers/fireworks/__init__.py +5 -0
  141. providers/fireworks/client.py +45 -0
  142. providers/fireworks/request.py +48 -0
  143. providers/gemini/__init__.py +7 -0
  144. providers/gemini/client.py +49 -0
  145. providers/gemini/request.py +199 -0
  146. providers/groq/__init__.py +7 -0
  147. providers/groq/client.py +31 -0
  148. providers/groq/request.py +83 -0
  149. providers/kimi/__init__.py +10 -0
  150. providers/kimi/client.py +53 -0
  151. providers/kimi/request.py +42 -0
  152. providers/llamacpp/__init__.py +3 -0
  153. providers/llamacpp/client.py +16 -0
  154. providers/lmstudio/__init__.py +5 -0
  155. providers/lmstudio/client.py +16 -0
  156. providers/mistral/__init__.py +7 -0
  157. providers/mistral/client.py +31 -0
  158. providers/mistral/request.py +37 -0
  159. providers/model_listing.py +133 -0
  160. providers/nvidia_nim/__init__.py +7 -0
  161. providers/nvidia_nim/client.py +91 -0
  162. providers/nvidia_nim/request.py +430 -0
  163. providers/nvidia_nim/voice.py +95 -0
  164. providers/ollama/__init__.py +7 -0
  165. providers/ollama/client.py +39 -0
  166. providers/open_router/__init__.py +7 -0
  167. providers/open_router/client.py +124 -0
  168. providers/open_router/request.py +42 -0
  169. providers/opencode/__init__.py +11 -0
  170. providers/opencode/client.py +31 -0
  171. providers/opencode/request.py +35 -0
  172. providers/rate_limit.py +300 -0
  173. providers/registry.py +527 -0
  174. providers/transports/__init__.py +1 -0
  175. providers/transports/anthropic_messages/__init__.py +5 -0
  176. providers/transports/anthropic_messages/http.py +118 -0
  177. providers/transports/anthropic_messages/recovery.py +206 -0
  178. providers/transports/anthropic_messages/stream.py +295 -0
  179. providers/transports/anthropic_messages/transport.py +236 -0
  180. providers/transports/openai_chat/__init__.py +5 -0
  181. providers/transports/openai_chat/recovery.py +217 -0
  182. providers/transports/openai_chat/stream.py +384 -0
  183. providers/transports/openai_chat/tool_calls.py +293 -0
  184. providers/transports/openai_chat/transport.py +156 -0
  185. providers/wafer/__init__.py +10 -0
  186. providers/wafer/client.py +50 -0
  187. providers/zai/__init__.py +10 -0
  188. providers/zai/client.py +46 -0
  189. providers/zai/request.py +42 -0
@@ -0,0 +1,124 @@
1
+ """OpenRouter provider implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Iterator
6
+ from typing import Any
7
+
8
+ from core.anthropic import iter_provider_stream_error_sse_events
9
+ from core.anthropic.native_sse_block_policy import (
10
+ NativeSseBlockPolicyState,
11
+ is_terminal_openrouter_done_event,
12
+ parse_native_sse_event,
13
+ transform_native_sse_block_event,
14
+ )
15
+ from providers.base import ProviderConfig
16
+ from providers.defaults import OPENROUTER_DEFAULT_BASE
17
+ from providers.model_listing import (
18
+ ProviderModelInfo,
19
+ extract_openrouter_tool_model_ids,
20
+ extract_openrouter_tool_model_infos,
21
+ )
22
+ from providers.transports.anthropic_messages import (
23
+ AnthropicMessagesTransport,
24
+ StreamChunkMode,
25
+ )
26
+
27
+ from .request import build_request_body
28
+
29
+ _ANTHROPIC_VERSION = "2023-06-01"
30
+
31
+
32
class OpenRouterProvider(AnthropicMessagesTransport):
    """Provider that reaches OpenRouter via its Anthropic-compatible messages API."""

    stream_chunk_mode: StreamChunkMode = "event"

    def __init__(self, config: ProviderConfig):
        super().__init__(
            config,
            provider_name="OPENROUTER",
            default_base_url=OPENROUTER_DEFAULT_BASE,
        )

    def _build_request_body(
        self, request: Any, thinking_enabled: bool | None = None
    ) -> dict:
        """Build the request body; helper for tests and direct request dispatch."""
        effective_thinking = self._is_thinking_enabled(request, thinking_enabled)
        return build_request_body(request, thinking_enabled=effective_thinking)

    def _request_headers(self) -> dict[str, str]:
        """Return the headers sent with Anthropic-compatible messages requests."""
        headers: dict[str, str] = {}
        headers["Accept"] = "text/event-stream"
        headers["Authorization"] = f"Bearer {self._api_key}"
        headers["Content-Type"] = "application/json"
        headers["anthropic-version"] = _ANTHROPIC_VERSION
        return headers

    def _model_list_headers(self) -> dict[str, str]:
        """Return the headers sent with model-list requests (bearer auth only)."""
        bearer_token = f"Bearer {self._api_key}"
        return {"Authorization": bearer_token}

    def _extract_model_ids_from_model_list_payload(
        self, payload: Any
    ) -> frozenset[str]:
        """Extract the tool-capable model ids from a model-list payload."""
        provider = self._provider_name
        return extract_openrouter_tool_model_ids(payload, provider_name=provider)

    def _extract_model_infos_from_model_list_payload(
        self, payload: Any
    ) -> frozenset[ProviderModelInfo]:
        """Extract tool-capable model infos from a model-list payload."""
        provider = self._provider_name
        return extract_openrouter_tool_model_infos(payload, provider_name=provider)

    def _new_stream_state(self, request: Any, *, thinking_enabled: bool) -> Any:
        """Return a fresh block-policy state object for one stream."""
        return NativeSseBlockPolicyState()

    def _transform_stream_event(
        self,
        event: str,
        state: Any,
        *,
        thinking_enabled: bool,
    ) -> str | None:
        """Filter or rewrite one SSE event; ``None`` means the event is dropped.

        Events pass through untouched when ``state`` is not a
        ``NativeSseBlockPolicyState``. Otherwise everything after
        ``message_stop`` and any terminal OpenRouter "done" event is dropped,
        and the remaining events go through the native block transform.
        """
        if not isinstance(state, NativeSseBlockPolicyState):
            return event

        event_name, data_text = parse_native_sse_event(event)
        if state.message_stopped:
            return None
        if is_terminal_openrouter_done_event(event_name, data_text):
            return None

        # Remember the stop so later events on this stream are suppressed.
        if event_name == "message_stop":
            state.message_stopped = True

        return transform_native_sse_block_event(
            event, state, thinking_enabled=thinking_enabled
        )

    def _emit_error_events(
        self,
        *,
        request: Any,
        input_tokens: int,
        error_message: str,
        sent_any_event: bool,
    ) -> Iterator[str]:
        """Yield the Anthropic-style SSE error events for a failed stream."""
        log_raw = self._config.log_raw_sse_events
        yield from iter_provider_stream_error_sse_events(
            request=request,
            input_tokens=input_tokens,
            error_message=error_message,
            sent_any_event=sent_any_event,
            log_raw_sse_events=log_raw,
        )
@@ -0,0 +1,42 @@
1
+ """Native Anthropic Messages request builder for OpenRouter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from loguru import logger
8
+
9
+ from config.constants import (
10
+ ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS as OPENROUTER_DEFAULT_MAX_TOKENS,
11
+ )
12
+ from core.anthropic.native_messages_request import (
13
+ OpenRouterExtraBodyError,
14
+ build_openrouter_native_request_body,
15
+ )
16
+ from providers.exceptions import InvalidRequestError
17
+
18
+
19
def build_request_body(request_data: Any, *, thinking_enabled: bool) -> dict:
    """Convert *request_data* into an Anthropic-format body for OpenRouter.

    Args:
        request_data: Incoming request object; ``model`` and ``messages`` are
            read via ``getattr`` for logging only.
        thinking_enabled: Forwarded to the native request builder.

    Returns:
        The request body produced by ``build_openrouter_native_request_body``.

    Raises:
        InvalidRequestError: If the builder raises ``OpenRouterExtraBodyError``.
    """
    source_model = getattr(request_data, "model", "?")
    source_messages = getattr(request_data, "messages", [])
    logger.debug(
        "OPENROUTER_REQUEST: conversion start model={} msgs={}",
        source_model,
        len(source_messages),
    )

    try:
        body = build_openrouter_native_request_body(
            request_data,
            thinking_enabled=thinking_enabled,
            default_max_tokens=OPENROUTER_DEFAULT_MAX_TOKENS,
        )
    except OpenRouterExtraBodyError as exc:
        # Surface builder validation failures as a client-facing request error.
        raise InvalidRequestError(str(exc)) from exc
    else:
        logger.debug(
            "OPENROUTER_REQUEST: conversion done model={} msgs={} tools={}",
            body.get("model"),
            len(body.get("messages", [])),
            len(body.get("tools", [])),
        )
        return body
@@ -0,0 +1,11 @@
1
+ """OpenCode Zen provider exports."""
2
+
3
+ from providers.defaults import OPENCODE_DEFAULT_BASE, OPENCODE_GO_DEFAULT_BASE
4
+
5
+ from .client import OpenCodeProvider
6
+
7
+ __all__ = [
8
+ "OPENCODE_DEFAULT_BASE",
9
+ "OPENCODE_GO_DEFAULT_BASE",
10
+ "OpenCodeProvider",
11
+ ]
@@ -0,0 +1,31 @@
1
+ """OpenCode Zen provider implementation (OpenAI-compatible Chat Completions)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from providers.base import ProviderConfig
8
+ from providers.defaults import OPENCODE_DEFAULT_BASE
9
+ from providers.transports.openai_chat import OpenAIChatTransport
10
+
11
+ from .request import build_request_body
12
+
13
+
14
class OpenCodeProvider(OpenAIChatTransport):
    """OpenCode Zen provider using ``https://opencode.ai/zen/v1/chat/completions``."""

    def __init__(self, config: ProviderConfig, provider_name: str = "OPENCODE"):
        # An explicitly configured base URL wins over the packaged default.
        resolved_base_url = config.base_url or OPENCODE_DEFAULT_BASE
        super().__init__(
            config,
            provider_name=provider_name,
            base_url=resolved_base_url,
            api_key=config.api_key,
        )

    def _build_request_body(
        self, request: Any, thinking_enabled: bool | None = None
    ) -> dict:
        """Build the request body using the resolved thinking setting."""
        effective_thinking = self._is_thinking_enabled(request, thinking_enabled)
        return build_request_body(request, thinking_enabled=effective_thinking)
@@ -0,0 +1,35 @@
1
+ """Request builder for OpenCode Zen provider."""
2
+
3
+ from typing import Any
4
+
5
+ from loguru import logger
6
+
7
+ from core.anthropic import ReasoningReplayMode, build_base_request_body
8
+ from core.anthropic.conversion import OpenAIConversionError
9
+ from providers.exceptions import InvalidRequestError
10
+
11
+
12
def build_request_body(request_data: Any, *, thinking_enabled: bool) -> dict:
    """Convert an Anthropic-style request into an OpenAI-format body for OpenCode Zen.

    Args:
        request_data: Incoming request object; ``model`` and ``messages`` are
            read via ``getattr`` for logging only.
        thinking_enabled: Selects ``REASONING_CONTENT`` replay when true,
            ``DISABLED`` otherwise.

    Returns:
        The request body produced by ``build_base_request_body``.

    Raises:
        InvalidRequestError: If conversion raises ``OpenAIConversionError``.
    """
    source_model = getattr(request_data, "model", "?")
    source_messages = getattr(request_data, "messages", [])
    logger.debug(
        "OPENCODE_REQUEST: conversion start model={} msgs={}",
        source_model,
        len(source_messages),
    )

    if thinking_enabled:
        replay_mode = ReasoningReplayMode.REASONING_CONTENT
    else:
        replay_mode = ReasoningReplayMode.DISABLED

    try:
        body = build_base_request_body(request_data, reasoning_replay=replay_mode)
    except OpenAIConversionError as exc:
        # Surface conversion failures as a client-facing request error.
        raise InvalidRequestError(str(exc)) from exc
    else:
        logger.debug(
            "OPENCODE_REQUEST: conversion done model={} msgs={} tools={}",
            body.get("model"),
            len(body.get("messages", [])),
            len(body.get("tools", [])),
        )
        return body
@@ -0,0 +1,300 @@
1
+ """Global rate limiter for API requests."""
2
+
3
+ import asyncio
4
+ import random
5
+ import time
6
+ from collections.abc import AsyncIterator, Callable
7
+ from contextlib import asynccontextmanager
8
+ from typing import Any, ClassVar, TypeVar
9
+
10
+ import httpx
11
+ import openai
12
+ from loguru import logger
13
+
14
+ from core.rate_limit import StrictSlidingWindowLimiter
15
+ from core.trace import trace_event
16
+
17
+ T = TypeVar("T")
18
+
19
+ UPSTREAM_TRANSIENT_TOTAL_ATTEMPTS = 5
20
+ DEFAULT_UPSTREAM_MAX_RETRIES = UPSTREAM_TRANSIENT_TOTAL_ATTEMPTS - 1
21
+
22
+
23
def _upstream_http_retryable(code: int) -> bool:
    """Return whether *code* is an upstream server-error status (500-599).

    Codes outside that range, including 429, are not treated as retryable here.
    """
    lowest_server_error, highest_server_error = 500, 599
    return lowest_server_error <= code <= highest_server_error
26
+
27
+
28
def retryable_upstream_status(exc: BaseException) -> int | None:
    """Return the status code of *exc* if it qualifies for a backoff retry.

    Only ``httpx.HTTPStatusError`` and ``openai.APIError`` are inspected, and
    only a status in the 500-599 range is returned. Every other exception or
    status yields ``None``.
    """
    if isinstance(exc, httpx.HTTPStatusError):
        code = exc.response.status_code
        return code if _upstream_http_retryable(code) else None

    if not isinstance(exc, openai.APIError):
        return None

    # Not every APIError carries a status code, so read it defensively.
    code = getattr(exc, "status_code", None)
    if isinstance(code, int) and 500 <= code <= 599:
        return code
    return None
45
+
46
+
47
class GlobalRateLimiter:
    """Rate limiter shared by provider requests.

    Combines three mechanisms:

    - Proactive: a ``StrictSlidingWindowLimiter`` throttles acquisitions to
      ``rate_limit`` per ``rate_window`` seconds.
    - Reactive: ``set_blocked`` records a deadline and ``wait_if_blocked``
      sleeps until it passes. ``execute_with_retry`` sets it while backing off
      after an upstream ``5xx``.
    - Concurrency: a semaphore caps the number of simultaneously held
      ``concurrency_slot`` contexts at ``max_concurrency``.

    Obtain instances through ``get_instance`` (single shared instance) or
    ``get_scoped_instance`` (one instance per scope name).
    """

    _instance: ClassVar[GlobalRateLimiter | None] = None
    _scoped_instances: ClassVar[dict[str, GlobalRateLimiter]] = {}

    def __init__(
        self,
        rate_limit: int = 40,
        rate_window: float = 60.0,
        max_concurrency: int = 5,
    ):
        """Create a limiter.

        Args:
            rate_limit: Maximum acquisitions per window; must be > 0.
            rate_window: Window length in seconds; must be > 0.
            max_concurrency: Maximum simultaneously held slots; must be > 0.

        Raises:
            ValueError: If any argument is not strictly positive.
        """
        # Prevent re-initialization on singleton reuse
        if hasattr(self, "_initialized"):
            return

        if rate_limit <= 0:
            raise ValueError("rate_limit must be > 0")
        if rate_window <= 0:
            raise ValueError("rate_window must be > 0")
        if max_concurrency <= 0:
            raise ValueError("max_concurrency must be > 0")

        self._rate_limit = rate_limit
        self._rate_window = float(rate_window)
        self._max_concurrency = max_concurrency
        self._proactive_limiter = StrictSlidingWindowLimiter(
            self._rate_limit, self._rate_window
        )
        # Monotonic-clock deadline; 0 means "not blocked".
        self._blocked_until: float = 0
        self._concurrency_sem = asyncio.Semaphore(max_concurrency)
        self._initialized = True

        logger.info(
            f"GlobalRateLimiter (Provider) initialized ({rate_limit} req / {rate_window}s, max_concurrency={max_concurrency})"
        )

    @classmethod
    def get_instance(
        cls,
        rate_limit: int | None = None,
        rate_window: float | None = None,
        max_concurrency: int = 5,
    ) -> GlobalRateLimiter:
        """Get or create the singleton instance.

        Args:
            rate_limit: Requests per window (only used on first creation)
            rate_window: Window in seconds (only used on first creation)
            max_concurrency: Max simultaneous open streams (only used on first creation)
        """
        if cls._instance is None:
            # NOTE(review): ``or`` treats 0 like None, so a falsy value falls
            # back to the default instead of reaching __init__ validation.
            cls._instance = cls(
                rate_limit=rate_limit or 40,
                rate_window=rate_window or 60.0,
                max_concurrency=max_concurrency,
            )
        return cls._instance

    @classmethod
    def get_scoped_instance(
        cls,
        scope: str,
        *,
        rate_limit: int | None = None,
        rate_window: float | None = None,
        max_concurrency: int = 5,
    ) -> GlobalRateLimiter:
        """Get or create a provider-scoped limiter instance.

        An existing limiter for ``scope`` is reused only when its configuration
        matches the requested one; otherwise a new limiter replaces it.

        Raises:
            ValueError: If ``scope`` is empty.
        """
        if not scope:
            raise ValueError("scope must be non-empty")
        desired_rate_limit = rate_limit or 40
        desired_rate_window = float(rate_window or 60.0)
        existing = cls._scoped_instances.get(scope)
        if existing and existing.matches_config(
            desired_rate_limit, desired_rate_window, max_concurrency
        ):
            return existing
        if existing:
            logger.info(
                "Rebuilding provider rate limiter for updated scope '{}'", scope
            )
        # The replacement starts with a fresh window, semaphore and no
        # reactive block; nothing is carried over from the old instance.
        cls._scoped_instances[scope] = cls(
            rate_limit=desired_rate_limit,
            rate_window=desired_rate_window,
            max_concurrency=max_concurrency,
        )
        return cls._scoped_instances[scope]

    @classmethod
    def reset_instance(cls) -> None:
        """Reset singleton (for testing)."""
        cls._instance = None
        cls._scoped_instances = {}

    async def wait_if_blocked(self) -> bool:
        """
        Wait if currently rate limited or throttle to meet quota.

        Returns:
            True if was reactively blocked and waited, False otherwise.
        """
        # 1. Reactive check: wait out an active backoff block (set during 5xx
        #    retries, or by any caller of set_blocked).
        waited_reactively = False
        now = time.monotonic()
        if now < self._blocked_until:
            wait_time = self._blocked_until - now
            logger.warning(
                f"Global provider rate limit active (reactive), waiting {wait_time:.1f}s..."
            )
            # NOTE(review): the deadline is sampled once; a block extended
            # while sleeping is not waited for again here.
            await asyncio.sleep(wait_time)
            waited_reactively = True

        # 2. Proactive check: strict rolling window (no bursts beyond N in last W seconds)
        await self._acquire_proactive_slot()
        return waited_reactively

    async def _acquire_proactive_slot(self) -> None:
        """
        Acquire a proactive slot enforcing a strict rolling window.

        Guarantees: at most `self._rate_limit` acquisitions in any interval of length
        `self._rate_window` (seconds).
        """
        await self._proactive_limiter.acquire()

    def set_blocked(self, seconds: float = 60) -> None:
        """
        Set global block for specified seconds (reactive).

        Args:
            seconds: How long to block (default 60s)
        """
        # NOTE(review): this overwrites any existing deadline, so a call with
        # a shorter duration shortens an active block — confirm intended.
        self._blocked_until = time.monotonic() + seconds
        logger.warning(f"Global provider rate limit set for {seconds:.1f}s (reactive)")

    def is_blocked(self) -> bool:
        """Check if currently reactively blocked."""
        return time.monotonic() < self._blocked_until

    def matches_config(
        self, rate_limit: int, rate_window: float, max_concurrency: int
    ) -> bool:
        """Return whether this limiter matches the requested runtime config."""
        return (
            self._rate_limit == rate_limit
            and self._rate_window == float(rate_window)
            and self._max_concurrency == max_concurrency
        )

    def remaining_wait(self) -> float:
        """Get remaining reactive wait time in seconds."""
        return max(0.0, self._blocked_until - time.monotonic())

    @asynccontextmanager
    async def concurrency_slot(self) -> AsyncIterator[None]:
        """Async context manager that holds one concurrency slot for a stream.

        Blocks until a slot is available (controlled by max_concurrency).
        The slot is released when the context exits, including on error.
        """
        async with self._concurrency_sem:
            yield

    async def execute_with_retry(
        self,
        fn: Callable[..., Any],
        *args: Any,
        max_retries: int = DEFAULT_UPSTREAM_MAX_RETRIES,
        base_delay: float = 2.0,
        max_delay: float = 60.0,
        jitter: float = 1.0,
        **kwargs: Any,
    ) -> Any:
        """Execute an async callable with rate limiting and retry on upstream 5xx.

        Calls ``wait_if_blocked`` before each attempt. When the callable raises
        an error that ``retryable_upstream_status`` classifies as ``5xx``,
        applies exponential backoff with jitter, sets the reactive block for
        the same duration, and retries. Any other exception propagates
        immediately.

        Args:
            fn: Async callable to execute.
            *args: Positional arguments forwarded to ``fn``.
            max_retries: Maximum number of retry attempts after the first
                failure; must be >= 0.
            base_delay: Base delay in seconds for exponential backoff.
            max_delay: Maximum delay cap in seconds (applied before jitter).
            jitter: Maximum random jitter in seconds added to each delay.
            **kwargs: Keyword arguments forwarded to ``fn``.

        Returns:
            The result of the callable.

        Raises:
            ValueError: If ``max_retries`` is negative.
            Exception: The last retryable exception if all retries are
                exhausted, or the first non-retryable exception.
        """
        # A negative value would make the loop run zero times, so ``fn`` would
        # never be called; reject it explicitly instead of failing obscurely.
        if max_retries < 0:
            raise ValueError("max_retries must be >= 0")

        total_attempts = 1 + max_retries

        for attempt in range(total_attempts):
            await self.wait_if_blocked()

            try:
                return await fn(*args, **kwargs)
            except Exception as e:
                status = retryable_upstream_status(e)
                if status is None:
                    raise

                label = f"Upstream server error ({status})"
                if attempt >= max_retries:
                    logger.warning(
                        "{} retry exhausted after {} retries (attempts={})",
                        label,
                        max_retries,
                        total_attempts,
                    )
                    # Re-raise the active exception with its traceback intact.
                    raise

                # Exponential backoff capped at max_delay, plus random jitter.
                delay = min(base_delay * (2**attempt), max_delay)
                delay += random.uniform(0, jitter)
                attempt_no = attempt + 1
                logger.warning(
                    "{}, attempt {}/{}. Retrying in {:.1f}s...",
                    label,
                    attempt_no,
                    total_attempts,
                    delay,
                )
                trace_event(
                    stage="provider",
                    event="provider.retry.scheduled",
                    source="provider",
                    status_code=status,
                    attempt=attempt_no,
                    max_attempts=total_attempts,
                    delay_s=round(delay, 3),
                )
                # Hold back other callers of this limiter for the same period.
                self.set_blocked(delay)
                await asyncio.sleep(delay)

        # Every iteration returns, raises, or continues, and the final one
        # cannot continue, so this line is never reached.
        raise AssertionError("unreachable: retry loop ended without a result")