PyPI - devcopilot - Versions diffs - 0.2.0__py3-none-any.whl - Mend

devcopilot 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (189) hide show

api/__init__.py +17 -0
api/admin_config.py +1303 -0
api/admin_routes.py +287 -0
api/admin_static/admin.css +459 -0
api/admin_static/admin.js +497 -0
api/admin_static/index.html +77 -0
api/admin_urls.py +34 -0
api/app.py +194 -0
api/command_utils.py +164 -0
api/dependencies.py +144 -0
api/detection.py +152 -0
api/gateway_model_ids.py +54 -0
api/model_catalog.py +133 -0
api/model_router.py +125 -0
api/models/__init__.py +45 -0
api/models/anthropic.py +234 -0
api/models/openai_responses.py +28 -0
api/models/responses.py +60 -0
api/optimization_handlers.py +154 -0
api/request_pipeline.py +424 -0
api/routes.py +156 -0
api/runtime.py +334 -0
api/validation_log.py +48 -0
api/web_server_tools.py +22 -0
api/web_tools/__init__.py +17 -0
api/web_tools/constants.py +15 -0
api/web_tools/egress.py +99 -0
api/web_tools/outbound.py +278 -0
api/web_tools/parsers.py +104 -0
api/web_tools/request.py +87 -0
api/web_tools/streaming.py +206 -0
cli/__init__.py +5 -0
cli/claude_env.py +12 -0
cli/entrypoints.py +166 -0
cli/env.example +209 -0
cli/launchers/__init__.py +1 -0
cli/launchers/claude.py +84 -0
cli/launchers/codex.py +204 -0
cli/launchers/codex_model_catalog.py +186 -0
cli/launchers/common.py +93 -0
cli/managed/__init__.py +6 -0
cli/managed/claude.py +215 -0
cli/managed/manager.py +157 -0
cli/managed/session.py +260 -0
cli/process_registry.py +78 -0
config/__init__.py +5 -0
config/constants.py +13 -0
config/logging_config.py +159 -0
config/nim.py +118 -0
config/paths.py +91 -0
config/provider_catalog.py +259 -0
config/provider_ids.py +7 -0
config/settings.py +538 -0
core/__init__.py +1 -0
core/anthropic/__init__.py +46 -0
core/anthropic/content.py +31 -0
core/anthropic/conversion.py +587 -0
core/anthropic/emitted_sse_tracker.py +346 -0
core/anthropic/errors.py +70 -0
core/anthropic/native_messages_request.py +280 -0
core/anthropic/native_sse_block_policy.py +313 -0
core/anthropic/provider_stream_error.py +34 -0
core/anthropic/server_tool_sse.py +14 -0
core/anthropic/sse.py +440 -0
core/anthropic/stream_contracts.py +205 -0
core/anthropic/stream_recovery.py +346 -0
core/anthropic/stream_recovery_session.py +133 -0
core/anthropic/thinking.py +140 -0
core/anthropic/tokens.py +117 -0
core/anthropic/tools.py +212 -0
core/anthropic/utils.py +9 -0
core/openai_responses/__init__.py +5 -0
core/openai_responses/adapter.py +31 -0
core/openai_responses/anthropic_sse.py +59 -0
core/openai_responses/errors.py +22 -0
core/openai_responses/events.py +19 -0
core/openai_responses/ids.py +21 -0
core/openai_responses/input.py +258 -0
core/openai_responses/items.py +37 -0
core/openai_responses/reasoning.py +52 -0
core/openai_responses/stream.py +25 -0
core/openai_responses/stream_state.py +654 -0
core/openai_responses/tools.py +374 -0
core/openai_responses/usage.py +37 -0
core/rate_limit.py +60 -0
core/trace.py +216 -0
devcopilot-0.2.0.dist-info/METADATA +687 -0
devcopilot-0.2.0.dist-info/RECORD +189 -0
devcopilot-0.2.0.dist-info/WHEEL +4 -0
devcopilot-0.2.0.dist-info/entry_points.txt +6 -0
devcopilot-0.2.0.dist-info/licenses/LICENSE +21 -0
messaging/__init__.py +26 -0
messaging/cli_event_constants.py +67 -0
messaging/command_context.py +66 -0
messaging/command_dispatcher.py +37 -0
messaging/commands.py +275 -0
messaging/event_parser.py +181 -0
messaging/limiter.py +300 -0
messaging/models.py +36 -0
messaging/node_event_pipeline.py +127 -0
messaging/node_runner.py +342 -0
messaging/platforms/__init__.py +15 -0
messaging/platforms/base.py +228 -0
messaging/platforms/discord.py +567 -0
messaging/platforms/factory.py +103 -0
messaging/platforms/outbox.py +144 -0
messaging/platforms/telegram.py +688 -0
messaging/platforms/voice_flow.py +295 -0
messaging/rendering/__init__.py +3 -0
messaging/rendering/discord_markdown.py +318 -0
messaging/rendering/markdown_tables.py +49 -0
messaging/rendering/profiles.py +55 -0
messaging/rendering/telegram_markdown.py +327 -0
messaging/safe_diagnostics.py +17 -0
messaging/session.py +334 -0
messaging/transcript.py +581 -0
messaging/transcription.py +164 -0
messaging/trees/__init__.py +15 -0
messaging/trees/data.py +482 -0
messaging/trees/manager.py +433 -0
messaging/trees/processor.py +179 -0
messaging/trees/repository.py +177 -0
messaging/turn_intake.py +235 -0
messaging/ui_updates.py +101 -0
messaging/voice.py +76 -0
messaging/workflow.py +200 -0
providers/__init__.py +31 -0
providers/base.py +152 -0
providers/cerebras/__init__.py +7 -0
providers/cerebras/client.py +31 -0
providers/cerebras/request.py +55 -0
providers/codestral/__init__.py +7 -0
providers/codestral/client.py +34 -0
providers/deepseek/__init__.py +11 -0
providers/deepseek/client.py +51 -0
providers/deepseek/request.py +475 -0
providers/defaults.py +41 -0
providers/error_mapping.py +309 -0
providers/exceptions.py +113 -0
providers/fireworks/__init__.py +5 -0
providers/fireworks/client.py +45 -0
providers/fireworks/request.py +48 -0
providers/gemini/__init__.py +7 -0
providers/gemini/client.py +49 -0
providers/gemini/request.py +199 -0
providers/groq/__init__.py +7 -0
providers/groq/client.py +31 -0
providers/groq/request.py +83 -0
providers/kimi/__init__.py +10 -0
providers/kimi/client.py +53 -0
providers/kimi/request.py +42 -0
providers/llamacpp/__init__.py +3 -0
providers/llamacpp/client.py +16 -0
providers/lmstudio/__init__.py +5 -0
providers/lmstudio/client.py +16 -0
providers/mistral/__init__.py +7 -0
providers/mistral/client.py +31 -0
providers/mistral/request.py +37 -0
providers/model_listing.py +133 -0
providers/nvidia_nim/__init__.py +7 -0
providers/nvidia_nim/client.py +91 -0
providers/nvidia_nim/request.py +430 -0
providers/nvidia_nim/voice.py +95 -0
providers/ollama/__init__.py +7 -0
providers/ollama/client.py +39 -0
providers/open_router/__init__.py +7 -0
providers/open_router/client.py +124 -0
providers/open_router/request.py +42 -0
providers/opencode/__init__.py +11 -0
providers/opencode/client.py +31 -0
providers/opencode/request.py +35 -0
providers/rate_limit.py +300 -0
providers/registry.py +527 -0
providers/transports/__init__.py +1 -0
providers/transports/anthropic_messages/__init__.py +5 -0
providers/transports/anthropic_messages/http.py +118 -0
providers/transports/anthropic_messages/recovery.py +206 -0
providers/transports/anthropic_messages/stream.py +295 -0
providers/transports/anthropic_messages/transport.py +236 -0
providers/transports/openai_chat/__init__.py +5 -0
providers/transports/openai_chat/recovery.py +217 -0
providers/transports/openai_chat/stream.py +384 -0
providers/transports/openai_chat/tool_calls.py +293 -0
providers/transports/openai_chat/transport.py +156 -0
providers/wafer/__init__.py +10 -0
providers/wafer/client.py +50 -0
providers/zai/__init__.py +10 -0
providers/zai/client.py +46 -0
providers/zai/request.py +42 -0

providers/open_router/client.py ADDED Viewed

@@ -0,0 +1,124 @@
+"""OpenRouter provider implementation."""
+from __future__ import annotations
+from collections.abc import Iterator
+from typing import Any
+from core.anthropic import iter_provider_stream_error_sse_events
+from core.anthropic.native_sse_block_policy import (
+    NativeSseBlockPolicyState,
+    is_terminal_openrouter_done_event,
+    parse_native_sse_event,
+    transform_native_sse_block_event,
+)
+from providers.base import ProviderConfig
+from providers.defaults import OPENROUTER_DEFAULT_BASE
+from providers.model_listing import (
+    ProviderModelInfo,
+    extract_openrouter_tool_model_ids,
+    extract_openrouter_tool_model_infos,
+)
+from providers.transports.anthropic_messages import (
+    AnthropicMessagesTransport,
+    StreamChunkMode,
+)
+from .request import build_request_body
+# Value sent in the "anthropic-version" header by OpenRouterProvider._request_headers.
+_ANTHROPIC_VERSION = "2023-06-01"
+class OpenRouterProvider(AnthropicMessagesTransport):
+    """OpenRouter provider using the native Anthropic-compatible messages API."""
+    stream_chunk_mode: StreamChunkMode = "event"
+    def __init__(self, config: ProviderConfig):
+        super().__init__(
+            config,
+            provider_name="OPENROUTER",
+            default_base_url=OPENROUTER_DEFAULT_BASE,
+        )
+    def _build_request_body(
+        self, request: Any, thinking_enabled: bool | None = None
+    ) -> dict:
+        """Internal helper for tests and direct request dispatch."""
+        return build_request_body(
+            request,
+            thinking_enabled=self._is_thinking_enabled(request, thinking_enabled),
+        )
+    def _request_headers(self) -> dict[str, str]:
+        """Return OpenRouter's Anthropic-compatible messages headers."""
+        return {
+            "Accept": "text/event-stream",
+            "Authorization": f"Bearer {self._api_key}",
+            "Content-Type": "application/json",
+            "anthropic-version": _ANTHROPIC_VERSION,
+        }
+    def _model_list_headers(self) -> dict[str, str]:
+        """Return OpenRouter's OpenAI-compatible model-list headers."""
+        return {"Authorization": f"Bearer {self._api_key}"}
+    def _extract_model_ids_from_model_list_payload(
+        self, payload: Any
+    ) -> frozenset[str]:
+        """Only advertise OpenRouter models that can run Claude Code tools."""
+        return extract_openrouter_tool_model_ids(
+            payload, provider_name=self._provider_name
+        )
+    def _extract_model_infos_from_model_list_payload(
+        self, payload: Any
+    ) -> frozenset[ProviderModelInfo]:
+        """Advertise OpenRouter tool models with reasoning capability metadata."""
+        return extract_openrouter_tool_model_infos(
+            payload, provider_name=self._provider_name
+        )
+    def _new_stream_state(self, request: Any, *, thinking_enabled: bool) -> Any:
+        """Create per-stream state for thinking block filtering."""
+        return NativeSseBlockPolicyState()
+    def _transform_stream_event(
+        self,
+        event: str,
+        state: Any,
+        *,
+        thinking_enabled: bool,
+    ) -> str | None:
+        """Drop provider-specific terminal noise and hidden thinking events."""
+        if isinstance(state, NativeSseBlockPolicyState):
+            event_name, data_text = parse_native_sse_event(event)
+            if state.message_stopped or is_terminal_openrouter_done_event(
+                event_name, data_text
+            ):
+                return None
+            if event_name == "message_stop":
+                state.message_stopped = True
+        if isinstance(state, NativeSseBlockPolicyState):
+            return transform_native_sse_block_event(
+                event, state, thinking_enabled=thinking_enabled
+            )
+        return event
+    def _emit_error_events(
+        self,
+        *,
+        request: Any,
+        input_tokens: int,
+        error_message: str,
+        sent_any_event: bool,
+    ) -> Iterator[str]:
+        """Emit the Anthropic SSE error shape expected by Claude clients."""
+        yield from iter_provider_stream_error_sse_events(
+            request=request,
+            input_tokens=input_tokens,
+            error_message=error_message,
+            sent_any_event=sent_any_event,
+            log_raw_sse_events=self._config.log_raw_sse_events,
+        )

providers/open_router/request.py ADDED Viewed

@@ -0,0 +1,42 @@
+"""Native Anthropic Messages request builder for OpenRouter."""
+from __future__ import annotations
+from typing import Any
+from loguru import logger
+from config.constants import (
+    ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS as OPENROUTER_DEFAULT_MAX_TOKENS,
+)
+from core.anthropic.native_messages_request import (
+    OpenRouterExtraBodyError,
+    build_openrouter_native_request_body,
+)
+from providers.exceptions import InvalidRequestError
+def build_request_body(request_data: Any, *, thinking_enabled: bool) -> dict:
+    """Build an Anthropic-format request body for OpenRouter's messages API."""
+    logger.debug(
+        "OPENROUTER_REQUEST: conversion start model={} msgs={}",
+        getattr(request_data, "model", "?"),
+        len(getattr(request_data, "messages", [])),
+    )
+    try:
+        body = build_openrouter_native_request_body(
+            request_data,
+            thinking_enabled=thinking_enabled,
+            default_max_tokens=OPENROUTER_DEFAULT_MAX_TOKENS,
+        )
+    except OpenRouterExtraBodyError as exc:
+        raise InvalidRequestError(str(exc)) from exc
+    logger.debug(
+        "OPENROUTER_REQUEST: conversion done model={} msgs={} tools={}",
+        body.get("model"),
+        len(body.get("messages", [])),
+        len(body.get("tools", [])),
+    )
+    return body

providers/opencode/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+"""OpenCode Zen provider exports."""
+from providers.defaults import OPENCODE_DEFAULT_BASE, OPENCODE_GO_DEFAULT_BASE
+from .client import OpenCodeProvider
+# Public names of this package: the two default base-URL constants and the provider class.
+__all__ = [
+    "OPENCODE_DEFAULT_BASE",
+    "OPENCODE_GO_DEFAULT_BASE",
+    "OpenCodeProvider",
+]

providers/opencode/client.py ADDED Viewed

@@ -0,0 +1,31 @@
+"""OpenCode Zen provider implementation (OpenAI-compatible Chat Completions)."""
+from __future__ import annotations
+from typing import Any
+from providers.base import ProviderConfig
+from providers.defaults import OPENCODE_DEFAULT_BASE
+from providers.transports.openai_chat import OpenAIChatTransport
+from .request import build_request_body
+class OpenCodeProvider(OpenAIChatTransport):
+    """OpenCode Zen provider using ``https://opencode.ai/zen/v1/chat/completions``."""
+    def __init__(self, config: ProviderConfig, provider_name: str = "OPENCODE"):
+        super().__init__(
+            config,
+            provider_name=provider_name,
+            base_url=config.base_url or OPENCODE_DEFAULT_BASE,
+            api_key=config.api_key,
+        )
+    def _build_request_body(
+        self, request: Any, thinking_enabled: bool | None = None
+    ) -> dict:
+        return build_request_body(
+            request,
+            thinking_enabled=self._is_thinking_enabled(request, thinking_enabled),
+        )

providers/opencode/request.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""Request builder for OpenCode Zen provider."""
+from typing import Any
+from loguru import logger
+from core.anthropic import ReasoningReplayMode, build_base_request_body
+from core.anthropic.conversion import OpenAIConversionError
+from providers.exceptions import InvalidRequestError
+def build_request_body(request_data: Any, *, thinking_enabled: bool) -> dict:
+    """Build OpenAI-format request body from Anthropic request for OpenCode Zen."""
+    logger.debug(
+        "OPENCODE_REQUEST: conversion start model={} msgs={}",
+        getattr(request_data, "model", "?"),
+        len(getattr(request_data, "messages", [])),
+    )
+    try:
+        body = build_base_request_body(
+            request_data,
+            reasoning_replay=ReasoningReplayMode.REASONING_CONTENT
+            if thinking_enabled
+            else ReasoningReplayMode.DISABLED,
+        )
+    except OpenAIConversionError as exc:
+        raise InvalidRequestError(str(exc)) from exc
+    logger.debug(
+        "OPENCODE_REQUEST: conversion done model={} msgs={} tools={}",
+        body.get("model"),
+        len(body.get("messages", [])),
+        len(body.get("tools", [])),
+    )
+    return body

providers/rate_limit.py ADDED Viewed

@@ -0,0 +1,300 @@
+"""Global rate limiter for API requests."""
+import asyncio
+import random
+import time
+from collections.abc import AsyncIterator, Callable
+from contextlib import asynccontextmanager
+from typing import Any, ClassVar, TypeVar
+import httpx
+import openai
+from loguru import logger
+from core.rate_limit import StrictSlidingWindowLimiter
+from core.trace import trace_event
+# Generic type variable; not referenced in the part of the module shown here.
+# NOTE(review): confirm it is still needed.
+T = TypeVar("T")
+# Total tries (first attempt plus retries) allowed for a transient upstream failure.
+UPSTREAM_TRANSIENT_TOTAL_ATTEMPTS = 5
+# Default ``max_retries`` for execute_with_retry: every try except the first one.
+DEFAULT_UPSTREAM_MAX_RETRIES = UPSTREAM_TRANSIENT_TOTAL_ATTEMPTS - 1
+def _upstream_http_retryable(code: int) -> bool:
+    """Return True when ``code`` is a 5xx status (500-599 inclusive).
+    No other status code, including 429, is accepted by this check.
+    """
+    return 500 <= code <= 599
+def retryable_upstream_status(exc: BaseException) -> int | None:
+    """Return HTTP-like status codes that qualify for reactive backoff retries.
+    Only upstream ``5xx`` use the same exponential backoff and scoped limiter
+    blocking semantics as today's transient failure path.
+    """
+    if isinstance(exc, httpx.HTTPStatusError):
+        status = exc.response.status_code
+        if _upstream_http_retryable(status):
+            return status
+        return None
+    if isinstance(exc, openai.APIError):
+        status = getattr(exc, "status_code", None)
+        if isinstance(status, int) and 500 <= status <= 599:
+            return status
+        return None
+    return None
+class GlobalRateLimiter:
+    """
+    Global singleton rate limiter that blocks all requests
+    when a rate limit error is encountered (reactive) and
+    throttles requests (proactive) using a strict rolling window.
+    Optionally enforces a max_concurrency cap: at most N provider streams
+    may be open simultaneously, independent of the sliding window.
+    Proactive limits - throttles requests to stay within API limits.
+    Reactive limits - pauses all requests when a 5xx retry backoff is active.
+    Concurrency limit - caps simultaneously open streams.
+    """
+    _instance: ClassVar[GlobalRateLimiter | None] = None
+    _scoped_instances: ClassVar[dict[str, GlobalRateLimiter]] = {}
+    def __init__(
+        self,
+        rate_limit: int = 40,
+        rate_window: float = 60.0,
+        max_concurrency: int = 5,
+    ):
+        # Prevent re-initialization on singleton reuse
+        if hasattr(self, "_initialized"):
+            return
+        if rate_limit <= 0:
+            raise ValueError("rate_limit must be > 0")
+        if rate_window <= 0:
+            raise ValueError("rate_window must be > 0")
+        if max_concurrency <= 0:
+            raise ValueError("max_concurrency must be > 0")
+        self._rate_limit = rate_limit
+        self._rate_window = float(rate_window)
+        self._max_concurrency = max_concurrency
+        self._proactive_limiter = StrictSlidingWindowLimiter(
+            self._rate_limit, self._rate_window
+        )
+        self._blocked_until: float = 0
+        self._concurrency_sem = asyncio.Semaphore(max_concurrency)
+        self._initialized = True
+        logger.info(
+            f"GlobalRateLimiter (Provider) initialized ({rate_limit} req / {rate_window}s, max_concurrency={max_concurrency})"
+        )
+    @classmethod
+    def get_instance(
+        cls,
+        rate_limit: int | None = None,
+        rate_window: float | None = None,
+        max_concurrency: int = 5,
+    ) -> GlobalRateLimiter:
+        """Get or create the singleton instance.
+        Args:
+            rate_limit: Requests per window (only used on first creation)
+            rate_window: Window in seconds (only used on first creation)
+            max_concurrency: Max simultaneous open streams (only used on first creation)
+        """
+        if cls._instance is None:
+            cls._instance = cls(
+                rate_limit=rate_limit or 40,
+                rate_window=rate_window or 60.0,
+                max_concurrency=max_concurrency,
+            )
+        return cls._instance
+    @classmethod
+    def get_scoped_instance(
+        cls,
+        scope: str,
+        *,
+        rate_limit: int | None = None,
+        rate_window: float | None = None,
+        max_concurrency: int = 5,
+    ) -> GlobalRateLimiter:
+        """Get or create a provider-scoped limiter instance."""
+        if not scope:
+            raise ValueError("scope must be non-empty")
+        desired_rate_limit = rate_limit or 40
+        desired_rate_window = float(rate_window or 60.0)
+        existing = cls._scoped_instances.get(scope)
+        if existing and existing.matches_config(
+            desired_rate_limit, desired_rate_window, max_concurrency
+        ):
+            return existing
+        if existing:
+            logger.info(
+                "Rebuilding provider rate limiter for updated scope '{}'", scope
+            )
+        cls._scoped_instances[scope] = cls(
+            rate_limit=desired_rate_limit,
+            rate_window=desired_rate_window,
+            max_concurrency=max_concurrency,
+        )
+        return cls._scoped_instances[scope]
+    @classmethod
+    def reset_instance(cls) -> None:
+        """Reset singleton (for testing)."""
+        cls._instance = None
+        cls._scoped_instances = {}
+    async def wait_if_blocked(self) -> bool:
+        """
+        Wait if currently rate limited or throttle to meet quota.
+        Returns:
+            True if was reactively blocked and waited, False otherwise.
+        """
+        # 1. Reactive check: Wait if someone hit a reactive backoff (429/5xx retries)
+        waited_reactively = False
+        now = time.monotonic()
+        if now < self._blocked_until:
+            wait_time = self._blocked_until - now
+            logger.warning(
+                f"Global provider rate limit active (reactive), waiting {wait_time:.1f}s..."
+            )
+            await asyncio.sleep(wait_time)
+            waited_reactively = True
+        # 2. Proactive check: strict rolling window (no bursts beyond N in last W seconds)
+        await self._acquire_proactive_slot()
+        return waited_reactively
+    async def _acquire_proactive_slot(self) -> None:
+        """
+        Acquire a proactive slot enforcing a strict rolling window.
+        Guarantees: at most `self._rate_limit` acquisitions in any interval of length
+        `self._rate_window` (seconds).
+        """
+        await self._proactive_limiter.acquire()
+    def set_blocked(self, seconds: float = 60) -> None:
+        """
+        Set global block for specified seconds (reactive).
+        Args:
+            seconds: How long to block (default 60s)
+        """
+        self._blocked_until = time.monotonic() + seconds
+        logger.warning(f"Global provider rate limit set for {seconds:.1f}s (reactive)")
+    def is_blocked(self) -> bool:
+        """Check if currently reactively blocked."""
+        return time.monotonic() < self._blocked_until
+    def matches_config(
+        self, rate_limit: int, rate_window: float, max_concurrency: int
+    ) -> bool:
+        """Return whether this limiter matches the requested runtime config."""
+        return (
+            self._rate_limit == rate_limit
+            and self._rate_window == float(rate_window)
+            and self._max_concurrency == max_concurrency
+        )
+    def remaining_wait(self) -> float:
+        """Get remaining reactive wait time in seconds."""
+        return max(0.0, self._blocked_until - time.monotonic())
+    @asynccontextmanager
+    async def concurrency_slot(self) -> AsyncIterator[None]:
+        """Async context manager that holds one concurrency slot for a stream.
+        Blocks until a slot is available (controlled by max_concurrency).
+        """
+        await self._concurrency_sem.acquire()
+        try:
+            yield
+        finally:
+            self._concurrency_sem.release()
+    async def execute_with_retry(
+        self,
+        fn: Callable[..., Any],
+        *args: Any,
+        max_retries: int = DEFAULT_UPSTREAM_MAX_RETRIES,
+        base_delay: float = 2.0,
+        max_delay: float = 60.0,
+        jitter: float = 1.0,
+        **kwargs: Any,
+    ) -> Any:
+        """Execute an async callable with rate limiting and retry on transient limits.
+        Waits for the proactive limiter before each attempt. On upstream ``5xx``
+        server errors, applies exponential backoff with jitter and sets the
+        reactive block before retrying.
+        Args:
+            fn: Async callable to execute.
+            max_retries: Maximum number of retry attempts after the first failure.
+            base_delay: Base delay in seconds for exponential backoff.
+            max_delay: Maximum delay cap in seconds.
+            jitter: Maximum random jitter in seconds added to each delay.
+        Returns:
+            The result of the callable.
+        Raises:
+            The last exception if all retries are exhausted.
+        """
+        last_exc: Exception | None = None
+        total_attempts = 1 + max_retries
+        for attempt in range(total_attempts):
+            await self.wait_if_blocked()
+            try:
+                return await fn(*args, **kwargs)
+            except Exception as e:
+                status = retryable_upstream_status(e)
+                if status is None:
+                    raise
+                label = f"Upstream server error ({status})"
+                last_exc = e
+                if attempt >= max_retries:
+                    logger.warning(
+                        "{} retry exhausted after {} retries (attempts={})",
+                        label,
+                        max_retries,
+                        total_attempts,
+                    )
+                    break
+                delay = min(base_delay * (2**attempt), max_delay)
+                delay += random.uniform(0, jitter)
+                attempt_no = attempt + 1
+                logger.warning(
+                    "{}, attempt {}/{}. Retrying in {:.1f}s...",
+                    label,
+                    attempt_no,
+                    total_attempts,
+                    delay,
+                )
+                trace_event(
+                    stage="provider",
+                    event="provider.retry.scheduled",
+                    source="provider",
+                    status_code=status,
+                    attempt=attempt_no,
+                    max_attempts=total_attempts,
+                    delay_s=round(delay, 3),
+                )
+                self.set_blocked(delay)
+                await asyncio.sleep(delay)
+        assert last_exc is not None
+        raise last_exc