modelmeld 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. modelmeld/__init__.py +6 -0
  2. modelmeld/__main__.py +24 -0
  3. modelmeld/adapters/__init__.py +11 -0
  4. modelmeld/adapters/anthropic_adapter.py +165 -0
  5. modelmeld/adapters/base.py +116 -0
  6. modelmeld/adapters/openai_adapter.py +125 -0
  7. modelmeld/adapters/retry.py +181 -0
  8. modelmeld/adapters/stub.py +87 -0
  9. modelmeld/adapters/tensorrt_llm_adapter.py +60 -0
  10. modelmeld/adapters/vllm_adapter.py +57 -0
  11. modelmeld/api/__init__.py +3 -0
  12. modelmeld/api/body_size_limit.py +124 -0
  13. modelmeld/api/byok.py +197 -0
  14. modelmeld/api/routes/__init__.py +3 -0
  15. modelmeld/api/routes/chat.py +950 -0
  16. modelmeld/api/routes/healthz.py +13 -0
  17. modelmeld/api/routes/messages.py +728 -0
  18. modelmeld/api/routes/models.py +84 -0
  19. modelmeld/api/routing_hints.py +180 -0
  20. modelmeld/api/schemas.py +341 -0
  21. modelmeld/api/schemas_anthropic.py +418 -0
  22. modelmeld/api/server.py +125 -0
  23. modelmeld/cache/__init__.py +59 -0
  24. modelmeld/cache/base.py +125 -0
  25. modelmeld/cache/embedding.py +94 -0
  26. modelmeld/cache/in_memory.py +79 -0
  27. modelmeld/cache/semantic.py +134 -0
  28. modelmeld/cli/__init__.py +95 -0
  29. modelmeld/cli/__main__.py +7 -0
  30. modelmeld/cli/doctor.py +488 -0
  31. modelmeld/cli/setup.py +504 -0
  32. modelmeld/config.py +114 -0
  33. modelmeld/hooks.py +117 -0
  34. modelmeld/licensing.py +247 -0
  35. modelmeld/memory/__init__.py +110 -0
  36. modelmeld/memory/base.py +352 -0
  37. modelmeld/memory/context.py +313 -0
  38. modelmeld/memory/identity.py +130 -0
  39. modelmeld/memory/in_memory.py +249 -0
  40. modelmeld/memory/summarizer.py +304 -0
  41. modelmeld/memory/tiers.py +82 -0
  42. modelmeld/privacy/__init__.py +31 -0
  43. modelmeld/privacy/scrubber.py +220 -0
  44. modelmeld/py.typed +0 -0
  45. modelmeld/router/__init__.py +173 -0
  46. modelmeld/router/base.py +338 -0
  47. modelmeld/router/capability.py +263 -0
  48. modelmeld/scout/__init__.py +81 -0
  49. modelmeld/scout/base.py +54 -0
  50. modelmeld/scout/benchmarks/__init__.py +55 -0
  51. modelmeld/scout/benchmarks/aider_polyglot.py +147 -0
  52. modelmeld/scout/benchmarks/artificial_analysis.py +201 -0
  53. modelmeld/scout/benchmarks/base.py +52 -0
  54. modelmeld/scout/benchmarks/livebench.py +140 -0
  55. modelmeld/scout/benchmarks/lmarena.py +174 -0
  56. modelmeld/scout/benchmarks/refresher.py +339 -0
  57. modelmeld/scout/capability.py +472 -0
  58. modelmeld/scout/data/LICENSE.md +91 -0
  59. modelmeld/scout/data/__init__.py +3 -0
  60. modelmeld/scout/data/default_registry.json +502 -0
  61. modelmeld/scout/devtool.py +179 -0
  62. modelmeld/scout/feed.py +320 -0
  63. modelmeld/scout/heuristics.py +192 -0
  64. modelmeld/scout/policy.py +226 -0
  65. modelmeld/scout/registry.py +283 -0
  66. modelmeld/scout/task_category.py +193 -0
  67. modelmeld/tokens/__init__.py +27 -0
  68. modelmeld/tokens/counter.py +170 -0
  69. modelmeld/translation/__init__.py +32 -0
  70. modelmeld/translation/openai_anthropic.py +1104 -0
  71. modelmeld-0.1.0.dist-info/METADATA +262 -0
  72. modelmeld-0.1.0.dist-info/RECORD +76 -0
  73. modelmeld-0.1.0.dist-info/WHEEL +4 -0
  74. modelmeld-0.1.0.dist-info/entry_points.txt +2 -0
  75. modelmeld-0.1.0.dist-info/licenses/LICENSE +661 -0
  76. modelmeld-0.1.0.dist-info/licenses/NOTICE +88 -0
modelmeld/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (c) 2026 ModelMeld.
3
+
4
+ """Gateway core engine."""
5
+
6
+ __version__ = "0.0.1"
modelmeld/__main__.py ADDED
@@ -0,0 +1,24 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (c) 2026 ModelMeld.
3
+
4
+ """Run the gateway server: `python -m modelmeld`."""
5
+
6
+ from __future__ import annotations
7
+
8
+
9
def main() -> None:
    """Launch the gateway ASGI app under uvicorn.

    Host, port and log level are read from a freshly constructed
    `GatewaySettings`; the log level is lower-cased before being handed
    to uvicorn.
    """
    # Both imports are function-local, so they only run when the server
    # is actually started.
    import uvicorn

    from modelmeld.config import GatewaySettings

    cfg = GatewaySettings()
    server_options = {
        "host": cfg.host,
        "port": cfg.port,
        "log_level": cfg.log_level.lower(),
    }
    uvicorn.run("modelmeld.api.server:app", **server_options)


if __name__ == "__main__":
    main()
modelmeld/adapters/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (c) 2026 ModelMeld.
3
+
4
+ """Provider adapters. See base.py for the contract."""
5
+
6
+ from __future__ import annotations
7
+
8
+ from modelmeld.adapters.base import AdapterError, ProviderAdapter
9
+ from modelmeld.adapters.stub import StubAdapter
10
+
11
+ __all__ = ["AdapterError", "ProviderAdapter", "StubAdapter"]
modelmeld/adapters/anthropic_adapter.py ADDED
@@ -0,0 +1,165 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (c) 2026 ModelMeld.
3
+
4
+ """AnthropicAdapter — pass-through to Anthropic Messages API with schema translation."""
5
+
6
+ from __future__ import annotations
7
+
8
+ import os
9
+ from collections.abc import AsyncIterator
10
+
11
+ from modelmeld.adapters.base import AdapterError, ProviderAdapter
12
+ from modelmeld.adapters.retry import (
13
+ RetryConfig,
14
+ retry_async,
15
+ wrap_as_adapter_error,
16
+ )
17
+ from modelmeld.api.schemas import (
18
+ ChatCompletion,
19
+ ChatCompletionChunk,
20
+ ChatCompletionRequest,
21
+ )
22
+ from modelmeld.api.schemas_anthropic import AnthropicMessagesRequest
23
+ from modelmeld.translation import (
24
+ AnthropicStreamTranslator,
25
+ from_anthropic_response,
26
+ to_anthropic_params,
27
+ )
28
+
29
+
30
class AnthropicAdapter(ProviderAdapter):
    """Adapter that forwards chat requests to the Anthropic Messages API.

    Requests arrive in the gateway's OpenAI-shaped schema. They are either
    translated to Anthropic parameters, or - when the caller supplies a
    native `AnthropicMessagesRequest` - passed through as-is (apart from the
    optional `served_model` pin). Responses and stream events are translated
    back into the OpenAI-shaped schema.
    """

    name = "anthropic"
    is_egress = True

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str | None = None,
        retry_config: RetryConfig | None = None,
        served_model: str | None = None,
    ) -> None:
        """Build the underlying `AsyncAnthropic` client.

        Args:
            api_key: Anthropic API key. When omitted, falls back to the
                `ANTHROPIC_API_KEY` and then `MODELMELD_ANTHROPIC_API_KEY`
                environment variables.
            base_url: optional override for the upstream base URL.
            retry_config: retry policy for upstream calls; defaults to
                `RetryConfig()`.
            served_model: operator-pinned upstream model (F-8). When set it
                replaces the model named in the request.

        Raises:
            AdapterError: the `anthropic` package is not installed, or no
                API key could be resolved.
        """
        try:
            from anthropic import AsyncAnthropic
        except ImportError as e:
            raise AdapterError(
                "AnthropicAdapter requires the `anthropic` package. "
                "Install with: pip install 'modelmeld[anthropic]'"
            ) from e

        # The error message below advertises both variable names, so honor
        # both. The un-prefixed name keeps priority so existing deployments
        # resolve the same key as before.
        key = (
            api_key
            or os.environ.get("ANTHROPIC_API_KEY")
            or os.environ.get("MODELMELD_ANTHROPIC_API_KEY")
        )
        if not key:
            raise AdapterError(
                "AnthropicAdapter requires an API key "
                "(pass api_key= or set ANTHROPIC_API_KEY / MODELMELD_ANTHROPIC_API_KEY)."
            )

        # Disable the SDK's built-in retry; we own retry policy via retry_async.
        # Stacking SDK retries on top of ours wastes time and rate limit.
        kwargs: dict = {"api_key": key, "max_retries": 0}
        if base_url:
            kwargs["base_url"] = base_url
        self._client = AsyncAnthropic(**kwargs)
        self._retry_config = retry_config or RetryConfig()
        # F-8: operator-pinned upstream model (overrides request.model).
        self.served_model = served_model

    async def chat(
        self,
        request: ChatCompletionRequest,
        *,
        native_request: object | None = None,
        extra_headers: dict[str, str] | None = None,
    ) -> ChatCompletion:
        """Non-streaming chat.

        Args:
            request: OpenAI-shaped request.
            native_request: optional native Anthropic request; when it is an
                `AnthropicMessagesRequest` it is sent upstream instead of a
                translation of `request`.
            extra_headers: optional /v1/messages escape hatch for forwarding
                caller-supplied Anthropic protocol headers
                (`anthropic-beta`, `anthropic-version`, etc.) verbatim to
                the upstream.

        Returns:
            The upstream message translated to a `ChatCompletion`.

        Raises:
            AdapterError: the upstream call failed (after retries). The
                concrete subclass is chosen by `wrap_as_adapter_error`.
        """
        params = self._build_params(request, native_request)
        # This path reads a complete message (`model_dump()` below), so a
        # `stream` flag inherited from the incoming request must not reach
        # the SDK - it would return an event stream instead of a message.
        params.pop("stream", None)
        if extra_headers:
            params["extra_headers"] = dict(extra_headers)

        async def _call():
            return await self._client.messages.create(**params)

        try:
            sdk_message = await retry_async(
                _call, self._retry_config, label="anthropic.chat",
            )
        except Exception as e:
            raise wrap_as_adapter_error(e, "Anthropic chat call failed") from e
        return from_anthropic_response(sdk_message.model_dump())

    async def stream_chat(
        self,
        request: ChatCompletionRequest,
        *,
        native_request: object | None = None,
        extra_headers: dict[str, str] | None = None,
    ) -> AsyncIterator[ChatCompletionChunk]:
        """Streaming chat; yields translated chunks as events arrive.

        Only the opening of the stream is retried and wrapped in an
        `AdapterError`; exceptions raised while iterating the open stream
        propagate as raised. Events for which the translator returns None
        are skipped.
        """
        params = self._build_params(request, native_request)
        params["stream"] = True
        if extra_headers:
            params["extra_headers"] = dict(extra_headers)

        async def _open_stream():
            return await self._client.messages.create(**params)

        try:
            stream = await retry_async(
                _open_stream, self._retry_config, label="anthropic.stream_chat",
            )
        except Exception as e:
            raise wrap_as_adapter_error(
                e, "Anthropic stream_chat call failed",
            ) from e

        translator = AnthropicStreamTranslator()
        async for event in stream:
            chunk = translator.translate_event(event.model_dump())
            if chunk is not None:
                yield chunk

    def _build_params(
        self,
        request: ChatCompletionRequest,
        native_request: object | None,
    ) -> dict:
        """Build the keyword arguments for `messages.create`.

        Two paths:

        * Native passthrough - `native_request` is an
          `AnthropicMessagesRequest`. It is dumped with `exclude_none=True`
          so the caller's request shape (cache_control, tool schemas, image
          content blocks) reaches the upstream intact. `served_model`, when
          set, still overrides the model; otherwise a missing model falls
          back to `request.model`.
        * Translation - anything else. `served_model` is applied to
          `request`, which is then converted with `to_anthropic_params`.
          This is the path /v1/chat/completions callers take.
        """
        if isinstance(native_request, AnthropicMessagesRequest):
            params = native_request.model_dump(exclude_none=True)
            if self.served_model is not None:
                # F-8: operator pin wins over whatever the customer asked for.
                params["model"] = self.served_model
            elif params.get("model") is None:
                # Defense in depth - Anthropic SDK requires `model`.
                params["model"] = request.model
            return params
        # Translation path (the existing /v1/chat/completions behavior).
        request = self._apply_served_model(request)
        return to_anthropic_params(request)

    async def health(self) -> bool:
        """Always report healthy.

        Anthropic has no cheap public health endpoint; a configured client
        is considered healthy and real failures surface on the first call.
        """
        return True

    async def close(self) -> None:
        """Close the underlying SDK client."""
        await self._client.close()
modelmeld/adapters/base.py ADDED
@@ -0,0 +1,116 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (c) 2026 ModelMeld.
3
+
4
+ """Provider adapter abstract base class.
5
+
6
+ `ProviderAdapter` is the extension point through which the gateway forwards
7
+ OpenAI-shaped requests to a concrete upstream (OpenAI cloud, Anthropic cloud,
8
+ local vLLM, etc.). Implementations live in sibling modules.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from abc import ABC, abstractmethod
14
+ from collections.abc import AsyncIterator
15
+
16
+ from modelmeld.api.schemas import (
17
+ ChatCompletion,
18
+ ChatCompletionChunk,
19
+ ChatCompletionRequest,
20
+ )
21
+
22
+
23
class AdapterError(Exception):
    """Base exception for any failure of an adapter to fulfill a request.

    Covers network failures, upstream 5xx responses, schema-translation
    errors and misconfiguration (for example a missing API key).

    The subclasses `TransientAdapterError` and `PermanentAdapterError` add
    the retry-ability signal that lets the TieredRouter choose between
    failing over to the other tier and surfacing the error to the caller.
    """
33
+
34
+
35
class TransientAdapterError(AdapterError):
    """Adapter failure that a retry or a failover may resolve.

    Typical causes: HTTP 5xx, 429 rate limit, 529 overloaded, a network
    blip, a timeout. Routers should try the other tier; callers should
    read repeated occurrences as a genuine outage.
    """
42
+
43
+
44
class PermanentAdapterError(AdapterError):
    """Adapter failure that retrying will not fix.

    Typical causes: HTTP 401/403 auth failure, HTTP 404 model-not-found,
    schema-translation errors, misconfiguration. Routers should NOT fail
    over: the error is surfaced so the caller sees the real cause rather
    than a misleading fallback response.
    """
52
+
53
+
54
class ProviderAdapter(ABC):
    """Contract for forwarding an OpenAI-shaped request to one upstream."""

    name: str
    # Set to True by adapters whose traffic leaves the customer's network;
    # the chat route uses it to gate PII scrubbing.
    is_egress: bool = False
    # F-8: the model this adapter actually serves upstream, as configured
    # by the operator. When set, outbound calls use it in place of
    # `request.model`, so clients may send any model name while routing
    # follows the scout's tier decision. When None, the client's model name
    # is forwarded unchanged (the default, suited to multi-model providers).
    served_model: str | None = None

    @abstractmethod
    async def chat(self, request: ChatCompletionRequest) -> ChatCompletion:
        """Run a non-streaming chat completion."""

    @abstractmethod
    def stream_chat(
        self, request: ChatCompletionRequest
    ) -> AsyncIterator[ChatCompletionChunk]:
        """Run a streaming chat completion (implemented as an async generator)."""

    @abstractmethod
    async def health(self) -> bool:
        """Cheaply probe upstream reachability; False signals failure."""

    def serves_model(self, model_id: str) -> bool:  # noqa: ARG002 — base default
        """Report whether this adapter can serve `model_id` (F-8).

        The base implementation always answers True:

        - with `served_model=None` the adapter is pass-through; upstream
          support is unknown, so failover is assumed to be safe.
        - with `served_model="X"` the adapter substitutes;
          `_apply_served_model()` rewrites `request.model` to X on the way
          out, so any client model id is served. Configuring
          `served_model` means opting into that substitution.

        TieredRouter checks this before failing over. Subclasses may
        override it to be stricter (for example a compliance-mode adapter
        that refuses non-matching model ids).
        """
        return True

    def _apply_served_model(
        self, request: ChatCompletionRequest,
    ) -> ChatCompletionRequest:
        """Substitute `served_model` into the request when one is pinned.

        The original object is handed back untouched when no model is
        pinned or when the request already names the pinned model, so the
        pass-through case pays for no copy. Otherwise a copy with only
        `model` replaced is returned. Adapters call this at the start of
        `chat()` / `stream_chat()` before delegating upstream.
        """
        pinned = self.served_model
        if pinned is not None and request.model != pinned:
            # model_copy keeps every other field of the request as-is.
            return request.model_copy(update={"model": pinned})
        return request

    async def close(self) -> None:
        """Release held resources; the base implementation does nothing."""
modelmeld/adapters/openai_adapter.py ADDED
@@ -0,0 +1,125 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (c) 2026 ModelMeld.
3
+
4
+ """OpenAIAdapter — pass-through to OpenAI's cloud API via the official SDK.
5
+
6
+ Named `openai_adapter` (not `openai`) to avoid shadowing the upstream package
7
+ when read by humans.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import os
13
+ from collections.abc import AsyncIterator
14
+ from typing import Any
15
+
16
+ import httpx
17
+
18
+ from modelmeld.adapters.base import AdapterError, ProviderAdapter
19
+ from modelmeld.adapters.retry import (
20
+ RetryConfig,
21
+ retry_async,
22
+ wrap_as_adapter_error,
23
+ )
24
+ from modelmeld.api.schemas import (
25
+ ChatCompletion,
26
+ ChatCompletionChunk,
27
+ ChatCompletionRequest,
28
+ )
29
+
30
+
31
class OpenAIAdapter(ProviderAdapter):
    """Adapter that forwards chat requests to OpenAI via the official SDK.

    The gateway's request schema is already OpenAI-shaped, so requests are
    dumped to keyword arguments and responses are re-validated into the
    gateway's own `ChatCompletion` / `ChatCompletionChunk` models.
    """

    name = "openai"
    is_egress = True

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str | None = None,
        http_client: httpx.AsyncClient | None = None,
        retry_config: RetryConfig | None = None,
        served_model: str | None = None,
    ) -> None:
        """Build the underlying `AsyncOpenAI` client.

        Args:
            api_key: OpenAI API key. When omitted, falls back to the
                `OPENAI_API_KEY` and then `MODELMELD_OPENAI_API_KEY`
                environment variables.
            base_url: optional override for the upstream base URL.
            http_client: optional pre-built httpx client handed to the SDK.
            retry_config: retry policy for upstream calls; defaults to
                `RetryConfig()`.
            served_model: operator-pinned upstream model (F-8). When set it
                replaces the model named in the request.

        Raises:
            AdapterError: the `openai` package is not installed, or no API
                key could be resolved.
        """
        try:
            from openai import AsyncOpenAI
        except ImportError as e:
            raise AdapterError(
                "OpenAIAdapter requires the `openai` package. "
                "Install with: pip install 'modelmeld[openai]'"
            ) from e

        # The error message below advertises both variable names, so honor
        # both. The un-prefixed name keeps priority so existing deployments
        # resolve the same key as before.
        key = (
            api_key
            or os.environ.get("OPENAI_API_KEY")
            or os.environ.get("MODELMELD_OPENAI_API_KEY")
        )
        if not key:
            raise AdapterError(
                "OpenAIAdapter requires an API key "
                "(pass api_key= or set OPENAI_API_KEY / MODELMELD_OPENAI_API_KEY)."
            )
        # Disable the SDK's built-in retry; retry policy lives in retry_async.
        self._client = AsyncOpenAI(
            api_key=key,
            base_url=base_url,
            http_client=http_client,
            max_retries=0,
        )
        self._retry_config = retry_config or RetryConfig()
        # F-8: operator-pinned upstream model (overrides request.model).
        self.served_model = served_model

    def _to_params(
        self, request: ChatCompletionRequest, *, stream: bool
    ) -> dict[str, Any]:
        """Dump `request` to SDK keyword arguments for one call.

        None-valued fields are left out so the upstream's defaults are not
        overridden. The request's own `stream` value is ignored in favor of
        the explicit `stream` argument, and `stream_options` is dropped for
        non-streaming calls.
        """
        excluded: set[str] = {"stream"}
        if not stream:
            excluded.add("stream_options")
        params = request.model_dump(exclude_none=True, exclude=excluded)
        params["stream"] = stream
        return params

    async def chat(self, request: ChatCompletionRequest) -> ChatCompletion:
        """Non-streaming chat completion.

        Raises:
            AdapterError: the upstream call failed (after retries). The
                concrete subclass is chosen by `wrap_as_adapter_error`.
        """
        request = self._apply_served_model(request)

        async def _call():
            return await self._client.chat.completions.create(
                **self._to_params(request, stream=False)
            )

        try:
            sdk_response = await retry_async(
                _call, self._retry_config, label="openai.chat",
            )
        except Exception as e:
            raise wrap_as_adapter_error(e, "OpenAI chat call failed") from e
        return ChatCompletion.model_validate(sdk_response.model_dump())

    async def stream_chat(
        self, request: ChatCompletionRequest
    ) -> AsyncIterator[ChatCompletionChunk]:
        """Streaming chat completion; yields one validated chunk per SDK chunk.

        Only the opening of the stream is retried and wrapped in an
        `AdapterError`; exceptions raised while iterating the open stream
        propagate as raised.
        """
        request = self._apply_served_model(request)

        async def _open_stream():
            return await self._client.chat.completions.create(
                **self._to_params(request, stream=True)
            )

        try:
            stream = await retry_async(
                _open_stream, self._retry_config, label="openai.stream_chat",
            )
        except Exception as e:
            raise wrap_as_adapter_error(
                e, "OpenAI stream_chat call failed",
            ) from e
        async for chunk in stream:
            yield ChatCompletionChunk.model_validate(chunk.model_dump())

    async def health(self) -> bool:
        """Probe the upstream by listing models; any exception means False."""
        try:
            await self._client.models.list()
            return True
        except Exception:
            # Deliberate best-effort: the health contract is a boolean.
            return False

    async def close(self) -> None:
        """Close the underlying SDK client."""
        await self._client.close()
modelmeld/adapters/retry.py ADDED
@@ -0,0 +1,181 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (c) 2026 ModelMeld.
3
+
4
+ """Retry-with-backoff utility for adapter calls (F-5).
5
+
6
+ Wraps an async adapter call with exponential backoff retry on transient
7
+ errors. Permanent errors (auth failure, config mismatch, schema errors)
8
+ raise immediately - retrying them just wastes time and exhausts the
9
+ provider's rate limit.
10
+
11
+ Used by `AnthropicAdapter` and `OpenAIAdapter` to absorb provider
12
+ throttling and 5xx blips before they reach the `TieredRouter`. With this
13
+ in place, the router's failover logic only triggers on outages that
14
+ genuinely persist across retries.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import asyncio
20
+ import logging
21
+ import random
22
+ from collections.abc import Awaitable, Callable
23
+ from dataclasses import dataclass
24
+ from typing import TypeVar
25
+
26
+ from modelmeld.adapters.base import (
27
+ AdapterError,
28
+ PermanentAdapterError,
29
+ TransientAdapterError,
30
+ )
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+ T = TypeVar("T")
35
+
36
# HTTP status codes worth retrying. Every other status is treated as
# permanent - a retried 401 still carries the same invalid credentials.
TRANSIENT_STATUS_CODES: frozenset[int] = frozenset(
    (
        408,  # Request Timeout
        409,  # Conflict (some APIs use it for "operation in progress")
        425,  # Too Early
        429,  # Too Many Requests
        500,  # Internal Server Error
        502,  # Bad Gateway
        503,  # Service Unavailable
        504,  # Gateway Timeout
        529,  # Anthropic-specific Overloaded
    )
)

# Lower-case fragments of exception class names that signal a transient
# failure. Consulted only when the exception exposes no integer
# `status_code` (some SDKs raise network errors that merely wrap the
# underlying httpx/aiohttp exception).
TRANSIENT_CLASS_HINTS: tuple[str, ...] = (
    "ratelimit",
    "overloaded",
    "timeout",
    "connection",
    "apiconnection",
    "internalservererror",
    "serviceunavailable",
)
59
+
60
+
61
@dataclass(frozen=True)
class RetryConfig:
    """Retry policy consumed by `retry_async`.

    With the defaults (max_attempts=3, base_delay_sec=1.0) a call is tried
    at most three times, waiting ~1s and then ~2s between attempts - about
    3 seconds of total backoff. Each wait is randomized by up to ±20% to
    avoid a thundering herd.
    """

    # Total number of tries, including the first one.
    max_attempts: int = 3
    # Wait before the second try; doubles for every further try.
    base_delay_sec: float = 1.0
    # Upper bound on the exponential delay. Jitter is applied after this
    # cap, so a single wait may exceed it by up to the jitter fraction.
    max_delay_sec: float = 30.0
    jitter: float = 0.2  # ±20% randomization of the computed delay
73
+
74
+
75
def is_transient_error(exc: BaseException) -> bool:
    """Decide whether `exc` is worth a retry.

    The checks run in this order and the first decisive one wins:

    1. Network-level types: `asyncio.TimeoutError` and `ConnectionError`
       instances are always transient.
    2. An integer `status_code` attribute: transient exactly when it is in
       `TRANSIENT_STATUS_CODES`. The class name is not consulted in this
       case.
    3. Otherwise the lower-cased class name is searched for any fragment
       from `TRANSIENT_CLASS_HINTS`.
    """
    if isinstance(exc, asyncio.TimeoutError) or isinstance(exc, ConnectionError):
        return True

    code = getattr(exc, "status_code", None)
    if isinstance(code, int):
        return code in TRANSIENT_STATUS_CODES

    lowered_name = type(exc).__name__.lower()
    for fragment in TRANSIENT_CLASS_HINTS:
        if fragment in lowered_name:
            return True
    return False
92
+
93
+
94
def _compute_backoff(
    attempt: int, config: RetryConfig, rng: random.Random | None = None,
) -> float:
    """Return the delay in seconds to wait after failed attempt `attempt`.

    The delay is `base_delay_sec * 2 ** (attempt - 1)`, capped at
    `max_delay_sec`, then scaled by a random factor in
    `[1 - jitter, 1 + jitter]` when `jitter > 0`. Jitter is applied after
    the cap, so the result may exceed `max_delay_sec` by up to the jitter
    fraction. The result is never negative.

    Args:
        attempt: 1-based number of the attempt that just failed.
        config: retry policy supplying base delay, cap and jitter.
        rng: optional RNG for deterministic jitter; the `random` module's
            shared generator is used when omitted.
    """
    try:
        # Float exponentiation gives the same values as the integer form
        # for every representable result, but lets us catch the overflow
        # that a very large attempt number produces.
        raw = config.base_delay_sec * (2.0 ** (attempt - 1))
    except OverflowError:
        # The true delay is beyond float range; let the cap below decide.
        raw = float("inf") if config.base_delay_sec > 0 else 0.0
    delay = min(raw, config.max_delay_sec)
    if config.jitter > 0:
        source = rng if rng is not None else random
        delay *= 1.0 + source.uniform(-config.jitter, config.jitter)
    return max(delay, 0.0)
104
+
105
+
106
async def retry_async(
    func: Callable[[], Awaitable[T]],
    config: RetryConfig | None = None,
    *,
    label: str = "adapter call",
    sleep: Callable[[float], Awaitable[None]] | None = None,
    rng: random.Random | None = None,
) -> T:
    """Run an async callable with exponential-backoff retry.

    Args:
        func: zero-arg async callable to invoke. Wrap your call in a lambda
            or nested coroutine def.
        config: retry policy. Defaults to `RetryConfig()`.
        label: human-readable label for log lines (e.g. "anthropic.chat").
        sleep: injectable async sleep. Defaults to `asyncio.sleep`. Tests
            override this to avoid real wall-clock waits.
        rng: injectable RNG for jitter. Tests pass a seeded `random.Random`
            for deterministic backoff timing.

    Returns:
        Whatever `func()` returns on success.

    Raises:
        The last exception, unmodified, after all attempts are exhausted.
        Non-transient errors raise immediately (no retry).
    """
    cfg = config or RetryConfig()
    _sleep = sleep or asyncio.sleep
    # Always make at least one call: a policy with max_attempts < 1 used to
    # skip the loop entirely and fail without ever invoking `func`.
    total_attempts = max(1, cfg.max_attempts)

    attempt = 1
    while True:
        try:
            return await func()
        # `Exception`, not `BaseException`: cancellation, KeyboardInterrupt
        # and SystemExit are never retryable and propagate untouched.
        except Exception as exc:
            if attempt >= total_attempts or not is_transient_error(exc):
                # Permanent error or out of retries - re-raise unmodified.
                raise
            delay = _compute_backoff(attempt, cfg, rng)
            logger.info(
                "[%s] attempt %d/%d failed (%s: %s); retrying in %.2fs",
                label, attempt, total_attempts,
                type(exc).__name__, str(exc)[:120], delay,
            )
        # Sleep outside the handler so the failed attempt's exception is
        # not kept as the active exception while waiting.
        await _sleep(delay)
        attempt += 1
159
+
160
+
161
def wrap_as_adapter_error(exc: BaseException, prefix: str) -> AdapterError:
    """Convert an upstream exception into the matching AdapterError subclass.

    `TieredRouter` (Sprint 2.6 / F-2) branches on the subclass returned:
      - `TransientAdapterError` → safe to fail over to the other tier
      - `PermanentAdapterError` → bubble up so the caller sees the real error

    Classification is delegated to `is_transient_error`. The message is
    `"<prefix>: <str(exc)>"`, which gives logs enough detail to debug from;
    the returned error is constructed from that message alone. Callers
    chain the original exception themselves with `raise ... from exc`.
    """
    message = f"{prefix}: {exc}"
    error_cls = (
        TransientAdapterError if is_transient_error(exc) else PermanentAdapterError
    )
    return error_cls(message)


# Backward-compat alias under the underscore-prefixed spelling. Both names
# are part of the adapter-internal contract; new code and tests should
# prefer `wrap_as_adapter_error`.
_wrap_as_adapter_error = wrap_as_adapter_error