relay-ai-sdk 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ id-token: write
9
+
10
+ jobs:
11
+ publish:
12
+ runs-on: ubuntu-latest
13
+ environment:
14
+ name: pypi
15
+ url: https://pypi.org/p/relay-ai-sdk
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - uses: actions/setup-python@v5
20
+ with:
21
+ python-version: "3.12"
22
+
23
+ - name: Build
24
+ run: |
25
+ pip install build
26
+ python -m build
27
+
28
+ - name: Publish to PyPI
29
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,15 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .venv/
10
+ venv/
11
+ .env
12
+ .mypy_cache/
13
+ .ruff_cache/
14
+ .pytest_cache/
15
+ .reviewer/
@@ -0,0 +1,241 @@
1
+ Metadata-Version: 2.4
2
+ Name: relay-ai-sdk
3
+ Version: 2.0.0
4
+ Summary: Official Python SDK for the Relay AI Gateway. One key, every model.
5
+ Project-URL: Homepage, https://relay.ai5labs.com
6
+ Project-URL: Documentation, https://relay.ai5labs.com/docs/sdk
7
+ Project-URL: Repository, https://github.com/ai5labs/relay-sdk
8
+ Author-email: ai5labs <relay@ai5labs.com>
9
+ License-Expression: MIT
10
+ Keywords: ai,ai-gateway,llm,multi-model,opentelemetry,relay,sdk,streaming,tool-calling
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Typing :: Typed
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: httpx>=0.25.0
23
+ Requires-Dist: pydantic>=2.0
24
+ Provides-Extra: otel
25
+ Requires-Dist: opentelemetry-api>=1.27; extra == 'otel'
26
+ Requires-Dist: opentelemetry-sdk>=1.27; extra == 'otel'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # Relay AI SDK
30
+
31
+ Official Python SDK for the [Relay AI Gateway](https://relay.ai5labs.com). One key, every model.
32
+
33
+ ```bash
34
+ pip install relay-ai-sdk
35
+ ```
36
+
37
+ With OpenTelemetry:
38
+
39
+ ```bash
40
+ pip install "relay-ai-sdk[otel]"
41
+ ```
42
+
43
+ ## Quick start
44
+
45
+ ```python
46
+ from relay_ai import Relay
47
+
48
+ client = Relay(api_key="sk-relay-...")
49
+
50
+ response = client.chat("claude-sonnet-4.6", messages=[
51
+ {"role": "user", "content": "Explain quantum computing in one sentence."}
52
+ ])
53
+ print(response.text)
54
+ print(f"Tokens: {response.usage.total_tokens}")
55
+ ```
56
+
57
+ ## Streaming
58
+
59
+ ```python
60
+ with client.chat("gemini-3.5-flash", messages=[
61
+ {"role": "user", "content": "Write a haiku about code."}
62
+ ], stream=True) as stream:
63
+ for chunk in stream:
64
+ print(chunk.text, end="", flush=True)
65
+
66
+ final = stream.get_final_response()
67
+ print(f"\nTokens: {final.usage.total_tokens}")
68
+ ```
69
+
70
+ ## Async
71
+
72
+ ```python
73
+ from relay_ai import AsyncRelay
74
+
75
+ async with AsyncRelay() as client:
76
+ response = await client.chat("claude-opus-4.8", messages=[
77
+ {"role": "user", "content": "Hello!"}
78
+ ])
79
+ print(response.text)
80
+ ```
81
+
82
+ ## Tool calling
83
+
84
+ ```python
85
+ tools = [{
86
+ "type": "function",
87
+ "function": {
88
+ "name": "get_weather",
89
+ "description": "Get current weather",
90
+ "parameters": {
91
+ "type": "object",
92
+ "properties": {"city": {"type": "string"}},
93
+ "required": ["city"],
94
+ },
95
+ },
96
+ }]
97
+
98
+ response = client.chat("claude-sonnet-4.6", messages=[
99
+ {"role": "user", "content": "What's the weather in Tokyo?"}
100
+ ], tools=tools)
101
+
102
+ for tc in response.tool_calls:
103
+ print(f"{tc.function_name}({tc.function_arguments})")
104
+ ```
105
+
106
+ ## Image generation
107
+
108
+ ```python
109
+ result = client.images("flux-schnell", prompt="A cat astronaut on Mars")
110
+ print(result.images[0])
111
+ ```
112
+
113
+ ## Audio
114
+
115
+ ```python
116
+ # Transcription
117
+ transcript = client.transcribe("whisper-1", "meeting.mp3")
118
+ print(transcript.text)
119
+
120
+ # Text-to-speech
121
+ audio = client.speech("tts-1", "Hello from Relay!")
122
+ with open("output.mp3", "wb") as f:
123
+ f.write(audio.audio)
124
+ ```
125
+
126
+ ## Semantic routing
127
+
128
+ ```python
129
+ decision = client.route(
130
+ messages=[{"role": "user", "content": "Prove the Riemann hypothesis"}],
131
+ candidates=["claude-opus-4.8", "claude-sonnet-4.6", "gemini-3.5-flash"],
132
+ )
133
+ print(f"Best model: {decision.alias} ({decision.confidence:.0%})")
134
+ print(f"Reasoning: {decision.reasoning}")
135
+ ```
136
+
137
+ ## Batch processing
138
+
139
+ ```python
140
+ results = client.batch("claude-sonnet-4.6", [
141
+ {"messages": [{"role": "user", "content": "What is 2+2?"}]},
142
+ {"messages": [{"role": "user", "content": "What is 3+3?"}]},
143
+ {"messages": [{"role": "user", "content": "What is 4+4?"}]},
144
+ ], max_concurrent=5)
145
+
146
+ for r in results:
147
+ if r.response:
148
+ print(f"[{r.index}] {r.response.text}")
149
+ else:
150
+ print(f"[{r.index}] Error: {r.error}")
151
+ ```
152
+
153
+ ## Credits
154
+
155
+ ```python
156
+ state = client.credits()
157
+ print(f"Balance: ${state.balance_cents / 100:.2f}")
158
+ ```
159
+
160
+ ## Error handling
161
+
162
+ ```python
163
+ from relay_ai import (
164
+ RelayError,
165
+ AuthenticationError,
166
+ RateLimitError,
167
+ InsufficientCreditsError,
168
+ ModelNotFoundError,
169
+ )
170
+
171
+ try:
172
+ response = client.chat("gpt-5", messages=[...])
173
+ except AuthenticationError:
174
+ print("Invalid API key")
175
+ except RateLimitError as e:
176
+ print(f"Rate limited. Retry after {e.retry_after}s")
177
+ except InsufficientCreditsError:
178
+ print("Top up your credits at relay.ai5labs.com")
179
+ except ModelNotFoundError:
180
+ print("Model not found")
181
+ except RelayError as e:
182
+ print(f"Error: {e.message}")
183
+ ```
184
+
185
+ ## CLI
186
+
187
+ ```bash
188
+ export RELAY_API_KEY=sk-relay-...
189
+
190
+ relay models # List models
191
+ relay chat claude-sonnet-4.6 "Hello!" # Quick chat
192
+ relay chat gemini-3.5-flash "Hi" --stream # Stream tokens
193
+ relay credits # Check balance
194
+ relay version # SDK version
195
+ ```
196
+
197
+ ## Configuration
198
+
199
+ ```python
200
+ client = Relay(
201
+ api_key="sk-relay-...", # or set RELAY_API_KEY env var
202
+ base_url="https://...", # custom gateway URL
203
+ timeout=120.0, # request timeout (seconds)
204
+ max_retries=2, # automatic retries on 429/5xx
205
+ send_telemetry=True, # usage analytics (metadata only)
206
+ http_client=httpx.Client(), # custom httpx client
207
+ )
208
+ ```
209
+
210
+ ## Telemetry
211
+
212
+ The SDK sends anonymous usage metadata (model, token counts, latency) to improve the service. **No message content, prompts, responses, or tool arguments are ever transmitted.** This is enforced by a client-side allowlist and backed up by server-side stripping of any other fields.
213
+
214
+ Disable with:
215
+
216
+ ```python
217
+ client = Relay(send_telemetry=False)
218
+ ```
219
+
220
+ ## OpenTelemetry
221
+
222
+ ```python
223
+ from relay_ai import Relay
224
+ from relay_ai._otel import instrument, RelaySpanExporter
225
+ from opentelemetry.sdk.trace import TracerProvider
226
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
227
+
228
+ provider = TracerProvider()
229
+ provider.add_span_processor(
230
+ BatchSpanProcessor(
231
+ RelaySpanExporter(api_key="sk-relay-...", base_url="https://api.relay.ai5labs.com/v1")
232
+ )
233
+ )
234
+
235
+ client = instrument(Relay())
236
+ response = client.chat(...) # Automatically creates OTel spans
237
+ ```
238
+
239
+ ## License
240
+
241
+ MIT
@@ -0,0 +1,213 @@
1
+ # Relay AI SDK
2
+
3
+ Official Python SDK for the [Relay AI Gateway](https://relay.ai5labs.com). One key, every model.
4
+
5
+ ```bash
6
+ pip install relay-ai-sdk
7
+ ```
8
+
9
+ With OpenTelemetry:
10
+
11
+ ```bash
12
+ pip install "relay-ai-sdk[otel]"
13
+ ```
14
+
15
+ ## Quick start
16
+
17
+ ```python
18
+ from relay_ai import Relay
19
+
20
+ client = Relay(api_key="sk-relay-...")
21
+
22
+ response = client.chat("claude-sonnet-4.6", messages=[
23
+ {"role": "user", "content": "Explain quantum computing in one sentence."}
24
+ ])
25
+ print(response.text)
26
+ print(f"Tokens: {response.usage.total_tokens}")
27
+ ```
28
+
29
+ ## Streaming
30
+
31
+ ```python
32
+ with client.chat("gemini-3.5-flash", messages=[
33
+ {"role": "user", "content": "Write a haiku about code."}
34
+ ], stream=True) as stream:
35
+ for chunk in stream:
36
+ print(chunk.text, end="", flush=True)
37
+
38
+ final = stream.get_final_response()
39
+ print(f"\nTokens: {final.usage.total_tokens}")
40
+ ```
41
+
42
+ ## Async
43
+
44
+ ```python
45
+ from relay_ai import AsyncRelay
46
+
47
+ async with AsyncRelay() as client:
48
+ response = await client.chat("claude-opus-4.8", messages=[
49
+ {"role": "user", "content": "Hello!"}
50
+ ])
51
+ print(response.text)
52
+ ```
53
+
54
+ ## Tool calling
55
+
56
+ ```python
57
+ tools = [{
58
+ "type": "function",
59
+ "function": {
60
+ "name": "get_weather",
61
+ "description": "Get current weather",
62
+ "parameters": {
63
+ "type": "object",
64
+ "properties": {"city": {"type": "string"}},
65
+ "required": ["city"],
66
+ },
67
+ },
68
+ }]
69
+
70
+ response = client.chat("claude-sonnet-4.6", messages=[
71
+ {"role": "user", "content": "What's the weather in Tokyo?"}
72
+ ], tools=tools)
73
+
74
+ for tc in response.tool_calls:
75
+ print(f"{tc.function_name}({tc.function_arguments})")
76
+ ```
77
+
78
+ ## Image generation
79
+
80
+ ```python
81
+ result = client.images("flux-schnell", prompt="A cat astronaut on Mars")
82
+ print(result.images[0])
83
+ ```
84
+
85
+ ## Audio
86
+
87
+ ```python
88
+ # Transcription
89
+ transcript = client.transcribe("whisper-1", "meeting.mp3")
90
+ print(transcript.text)
91
+
92
+ # Text-to-speech
93
+ audio = client.speech("tts-1", "Hello from Relay!")
94
+ with open("output.mp3", "wb") as f:
95
+ f.write(audio.audio)
96
+ ```
97
+
98
+ ## Semantic routing
99
+
100
+ ```python
101
+ decision = client.route(
102
+ messages=[{"role": "user", "content": "Prove the Riemann hypothesis"}],
103
+ candidates=["claude-opus-4.8", "claude-sonnet-4.6", "gemini-3.5-flash"],
104
+ )
105
+ print(f"Best model: {decision.alias} ({decision.confidence:.0%})")
106
+ print(f"Reasoning: {decision.reasoning}")
107
+ ```
108
+
109
+ ## Batch processing
110
+
111
+ ```python
112
+ results = client.batch("claude-sonnet-4.6", [
113
+ {"messages": [{"role": "user", "content": "What is 2+2?"}]},
114
+ {"messages": [{"role": "user", "content": "What is 3+3?"}]},
115
+ {"messages": [{"role": "user", "content": "What is 4+4?"}]},
116
+ ], max_concurrent=5)
117
+
118
+ for r in results:
119
+ if r.response:
120
+ print(f"[{r.index}] {r.response.text}")
121
+ else:
122
+ print(f"[{r.index}] Error: {r.error}")
123
+ ```
124
+
125
+ ## Credits
126
+
127
+ ```python
128
+ state = client.credits()
129
+ print(f"Balance: ${state.balance_cents / 100:.2f}")
130
+ ```
131
+
132
+ ## Error handling
133
+
134
+ ```python
135
+ from relay_ai import (
136
+ RelayError,
137
+ AuthenticationError,
138
+ RateLimitError,
139
+ InsufficientCreditsError,
140
+ ModelNotFoundError,
141
+ )
142
+
143
+ try:
144
+ response = client.chat("gpt-5", messages=[...])
145
+ except AuthenticationError:
146
+ print("Invalid API key")
147
+ except RateLimitError as e:
148
+ print(f"Rate limited. Retry after {e.retry_after}s")
149
+ except InsufficientCreditsError:
150
+ print("Top up your credits at relay.ai5labs.com")
151
+ except ModelNotFoundError:
152
+ print("Model not found")
153
+ except RelayError as e:
154
+ print(f"Error: {e.message}")
155
+ ```
156
+
157
+ ## CLI
158
+
159
+ ```bash
160
+ export RELAY_API_KEY=sk-relay-...
161
+
162
+ relay models # List models
163
+ relay chat claude-sonnet-4.6 "Hello!" # Quick chat
164
+ relay chat gemini-3.5-flash "Hi" --stream # Stream tokens
165
+ relay credits # Check balance
166
+ relay version # SDK version
167
+ ```
168
+
169
+ ## Configuration
170
+
171
+ ```python
172
+ client = Relay(
173
+ api_key="sk-relay-...", # or set RELAY_API_KEY env var
174
+ base_url="https://...", # custom gateway URL
175
+ timeout=120.0, # request timeout (seconds)
176
+ max_retries=2, # automatic retries on 429/5xx
177
+ send_telemetry=True, # usage analytics (metadata only)
178
+ http_client=httpx.Client(), # custom httpx client
179
+ )
180
+ ```
181
+
182
+ ## Telemetry
183
+
184
+ The SDK sends anonymous usage metadata (model, token counts, latency) to improve the service. **No message content, prompts, responses, or tool arguments are ever transmitted.** This is enforced by a client-side allowlist and backed up by server-side stripping of any other fields.
185
+
186
+ Disable with:
187
+
188
+ ```python
189
+ client = Relay(send_telemetry=False)
190
+ ```
191
+
192
+ ## OpenTelemetry
193
+
194
+ ```python
195
+ from relay_ai import Relay
196
+ from relay_ai._otel import instrument, RelaySpanExporter
197
+ from opentelemetry.sdk.trace import TracerProvider
198
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
199
+
200
+ provider = TracerProvider()
201
+ provider.add_span_processor(
202
+ BatchSpanProcessor(
203
+ RelaySpanExporter(api_key="sk-relay-...", base_url="https://api.relay.ai5labs.com/v1")
204
+ )
205
+ )
206
+
207
+ client = instrument(Relay())
208
+ response = client.chat(...) # Automatically creates OTel spans
209
+ ```
210
+
211
+ ## License
212
+
213
+ MIT
@@ -0,0 +1,58 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "relay-ai-sdk"
7
+ version = "2.0.0"
8
+ description = "Official Python SDK for the Relay AI Gateway. One key, every model."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ authors = [{ name = "ai5labs", email = "relay@ai5labs.com" }]
13
+ keywords = [
14
+ "llm", "ai", "relay", "ai-gateway", "multi-model", "sdk",
15
+ "streaming", "tool-calling", "opentelemetry",
16
+ ]
17
+ classifiers = [
18
+ "Development Status :: 5 - Production/Stable",
19
+ "Intended Audience :: Developers",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
27
+ "Typing :: Typed",
28
+ ]
29
+ dependencies = [
30
+ "httpx>=0.25.0",
31
+ "pydantic>=2.0",
32
+ ]
33
+
34
+ [project.optional-dependencies]
35
+ otel = [
36
+ "opentelemetry-api>=1.27",
37
+ "opentelemetry-sdk>=1.27",
38
+ ]
39
+
40
+ [project.scripts]
41
+ relay = "relay_ai._cli:main"
42
+
43
+ [project.urls]
44
+ Homepage = "https://relay.ai5labs.com"
45
+ Documentation = "https://relay.ai5labs.com/docs/sdk"
46
+ Repository = "https://github.com/ai5labs/relay-sdk"
47
+
48
+ [tool.hatch.build.targets.wheel]
49
+ packages = ["src/relay_ai"]
50
+
51
+ [dependency-groups]
52
+ dev = [
53
+ "pytest>=8.0",
54
+ "pytest-asyncio>=0.23",
55
+ "respx>=0.21",
56
+ "ruff>=0.6",
57
+ "mypy>=1.11",
58
+ ]
@@ -0,0 +1,85 @@
1
+ """Relay AI SDK — one key, every model.
2
+
3
+ Usage::
4
+
5
+ from relay_ai import Relay
6
+
7
+ client = Relay(api_key="sk-relay-...")
8
+ response = client.chat("claude-sonnet-4.6", messages=[
9
+ {"role": "user", "content": "Hello!"}
10
+ ])
11
+ print(response.text)
12
+ """
13
+
14
+ from relay_ai._client import AsyncRelay, Relay
15
+ from relay_ai._errors import (
16
+ APIConnectionError,
17
+ APIStatusError,
18
+ APITimeoutError,
19
+ AuthenticationError,
20
+ BadRequestError,
21
+ ContentPolicyError,
22
+ ContextWindowError,
23
+ InsufficientCreditsError,
24
+ InternalServerError,
25
+ ModelNotFoundError,
26
+ NotFoundError,
27
+ PermissionDeniedError,
28
+ RateLimitError,
29
+ RelayError,
30
+ )
31
+ from relay_ai._streaming import AsyncStream, Stream
32
+ from relay_ai._types import (
33
+ AudioResponse,
34
+ BatchResult,
35
+ ChatResponse,
36
+ CreditState,
37
+ ImageResponse,
38
+ RouteAlternate,
39
+ RouteResponse,
40
+ SpeechResponse,
41
+ StreamChunk,
42
+ ToolCall,
43
+ ToolCallDelta,
44
+ Usage,
45
+ )
46
+ from relay_ai._version import __version__
47
+
48
+ __all__ = [
49
+ # Version
50
+ "__version__",
51
+ # Clients
52
+ "Relay",
53
+ "AsyncRelay",
54
+ # Streaming
55
+ "Stream",
56
+ "AsyncStream",
57
+ # Response types
58
+ "ChatResponse",
59
+ "StreamChunk",
60
+ "ImageResponse",
61
+ "AudioResponse",
62
+ "SpeechResponse",
63
+ "RouteResponse",
64
+ "RouteAlternate",
65
+ "CreditState",
66
+ "BatchResult",
67
+ "Usage",
68
+ "ToolCall",
69
+ "ToolCallDelta",
70
+ # Errors
71
+ "RelayError",
72
+ "APIConnectionError",
73
+ "APITimeoutError",
74
+ "APIStatusError",
75
+ "AuthenticationError",
76
+ "InsufficientCreditsError",
77
+ "PermissionDeniedError",
78
+ "NotFoundError",
79
+ "ModelNotFoundError",
80
+ "RateLimitError",
81
+ "BadRequestError",
82
+ "ContentPolicyError",
83
+ "ContextWindowError",
84
+ "InternalServerError",
85
+ ]
@@ -0,0 +1,62 @@
1
+ """Batch processing — concurrent fan-out through the gateway."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from concurrent.futures import ThreadPoolExecutor, as_completed
7
+ from typing import TYPE_CHECKING, Any
8
+
9
+ from relay_ai._types import BatchResult
10
+
11
+ if TYPE_CHECKING:
12
+ from relay_ai._client import AsyncRelay, Relay
13
+
14
+
15
def batch_sync(
    client: Relay,
    model: str,
    requests: list[dict[str, Any]],
    *,
    max_concurrent: int = 10,
) -> list[BatchResult]:
    """Fan out *requests* through ``client.chat()`` using a thread pool.

    Args:
        client: Synchronous client whose ``chat`` method is called once per
            request.
        model: Model identifier passed as the first argument of every call.
        requests: Keyword-argument dicts; each one is expanded into
            ``client.chat(model, **request)``.
        max_concurrent: Upper bound on the number of worker threads. Values
            below 1 are treated as 1.

    Returns:
        One ``BatchResult`` per request, in the same order as *requests*.
        A failed request yields a result whose ``error`` holds ``str(exc)``
        instead of raising, so one failure never aborts the batch.
    """

    def _one(idx: int, req: dict[str, Any]) -> BatchResult:
        # Building the result stays inside the ``try`` so that a response
        # the result model rejects is also reported as a per-item error.
        try:
            resp = client.chat(model, **req)
            return BatchResult(index=idx, response=resp)  # type: ignore[arg-type]
        except Exception as exc:
            return BatchResult(index=idx, error=str(exc))

    # ThreadPoolExecutor rejects max_workers <= 0, so clamp to at least one
    # worker; this covers an empty batch as well as a zero/negative limit.
    workers = max(1, min(max_concurrent, len(requests)))

    # Results arrive in completion order; slot each one back by its index.
    results: list[BatchResult | None] = [None] * len(requests)
    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = [pool.submit(_one, i, r) for i, r in enumerate(requests)]
        for future in as_completed(futures):
            result = future.result()
            results[result.index] = result

    return [r for r in results if r is not None]
40
+
41
+
42
async def batch_async(
    client: AsyncRelay,
    model: str,
    requests: list[dict[str, Any]],
    *,
    max_concurrent: int = 10,
) -> list[BatchResult]:
    """Fan out *requests* through ``client.chat()`` using an asyncio semaphore.

    Args:
        client: Asynchronous client whose ``chat`` coroutine is awaited once
            per request.
        model: Model identifier passed as the first argument of every call.
        requests: Keyword-argument dicts; each one is expanded into
            ``client.chat(model, **request)``.
        max_concurrent: Maximum number of calls in flight at once. Values
            below 1 are treated as 1.

    Returns:
        One ``BatchResult`` per request, in the same order as *requests*.
        A failed request yields a result whose ``error`` holds ``str(exc)``
        instead of raising, so one failure never aborts the batch.
    """
    # A semaphore created with 0 would block every task forever, and a
    # negative value raises ValueError, so clamp the limit to at least 1.
    sem = asyncio.Semaphore(max(1, max_concurrent))

    async def _one(idx: int, req: dict[str, Any]) -> BatchResult:
        async with sem:
            # Building the result stays inside the ``try`` so that a response
            # the result model rejects is also reported as a per-item error.
            try:
                resp = await client.chat(model, **req)
                return BatchResult(index=idx, response=resp)  # type: ignore[arg-type]
            except Exception as exc:
                return BatchResult(index=idx, error=str(exc))

    # gather() returns results in the order the awaitables were passed,
    # which is already request order, so no re-sorting is needed.
    results = await asyncio.gather(*(_one(i, r) for i, r in enumerate(requests)))
    return list(results)