arbr-client 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arbr_client-0.2.0/LICENSE +21 -0
- arbr_client-0.2.0/PKG-INFO +249 -0
- arbr_client-0.2.0/README.md +226 -0
- arbr_client-0.2.0/pyproject.toml +39 -0
- arbr_client-0.2.0/setup.cfg +4 -0
- arbr_client-0.2.0/src/arbr_client/__init__.py +537 -0
- arbr_client-0.2.0/src/arbr_client/langchain.py +119 -0
- arbr_client-0.2.0/src/arbr_client.egg-info/PKG-INFO +249 -0
- arbr_client-0.2.0/src/arbr_client.egg-info/SOURCES.txt +12 -0
- arbr_client-0.2.0/src/arbr_client.egg-info/dependency_links.txt +1 -0
- arbr_client-0.2.0/src/arbr_client.egg-info/requires.txt +6 -0
- arbr_client-0.2.0/src/arbr_client.egg-info/top_level.txt +1 -0
- arbr_client-0.2.0/tests/test_client.py +306 -0
- arbr_client-0.2.0/tests/test_langchain.py +74 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Gyde
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: arbr-client
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Official Python client for the Arbr AI control-plane gateway — one function to route, observe, and govern every LLM call.
|
|
5
|
+
Author: Gyde
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: llm,ai,gateway,routing,control-plane,openai,anthropic,gemini,bedrock,cost
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Provides-Extra: langchain
|
|
19
|
+
Requires-Dist: langchain-core>=0.2; extra == "langchain"
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
# arbr-client (Python)
|
|
25
|
+
|
|
26
|
+
Official Python client for the **Arbr AI control plane** — one function to route, observe,
|
|
27
|
+
and govern every LLM call your app makes.
|
|
28
|
+
|
|
29
|
+
Your app calls the gateway instead of provider SDKs. The gateway holds the provider keys,
|
|
30
|
+
honors the model you pin (or picks one when you say `"auto"`), applies human-approved routing
|
|
31
|
+
rules and cost policies, and logs every call with full cost attribution — visible in the dashboard.
|
|
32
|
+
|
|
33
|
+
- **Zero dependencies** — Python ≥ 3.11, stdlib only. Sync *and* async (`achat`/`astream`).
|
|
34
|
+
- **One function for the 90% case** — `chat()`.
|
|
35
|
+
- **Robust by default** — per-attempt timeouts, retries with exponential backoff + jitter on
|
|
36
|
+
network errors / 429 / 5xx, typed errors.
|
|
37
|
+
- **Optional LangChain integration** — a real `BaseChatModel` via `arbr-client[langchain]`.
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
```sh
|
|
42
|
+
pip install arbr-client # core (zero deps)
|
|
43
|
+
pip install "arbr-client[langchain]" # + the LangChain BaseChatModel adapter
|
|
44
|
+
# (pre-release: pip install /path/to/arbr_client-0.2.0-py3-none-any.whl)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## 60-second quickstart
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from arbr_client import create_client
|
|
51
|
+
|
|
52
|
+
arbr = create_client(
|
|
53
|
+
"http://localhost:4100", # or set ARBR_GATEWAY_URL
|
|
54
|
+
application="my-app", # attribution — shows up in the dashboard
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
res = arbr.chat("Summarise this support ticket: ...", model="auto", max_tokens=300)
|
|
58
|
+
print(res.text)
|
|
59
|
+
print(res.model, res.routing_decision) # e.g. "gpt-4o-mini", "ai"
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Async (FastAPI, LangGraph, etc.):
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
res = await arbr.achat("Summarise this ticket: ...", model="auto")
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
That's a complete integration. No provider keys in your app, and every call is logged,
|
|
69
|
+
costed, and governable from the dashboard.
|
|
70
|
+
|
|
71
|
+
## How model choice works
|
|
72
|
+
|
|
73
|
+
| You send | What happens |
|
|
74
|
+
|---|---|
|
|
75
|
+
| `model="gpt-4o"` (provider connected) | Honored **as-is** — all routing policies skipped. `routing_decision == "explicit"` |
|
|
76
|
+
| `model="auto"` or omitted | The gateway decides: cache → operator rules → automated routing (cost guardrail or AI policy) → default model |
|
|
77
|
+
| a model whose provider isn't connected | Falls back to the router (same as `"auto"`) |
|
|
78
|
+
|
|
79
|
+
`res.model_requested` shows what you asked for, `res.model` what served it, `res.routing_decision`
|
|
80
|
+
why (`explicit / rule / auto / ai / cache / fallback / passthrough`), and `res.classified_by` how
|
|
81
|
+
the task type was determined (`provided / keyword / ai`).
|
|
82
|
+
|
|
83
|
+
## API
|
|
84
|
+
|
|
85
|
+
### `create_client(base_url=None, *, application=None, workflow=None, department=None, user_id=None, api_key=None, timeout_s=60, retries=2) → Client`
|
|
86
|
+
|
|
87
|
+
`base_url` falls back to `$ARBR_GATEWAY_URL`; `api_key` to `$ARBR_API_KEY`. A gateway API key
|
|
88
|
+
(`ab_…`, dashboard → Settings → API keys) is sent as `Authorization: Bearer` and binds attribution
|
|
89
|
+
server-side — required once the gateway has *Require API keys* on. The metadata kwargs are defaults
|
|
90
|
+
merged into every call (per-call kwargs override them).
|
|
91
|
+
|
|
92
|
+
### `Client.chat(messages, *, model=None, provider=None, task_type=None, temperature=None, max_tokens=None, ...) → ChatResponse`
|
|
93
|
+
|
|
94
|
+
`messages` accepts a bare string, `{"role", "content"}` dicts, or LangChain message objects.
|
|
95
|
+
`ChatResponse` is a frozen dataclass: `text`, `usage` (`input_tokens/output_tokens/total_tokens`),
|
|
96
|
+
`model`, `model_requested`, `provider`, `routing_decision`, `classified_by`, `cache_hit`,
|
|
97
|
+
`request_id`, plus `.raw` (the unmodified gateway payload).
|
|
98
|
+
|
|
99
|
+
### `Client.achat(...)` / `Client.astream(...)` / `Client.astatus()`
|
|
100
|
+
|
|
101
|
+
Async counterparts (the blocking call runs in a worker thread via `asyncio.to_thread`).
|
|
102
|
+
|
|
103
|
+
### Streaming
|
|
104
|
+
|
|
105
|
+
The gateway supports two streaming modes:
|
|
106
|
+
|
|
107
|
+
**Real SSE (token-by-token)** — use the OpenAI-compatible endpoint at `POST /v1/chat/completions`
|
|
108
|
+
with `stream=True`. Works with the OpenAI Python SDK, any chat UI, or a raw `httpx`/`requests` call:
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
from openai import OpenAI
|
|
112
|
+
|
|
113
|
+
client = OpenAI(api_key="ab_…", base_url="http://localhost:4100/v1")
|
|
114
|
+
stream = client.chat.completions.create(
|
|
115
|
+
model="gpt-4o-mini",
|
|
116
|
+
messages=[{"role": "user", "content": "Tell me a joke"}],
|
|
117
|
+
stream=True,
|
|
118
|
+
)
|
|
119
|
+
for chunk in stream:
|
|
120
|
+
print(chunk.choices[0].delta.content or "", end="", flush=True)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
**`Client.stream(messages, ...) → Iterator[str]`** — makes one buffered `chat()` call and yields
|
|
124
|
+
the text in small chunks. Useful when you want full routing metadata (`res.model`,
|
|
125
|
+
`res.routing_decision`, etc.) alongside a streaming-style emit:
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
for chunk in arbr.stream("Explain quantum entanglement simply"):
|
|
129
|
+
print(chunk, end="", flush=True)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
Use the OpenAI-compat endpoint when you need real token-by-token delivery or are integrating with
|
|
133
|
+
chat UIs. Use `stream()` when you want the routing metadata the OpenAI endpoint doesn't expose.
|
|
134
|
+
|
|
135
|
+
### `Client.status() → dict`
|
|
136
|
+
|
|
137
|
+
Healthcheck against `GET /api/status` — `demoMode`, `liveProviders`, `defaultProvider`,
|
|
138
|
+
`defaultModel`, `routingMode`, `breachedCaps`.
|
|
139
|
+
When the gateway has admin auth enabled (`ARBR_ADMIN_KEY` set server-side), this endpoint
|
|
140
|
+
requires a credential — your gateway `api_key` is accepted, so set it and `status()` keeps working.
|
|
141
|
+
|
|
142
|
+
### `Client.models() → dict`
|
|
143
|
+
|
|
144
|
+
List every model available on this Arbr instance — `GET /v1/models`.
|
|
145
|
+
Uses the same gateway API key as chat calls (no admin key needed).
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
result = arbr.models()
|
|
149
|
+
|
|
150
|
+
# Filter and sort by tier / cost
|
|
151
|
+
cheap = sorted(
|
|
152
|
+
[m for m in result["data"] if m["tier"] == "light"],
|
|
153
|
+
key=lambda m: m["inputPer1M"],
|
|
154
|
+
)
|
|
155
|
+
print(cheap[0]["id"], cheap[0]["provider"]) # e.g. "us.amazon.nova-micro-v1:0", "bedrock-nova"
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Response shape is OpenAI-compatible (`{"object": "list", "data": [...]}`) with Arbr extensions:
|
|
159
|
+
|
|
160
|
+
| Field | Type | Description |
|
|
161
|
+
|---|---|---|
|
|
162
|
+
| `id` | str | Model ID — pass as `model=` in chat calls |
|
|
163
|
+
| `provider` | str | Underlying provider (`"openai"`, `"bedrock-nova"`, `"anthropic"`, …) |
|
|
164
|
+
| `label` | str | Human-readable name |
|
|
165
|
+
| `tier` | str | `"light"` / `"mid"` / `"premium"` |
|
|
166
|
+
| `inputPer1M` | float | USD per 1M input tokens |
|
|
167
|
+
| `outputPer1M` | float | USD per 1M output tokens |
|
|
168
|
+
|
|
169
|
+
Async counterpart: `await arbr.amodels()`.
|
|
170
|
+
|
|
171
|
+
### `Client.providers() → dict`
|
|
172
|
+
|
|
173
|
+
List configured live providers — `GET /v1/providers`.
|
|
174
|
+
Returns `{"object": "list", "data": [{"id": ..., "models": [...]}]}`. No credentials exposed.
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
result = arbr.providers()
|
|
178
|
+
|
|
179
|
+
for p in result["data"]:
|
|
180
|
+
print(p["id"], "→", len(p["models"]), "models")
|
|
181
|
+
|
|
182
|
+
# openai → 2 models
|
|
183
|
+
# bedrock-nova → 11 models
|
|
184
|
+
# anthropic → 3 models
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
Async counterpart: `await arbr.aproviders()`.
|
|
188
|
+
|
|
189
|
+
## Error handling
|
|
190
|
+
|
|
191
|
+
All failures raise `GatewayError` with `.status`, `.code`, `.retryable`, `.request_id`:
|
|
192
|
+
|
|
193
|
+
| `code` | Meaning | Retried automatically? |
|
|
194
|
+
|---|---|---|
|
|
195
|
+
| `invalid_input` | Bad arguments (caught before any network call) | no |
|
|
196
|
+
| `bad_request` | Gateway rejected the request (HTTP 400) | no |
|
|
197
|
+
| `demo_mode` | Gateway has no provider keys configured (HTTP 503) | no |
|
|
198
|
+
| `provider_error` | All providers failed for this call (HTTP 502) | yes (5xx) |
|
|
199
|
+
| `http_error` | Other non-2xx | 429/5xx only |
|
|
200
|
+
| `invalid_api_key` | Missing/unknown/revoked gateway API key (HTTP 401) | no |
|
|
201
|
+
| `budget_exceeded` | A budget cap with action *Block* is breached for your scope (HTTP 429) | no — retrying won't help until the window rolls past |
|
|
202
|
+
| `rate_limited` | Your API key is over its requests/minute limit (HTTP 429) | yes |
|
|
203
|
+
| `network` | Connection failed | yes |
|
|
204
|
+
| `timeout` | Per-attempt timeout elapsed | yes |
|
|
205
|
+
|
|
206
|
+
## LangChain integration
|
|
207
|
+
|
|
208
|
+
Two options, by how deep your LangChain usage goes:
|
|
209
|
+
|
|
210
|
+
**1. Full `BaseChatModel` (recommended for LangChain/LangGraph apps)** — requires the extra:
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
from arbr_client import create_client
|
|
214
|
+
from arbr_client.langchain import ArbrChatModel
|
|
215
|
+
|
|
216
|
+
client = create_client("http://localhost:4100", application="my-app")
|
|
217
|
+
llm = ArbrChatModel(client=client, model_name="auto", max_tokens=1024)
|
|
218
|
+
|
|
219
|
+
chain = my_prompt | llm # full Runnable compatibility:
|
|
220
|
+
await chain.ainvoke({...}) # pipes, async, batching, callbacks
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
**2. Zero-dep duck-typed adapter** — when you don't want a langchain-core dependency:
|
|
224
|
+
|
|
225
|
+
```python
|
|
226
|
+
from arbr_client import as_langchain_model
|
|
227
|
+
llm = as_langchain_model(client, workflow="answer-drafting")
|
|
228
|
+
msg = llm.invoke(messages) # .invoke()/.ainvoke(); AIMessage-shaped result
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Out of gateway scope either way: tool calling / `with_structured_output`, embeddings, and
|
|
232
|
+
token-level streaming via these adapters — keep those on direct provider SDKs (or, for streaming, use the OpenAI-compatible endpoint).
|
|
233
|
+
|
|
234
|
+
## Gradual rollout pattern
|
|
235
|
+
|
|
236
|
+
Gate the swap at your app's LLM factory so nothing else changes:
|
|
237
|
+
|
|
238
|
+
```python
|
|
239
|
+
def get_llm():
|
|
240
|
+
if os.environ.get("ARBR_GATEWAY_URL"):
|
|
241
|
+
return ArbrChatModel(client=_arbr_client(), model_name=settings.llm_model)
|
|
242
|
+
return build_direct_provider_model() # unchanged path
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
Unset `ARBR_GATEWAY_URL` to revert instantly.
|
|
246
|
+
|
|
247
|
+
## License
|
|
248
|
+
|
|
249
|
+
MIT
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# arbr-client (Python)
|
|
2
|
+
|
|
3
|
+
Official Python client for the **Arbr AI control plane** — one function to route, observe,
|
|
4
|
+
and govern every LLM call your app makes.
|
|
5
|
+
|
|
6
|
+
Your app calls the gateway instead of provider SDKs. The gateway holds the provider keys,
|
|
7
|
+
honors the model you pin (or picks one when you say `"auto"`), applies human-approved routing
|
|
8
|
+
rules and cost policies, and logs every call with full cost attribution — visible in the dashboard.
|
|
9
|
+
|
|
10
|
+
- **Zero dependencies** — Python ≥ 3.11, stdlib only. Sync *and* async (`achat`/`astream`).
|
|
11
|
+
- **One function for the 90% case** — `chat()`.
|
|
12
|
+
- **Robust by default** — per-attempt timeouts, retries with exponential backoff + jitter on
|
|
13
|
+
network errors / 429 / 5xx, typed errors.
|
|
14
|
+
- **Optional LangChain integration** — a real `BaseChatModel` via `arbr-client[langchain]`.
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
```sh
|
|
19
|
+
pip install arbr-client # core (zero deps)
|
|
20
|
+
pip install "arbr-client[langchain]" # + the LangChain BaseChatModel adapter
|
|
21
|
+
# (pre-release: pip install /path/to/arbr_client-0.2.0-py3-none-any.whl)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## 60-second quickstart
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from arbr_client import create_client
|
|
28
|
+
|
|
29
|
+
arbr = create_client(
|
|
30
|
+
"http://localhost:4100", # or set ARBR_GATEWAY_URL
|
|
31
|
+
application="my-app", # attribution — shows up in the dashboard
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
res = arbr.chat("Summarise this support ticket: ...", model="auto", max_tokens=300)
|
|
35
|
+
print(res.text)
|
|
36
|
+
print(res.model, res.routing_decision) # e.g. "gpt-4o-mini", "ai"
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Async (FastAPI, LangGraph, etc.):
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
res = await arbr.achat("Summarise this ticket: ...", model="auto")
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
That's a complete integration. No provider keys in your app, and every call is logged,
|
|
46
|
+
costed, and governable from the dashboard.
|
|
47
|
+
|
|
48
|
+
## How model choice works
|
|
49
|
+
|
|
50
|
+
| You send | What happens |
|
|
51
|
+
|---|---|
|
|
52
|
+
| `model="gpt-4o"` (provider connected) | Honored **as-is** — all routing policies skipped. `routing_decision == "explicit"` |
|
|
53
|
+
| `model="auto"` or omitted | The gateway decides: cache → operator rules → automated routing (cost guardrail or AI policy) → default model |
|
|
54
|
+
| a model whose provider isn't connected | Falls back to the router (same as `"auto"`) |
|
|
55
|
+
|
|
56
|
+
`res.model_requested` shows what you asked for, `res.model` what served it, `res.routing_decision`
|
|
57
|
+
why (`explicit / rule / auto / ai / cache / fallback / passthrough`), and `res.classified_by` how
|
|
58
|
+
the task type was determined (`provided / keyword / ai`).
|
|
59
|
+
|
|
60
|
+
## API
|
|
61
|
+
|
|
62
|
+
### `create_client(base_url=None, *, application=None, workflow=None, department=None, user_id=None, api_key=None, timeout_s=60, retries=2) → Client`
|
|
63
|
+
|
|
64
|
+
`base_url` falls back to `$ARBR_GATEWAY_URL`; `api_key` to `$ARBR_API_KEY`. A gateway API key
|
|
65
|
+
(`ab_…`, dashboard → Settings → API keys) is sent as `Authorization: Bearer` and binds attribution
|
|
66
|
+
server-side — required once the gateway has *Require API keys* on. The metadata kwargs are defaults
|
|
67
|
+
merged into every call (per-call kwargs override them).
|
|
68
|
+
|
|
69
|
+
### `Client.chat(messages, *, model=None, provider=None, task_type=None, temperature=None, max_tokens=None, ...) → ChatResponse`
|
|
70
|
+
|
|
71
|
+
`messages` accepts a bare string, `{"role", "content"}` dicts, or LangChain message objects.
|
|
72
|
+
`ChatResponse` is a frozen dataclass: `text`, `usage` (`input_tokens/output_tokens/total_tokens`),
|
|
73
|
+
`model`, `model_requested`, `provider`, `routing_decision`, `classified_by`, `cache_hit`,
|
|
74
|
+
`request_id`, plus `.raw` (the unmodified gateway payload).
|
|
75
|
+
|
|
76
|
+
### `Client.achat(...)` / `Client.astream(...)` / `Client.astatus()`
|
|
77
|
+
|
|
78
|
+
Async counterparts (the blocking call runs in a worker thread via `asyncio.to_thread`).
|
|
79
|
+
|
|
80
|
+
### Streaming
|
|
81
|
+
|
|
82
|
+
The gateway supports two streaming modes:
|
|
83
|
+
|
|
84
|
+
**Real SSE (token-by-token)** — use the OpenAI-compatible endpoint at `POST /v1/chat/completions`
|
|
85
|
+
with `stream=True`. Works with the OpenAI Python SDK, any chat UI, or a raw `httpx`/`requests` call:
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
from openai import OpenAI
|
|
89
|
+
|
|
90
|
+
client = OpenAI(api_key="ab_…", base_url="http://localhost:4100/v1")
|
|
91
|
+
stream = client.chat.completions.create(
|
|
92
|
+
model="gpt-4o-mini",
|
|
93
|
+
messages=[{"role": "user", "content": "Tell me a joke"}],
|
|
94
|
+
stream=True,
|
|
95
|
+
)
|
|
96
|
+
for chunk in stream:
|
|
97
|
+
print(chunk.choices[0].delta.content or "", end="", flush=True)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
**`Client.stream(messages, ...) → Iterator[str]`** — makes one buffered `chat()` call and yields
|
|
101
|
+
the text in small chunks. Useful when you want full routing metadata (`res.model`,
|
|
102
|
+
`res.routing_decision`, etc.) alongside a streaming-style emit:
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
for chunk in arbr.stream("Explain quantum entanglement simply"):
|
|
106
|
+
print(chunk, end="", flush=True)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Use the OpenAI-compat endpoint when you need real token-by-token delivery or are integrating with
|
|
110
|
+
chat UIs. Use `stream()` when you want the routing metadata the OpenAI endpoint doesn't expose.
|
|
111
|
+
|
|
112
|
+
### `Client.status() → dict`
|
|
113
|
+
|
|
114
|
+
Healthcheck against `GET /api/status` — `demoMode`, `liveProviders`, `defaultProvider`,
|
|
115
|
+
`defaultModel`, `routingMode`, `breachedCaps`.
|
|
116
|
+
When the gateway has admin auth enabled (`ARBR_ADMIN_KEY` set server-side), this endpoint
|
|
117
|
+
requires a credential — your gateway `api_key` is accepted, so set it and `status()` keeps working.
|
|
118
|
+
|
|
119
|
+
### `Client.models() → dict`
|
|
120
|
+
|
|
121
|
+
List every model available on this Arbr instance — `GET /v1/models`.
|
|
122
|
+
Uses the same gateway API key as chat calls (no admin key needed).
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
result = arbr.models()
|
|
126
|
+
|
|
127
|
+
# Filter and sort by tier / cost
|
|
128
|
+
cheap = sorted(
|
|
129
|
+
[m for m in result["data"] if m["tier"] == "light"],
|
|
130
|
+
key=lambda m: m["inputPer1M"],
|
|
131
|
+
)
|
|
132
|
+
print(cheap[0]["id"], cheap[0]["provider"]) # e.g. "us.amazon.nova-micro-v1:0", "bedrock-nova"
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Response shape is OpenAI-compatible (`{"object": "list", "data": [...]}`) with Arbr extensions:
|
|
136
|
+
|
|
137
|
+
| Field | Type | Description |
|
|
138
|
+
|---|---|---|
|
|
139
|
+
| `id` | str | Model ID — pass as `model=` in chat calls |
|
|
140
|
+
| `provider` | str | Underlying provider (`"openai"`, `"bedrock-nova"`, `"anthropic"`, …) |
|
|
141
|
+
| `label` | str | Human-readable name |
|
|
142
|
+
| `tier` | str | `"light"` / `"mid"` / `"premium"` |
|
|
143
|
+
| `inputPer1M` | float | USD per 1M input tokens |
|
|
144
|
+
| `outputPer1M` | float | USD per 1M output tokens |
|
|
145
|
+
|
|
146
|
+
Async counterpart: `await arbr.amodels()`.
|
|
147
|
+
|
|
148
|
+
### `Client.providers() → dict`
|
|
149
|
+
|
|
150
|
+
List configured live providers — `GET /v1/providers`.
|
|
151
|
+
Returns `{"object": "list", "data": [{"id": ..., "models": [...]}]}`. No credentials exposed.
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
result = arbr.providers()
|
|
155
|
+
|
|
156
|
+
for p in result["data"]:
|
|
157
|
+
print(p["id"], "→", len(p["models"]), "models")
|
|
158
|
+
|
|
159
|
+
# openai → 2 models
|
|
160
|
+
# bedrock-nova → 11 models
|
|
161
|
+
# anthropic → 3 models
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Async counterpart: `await arbr.aproviders()`.
|
|
165
|
+
|
|
166
|
+
## Error handling
|
|
167
|
+
|
|
168
|
+
All failures raise `GatewayError` with `.status`, `.code`, `.retryable`, `.request_id`:
|
|
169
|
+
|
|
170
|
+
| `code` | Meaning | Retried automatically? |
|
|
171
|
+
|---|---|---|
|
|
172
|
+
| `invalid_input` | Bad arguments (caught before any network call) | no |
|
|
173
|
+
| `bad_request` | Gateway rejected the request (HTTP 400) | no |
|
|
174
|
+
| `demo_mode` | Gateway has no provider keys configured (HTTP 503) | no |
|
|
175
|
+
| `provider_error` | All providers failed for this call (HTTP 502) | yes (5xx) |
|
|
176
|
+
| `http_error` | Other non-2xx | 429/5xx only |
|
|
177
|
+
| `invalid_api_key` | Missing/unknown/revoked gateway API key (HTTP 401) | no |
|
|
178
|
+
| `budget_exceeded` | A budget cap with action *Block* is breached for your scope (HTTP 429) | no — retrying won't help until the window rolls past |
|
|
179
|
+
| `rate_limited` | Your API key is over its requests/minute limit (HTTP 429) | yes |
|
|
180
|
+
| `network` | Connection failed | yes |
|
|
181
|
+
| `timeout` | Per-attempt timeout elapsed | yes |
|
|
182
|
+
|
|
183
|
+
## LangChain integration
|
|
184
|
+
|
|
185
|
+
Two options, by how deep your LangChain usage goes:
|
|
186
|
+
|
|
187
|
+
**1. Full `BaseChatModel` (recommended for LangChain/LangGraph apps)** — requires the extra:
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
from arbr_client import create_client
|
|
191
|
+
from arbr_client.langchain import ArbrChatModel
|
|
192
|
+
|
|
193
|
+
client = create_client("http://localhost:4100", application="my-app")
|
|
194
|
+
llm = ArbrChatModel(client=client, model_name="auto", max_tokens=1024)
|
|
195
|
+
|
|
196
|
+
chain = my_prompt | llm # full Runnable compatibility:
|
|
197
|
+
await chain.ainvoke({...}) # pipes, async, batching, callbacks
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
**2. Zero-dep duck-typed adapter** — when you don't want a langchain-core dependency:
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
from arbr_client import as_langchain_model
|
|
204
|
+
llm = as_langchain_model(client, workflow="answer-drafting")
|
|
205
|
+
msg = llm.invoke(messages) # .invoke()/.ainvoke(); AIMessage-shaped result
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
Out of gateway scope either way: tool calling / `with_structured_output`, embeddings, and
|
|
209
|
+
token-level streaming via these adapters — keep those on direct provider SDKs (or, for streaming, use the OpenAI-compatible endpoint).
|
|
210
|
+
|
|
211
|
+
## Gradual rollout pattern
|
|
212
|
+
|
|
213
|
+
Gate the swap at your app's LLM factory so nothing else changes:
|
|
214
|
+
|
|
215
|
+
```python
|
|
216
|
+
def get_llm():
|
|
217
|
+
if os.environ.get("ARBR_GATEWAY_URL"):
|
|
218
|
+
return ArbrChatModel(client=_arbr_client(), model_name=settings.llm_model)
|
|
219
|
+
return build_direct_provider_model() # unchanged path
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
Unset `ARBR_GATEWAY_URL` to revert instantly.
|
|
223
|
+
|
|
224
|
+
## License
|
|
225
|
+
|
|
226
|
+
MIT
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "arbr-client"
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
description = "Official Python client for the Arbr AI control-plane gateway — one function to route, observe, and govern every LLM call."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Gyde" }]
|
|
13
|
+
keywords = [
|
|
14
|
+
"llm", "ai", "gateway", "routing", "control-plane",
|
|
15
|
+
"openai", "anthropic", "gemini", "bedrock", "cost",
|
|
16
|
+
]
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 4 - Beta",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"License :: OSI Approved :: MIT License",
|
|
21
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
"Programming Language :: Python :: 3.13",
|
|
25
|
+
]
|
|
26
|
+
# Zero runtime dependencies — stdlib only (urllib + asyncio).
|
|
27
|
+
dependencies = []
|
|
28
|
+
|
|
29
|
+
[project.optional-dependencies]
|
|
30
|
+
# Optional LangChain integration (arbr_client.langchain.ArbrChatModel).
|
|
31
|
+
langchain = ["langchain-core>=0.2"]
|
|
32
|
+
dev = ["pytest>=8.0.0"]
|
|
33
|
+
|
|
34
|
+
[tool.setuptools.packages.find]
|
|
35
|
+
where = ["src"]
|
|
36
|
+
|
|
37
|
+
[tool.pytest.ini_options]
|
|
38
|
+
testpaths = ["tests"]
|
|
39
|
+
python_files = ["test_*.py"]
|