arbr-client 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
arbr_client/__init__.py
ADDED
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
"""Official Python client for the AI control-plane gateway.
|
|
2
|
+
|
|
3
|
+
Zero dependencies — Python >= 3.11, stdlib only (urllib for HTTP; async via
|
|
4
|
+
``asyncio.to_thread``). The gateway owns provider keys, routing policy, logging
|
|
5
|
+
and cost attribution; this client is a thin, robust pipe to it:
|
|
6
|
+
|
|
7
|
+
from arbr_client import create_client
|
|
8
|
+
|
|
9
|
+
arbr = create_client(base_url="http://localhost:4100", application="my-app")
|
|
10
|
+
res = arbr.chat("Summarise this ticket: ...") # sync
|
|
11
|
+
res = await arbr.achat("Summarise this ticket: ...") # async
|
|
12
|
+
# res.text, res.model, res.routing_decision ("explicit" | "rule" | "ai" | ...)
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import asyncio
|
|
18
|
+
import json
|
|
19
|
+
import os
|
|
20
|
+
import random
|
|
21
|
+
import time
|
|
22
|
+
import urllib.error
|
|
23
|
+
import urllib.request
|
|
24
|
+
from dataclasses import dataclass, field
|
|
25
|
+
from typing import Any, AsyncIterator, Iterator, Optional
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"create_client",
|
|
29
|
+
"Client",
|
|
30
|
+
"ChatResponse",
|
|
31
|
+
"Usage",
|
|
32
|
+
"GatewayError",
|
|
33
|
+
"as_langchain_model",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
_RETRY_BASE_S = 0.25
|
|
37
|
+
_RETRY_CAP_S = 4.0
|
|
38
|
+
_STREAM_CHUNK_CHARS = 24
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ── errors ────────────────────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class GatewayError(Exception):
    """Typed error raised for gateway failures.

    Attributes:
        status: HTTP status of the failed response; ``0`` when there was no
            HTTP response (client-side validation, network failure, timeout).
        code: Machine-readable category. Values produced by this module:
            ``"invalid_input"`` | ``"bad_request"`` | ``"demo_mode"`` |
            ``"provider_error"`` | ``"invalid_api_key"`` |
            ``"budget_exceeded"`` | ``"rate_limited"`` | ``"http_error"`` |
            ``"network"`` | ``"timeout"``.
        request_id: ``requestId`` taken from the gateway's error body, or
            ``None`` when the body did not carry one.
        retryable: Flag consulted by the retry loop to decide whether another
            attempt is worthwhile.
    """

    def __init__(
        self,
        message: str,
        *,
        status: int = 0,
        code: str = "http_error",
        request_id: Optional[str] = None,
        retryable: bool = False,
    ) -> None:
        super().__init__(message)
        self.status = status
        self.code = code
        self.request_id = request_id
        self.retryable = retryable
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _invalid(message: str) -> "GatewayError":
    """Build (not raise) the error used for client-side argument validation.

    The result has ``code="invalid_input"``, ``status=0`` and is never
    retryable; callers ``raise`` it themselves.
    """
    validation_error = GatewayError(
        message,
        status=0,
        code="invalid_input",
        retryable=False,
    )
    return validation_error
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ── response shapes ───────────────────────────────────────────────────────────
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass(frozen=True)
class Usage:
    """Immutable token counts for one completion; every count defaults to 0."""

    # Tokens consumed by the prompt.
    input_tokens: int = 0
    # Tokens produced in the completion.
    output_tokens: int = 0
    # Total reported by the gateway (stored as given, not recomputed here).
    total_tokens: int = 0
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@dataclass(frozen=True)
class ChatResponse:
    """The gateway's routed completion.

    ``model`` is what actually served the call; ``model_requested`` is what
    you asked for ("auto" when you deferred); ``routing_decision`` says why
    ("explicit" | "passthrough" | "rule" | "auto" | "ai" | "cache" |
    "fallback"); ``classified_by`` says how the task type was determined
    ("provided" | "keyword" | "ai"). ``raw`` keeps the payload the response
    was built from.
    """

    text: str
    model: str
    model_requested: str
    provider: str
    routing_decision: str
    classified_by: str
    cache_hit: bool
    request_id: str
    usage: Optional["Usage"] = None
    # `raw` is a dict and therefore unhashable. It still takes part in
    # equality, but is excluded from the generated __hash__ so that frozen
    # instances can be hashed instead of raising TypeError.
    raw: dict = field(repr=False, hash=False, default_factory=dict)

    @staticmethod
    def _from_dict(d: dict) -> "ChatResponse":
        """Build a response from the gateway's camelCase JSON payload.

        Missing or null string fields become ``""``; ``usage`` is ``None``
        unless the payload carries a non-empty ``usage`` object.
        """
        u = d.get("usage") or None
        usage = (
            Usage(
                input_tokens=int(u.get("inputTokens") or 0),
                output_tokens=int(u.get("outputTokens") or 0),
                total_tokens=int(u.get("totalTokens") or 0),
            )
            if isinstance(u, dict)
            else None
        )
        return ChatResponse(
            text=d.get("text") or "",
            model=d.get("model") or "",
            model_requested=d.get("modelRequested") or "",
            provider=d.get("provider") or "",
            routing_decision=d.get("routingDecision") or "",
            classified_by=d.get("classifiedBy") or "",
            cache_hit=bool(d.get("cacheHit")),
            request_id=d.get("requestId") or "",
            usage=usage,
            raw=d,
        )
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# ── message normalization ─────────────────────────────────────────────────────
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _content_to_str(content: Any) -> str:
|
|
132
|
+
if isinstance(content, str):
|
|
133
|
+
return content
|
|
134
|
+
if isinstance(content, (list, tuple)):
|
|
135
|
+
parts = []
|
|
136
|
+
for c in content:
|
|
137
|
+
if isinstance(c, str):
|
|
138
|
+
parts.append(c)
|
|
139
|
+
elif isinstance(c, dict) and c.get("text"):
|
|
140
|
+
parts.append(str(c["text"]))
|
|
141
|
+
elif getattr(c, "text", None):
|
|
142
|
+
parts.append(str(c.text))
|
|
143
|
+
return "".join(parts)
|
|
144
|
+
return "" if content is None else str(content)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _normalize_messages(messages: Any) -> list[dict]:
|
|
148
|
+
"""Accepts a bare string, ``{"role","content"}`` dicts, or duck-typed
|
|
149
|
+
LangChain messages (objects with ``.type`` and ``.content``)."""
|
|
150
|
+
if isinstance(messages, str):
|
|
151
|
+
return [{"role": "user", "content": messages}]
|
|
152
|
+
if not isinstance(messages, (list, tuple)):
|
|
153
|
+
messages = [messages]
|
|
154
|
+
if len(messages) == 0:
|
|
155
|
+
raise _invalid("`messages` must not be empty")
|
|
156
|
+
out: list[dict] = []
|
|
157
|
+
for i, m in enumerate(messages):
|
|
158
|
+
if m is None:
|
|
159
|
+
raise _invalid(f"message at index {i} is None")
|
|
160
|
+
if isinstance(m, str):
|
|
161
|
+
out.append({"role": "user", "content": m})
|
|
162
|
+
elif isinstance(m, dict):
|
|
163
|
+
out.append({"role": m.get("role") or "user", "content": _content_to_str(m.get("content"))})
|
|
164
|
+
elif hasattr(m, "type") and hasattr(m, "content"):
|
|
165
|
+
t = str(getattr(m, "type"))
|
|
166
|
+
role = "system" if t == "system" else "assistant" if t == "ai" else "user"
|
|
167
|
+
out.append({"role": role, "content": _content_to_str(m.content)})
|
|
168
|
+
else:
|
|
169
|
+
raise _invalid(f"message at index {i} is not a str, dict, or LangChain-style message")
|
|
170
|
+
return out
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# ── HTTP plumbing (stdlib) ────────────────────────────────────────────────────
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _http_once(
    url: str, *, method: str, body: Optional[dict], timeout_s: float, headers: Optional[dict] = None
) -> tuple[int, Optional[dict]]:
    """Perform exactly one HTTP request and return ``(status, parsed_json)``.

    Non-2xx responses are returned, not raised, so the caller can map them
    to typed errors. ``parsed_json`` is ``None`` when the response body is
    empty or is not valid JSON.

    Args:
        url: Absolute request URL.
        method: HTTP method.
        body: JSON-serialisable request body, or ``None`` for no body.
        timeout_s: Timeout handed to ``urlopen`` for this attempt.
        headers: Extra headers; they override the JSON defaults.

    Raises:
        GatewayError: ``code="timeout"`` or ``code="network"`` (both
            retryable) when no complete HTTP response could be read.
    """
    import http.client  # local import: keeps the new dependency inside this block

    def _decode(payload: bytes) -> Optional[dict]:
        if not payload:
            return None
        try:
            return json.loads(payload)
        except ValueError:  # JSONDecodeError and UnicodeDecodeError both subclass ValueError
            return None

    data = json.dumps(body).encode("utf-8") if body is not None else None
    req = urllib.request.Request(
        url,
        data=data,
        method=method,
        headers={"Content-Type": "application/json", "Accept": "application/json", **(headers or {})},
    )
    try:
        try:
            with urllib.request.urlopen(req, timeout=timeout_s) as res:
                status, payload = res.status, res.read()
        except urllib.error.HTTPError as err:  # non-2xx WITH a response
            try:
                status, payload = err.code, err.read()
            finally:
                err.close()  # release the underlying connection
    except (OSError, http.client.HTTPException) as err:
        # OSError covers URLError, TimeoutError and socket failures.
        # HTTPException covers protocol-level failures such as a truncated
        # body (IncompleteRead), which are not OSError subclasses.
        reason = getattr(err, "reason", err)
        if isinstance(err, TimeoutError) or isinstance(reason, TimeoutError) or "timed out" in str(reason).lower():
            raise GatewayError(
                f"request timed out after {timeout_s}s", code="timeout", retryable=True
            ) from err
        raise GatewayError(f"network error: {reason}", code="network", retryable=True) from err
    return status, _decode(payload)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _error_from_response(status: int, body: Optional[dict]) -> "GatewayError":
    """Map a non-2xx gateway response onto a typed :class:`GatewayError`.

    Args:
        status: HTTP status code of the response.
        body: Parsed JSON body, if any. Anything that is not a dict (``None``,
            or a JSON list/string from an intermediary) is treated as empty
            rather than crashing on ``.get``.

    Returns:
        The error to raise. ``code`` is the body's ``error`` field when it is
        one of the known gateway codes, ``"bad_request"`` for an otherwise
        unclassified 400, and ``"http_error"`` for everything else.
    """
    payload = body if isinstance(body, dict) else {}
    err_field = payload.get("error")
    message = payload.get("message") or err_field or f"gateway responded {status}"

    known_codes = ("demo_mode", "provider_error", "invalid_api_key", "budget_exceeded", "rate_limited")
    if isinstance(err_field, str) and err_field in known_codes:
        code = err_field
    elif status == 400:
        code = "bad_request"
    else:
        code = "http_error"

    # budget_exceeded is a 429, but retrying won't help until the window rolls past.
    retryable = code != "budget_exceeded" and (status == 429 or status >= 500)
    return GatewayError(
        str(message),
        status=status,
        code=code,
        request_id=payload.get("requestId"),
        retryable=retryable,
    )
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _request_with_retries(
    url: str, *, method: str, body: Optional[dict], timeout_s: float, retries: int,
    headers: Optional[dict] = None,
) -> dict:
    """Send a request, retrying retryable failures with backoff and jitter.

    Args:
        url: Absolute request URL.
        method: HTTP method.
        body: JSON-serialisable request body, or ``None``.
        timeout_s: Per-attempt timeout.
        retries: Extra attempts after the first. Negative values are treated
            as ``0`` so that at least one request is always made.
        headers: Extra request headers.

    Returns:
        The parsed JSON body of the first 2xx response (``{}`` when empty).

    Raises:
        GatewayError: The last failure, once it is non-retryable or the
            attempts are exhausted.
    """
    max_retries = max(0, retries)
    last_err: Optional[GatewayError] = None
    for attempt in range(max_retries + 1):
        if attempt > 0:
            # Exponential backoff capped at _RETRY_CAP_S; sleep a random
            # duration in [exp/2, exp) so concurrent clients desynchronise.
            exp = min(_RETRY_CAP_S, _RETRY_BASE_S * (2 ** (attempt - 1)))
            time.sleep(exp / 2 + random.random() * (exp / 2))
        try:
            status, parsed = _http_once(url, method=method, body=body, timeout_s=timeout_s, headers=headers)
        except GatewayError as err:  # network / timeout
            last_err = err
            if err.retryable and attempt < max_retries:
                continue
            raise
        if 200 <= status < 300:
            return parsed or {}
        gerr = _error_from_response(status, parsed)
        last_err = gerr
        if gerr.retryable and attempt < max_retries:
            continue
        raise gerr
    # Defensive only: the loop above always returns or raises.
    raise last_err if last_err else GatewayError("request failed")  # pragma: no cover
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
# ── the client ────────────────────────────────────────────────────────────────
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
class Client:
    """Gateway client. Create via :func:`create_client`.

    Holds the gateway base URL, default attribution metadata merged into
    every chat call, the optional bearer credential, and default
    timeout/retry settings.
    """

    def __init__(
        self,
        *,
        base_url: Optional[str] = None,
        application: Optional[str] = None,
        workflow: Optional[str] = None,
        department: Optional[str] = None,
        user_id: Optional[str] = None,
        api_key: Optional[str] = None,
        timeout_s: float = 60.0,
        retries: int = 2,
    ) -> None:
        """Configure the client.

        ``base_url`` falls back to ``$ARBR_GATEWAY_URL`` and ``api_key`` to
        ``$ARBR_API_KEY``. Trailing slashes on the URL are stripped and a
        negative ``retries`` is clamped to 0.

        Raises:
            GatewayError: ``code="invalid_input"`` when no base URL is given
                or configured.
        """
        url = (base_url or os.environ.get("ARBR_GATEWAY_URL") or "").rstrip("/")
        if not url:
            raise _invalid("`base_url` is required (or set ARBR_GATEWAY_URL)")
        self.base_url = url
        self._defaults = {
            "application": application,
            "workflow": workflow,
            "department": department,
            "userId": user_id,
        }
        # Gateway API key ("ab_…", Settings → API keys). Binds attribution server-side.
        key = api_key or os.environ.get("ARBR_API_KEY")
        self._headers = {"Authorization": f"Bearer {key}"} if key else {}
        self._timeout_s = timeout_s
        self._retries = max(0, retries)

    # — chat —

    def chat(
        self,
        messages: Any,
        *,
        model: Optional[str] = None,
        provider: Optional[str] = None,
        task_type: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        application: Optional[str] = None,
        workflow: Optional[str] = None,
        department: Optional[str] = None,
        user_id: Optional[str] = None,
        timeout_s: Optional[float] = None,
        retries: Optional[int] = None,
    ) -> "ChatResponse":
        """One routed completion. ``model=None`` or ``"auto"`` → the gateway's
        router decides (rules → automated routing → default); an explicit model
        whose provider is connected is honored as-is.

        Per-call keyword arguments override the client defaults; ``None``
        means "not set" and is left out of the request. ``timeout_s`` and
        ``retries`` override the client-level settings for this call only,
        and a negative ``retries`` is clamped to 0 as in the constructor.

        Raises:
            GatewayError: On invalid input or any gateway/network failure.
        """
        if messages is None:
            raise _invalid("`messages` is required")
        body: dict[str, Any] = {k: v for k, v in self._defaults.items() if v is not None}
        overrides = {
            "application": application,
            "workflow": workflow,
            "department": department,
            "userId": user_id,
            "model": model,
            "provider": provider,
            "taskType": task_type,
            "temperature": temperature,
            "maxTokens": max_tokens,
        }
        body.update({k: v for k, v in overrides.items() if v is not None})
        body["messages"] = _normalize_messages(messages)
        raw = _request_with_retries(
            f"{self.base_url}/v1/chat",
            method="POST",
            body=body,
            timeout_s=timeout_s if timeout_s is not None else self._timeout_s,
            retries=max(0, retries) if retries is not None else self._retries,
            headers=self._headers,
        )
        return ChatResponse._from_dict(raw)

    async def achat(self, messages: Any, **kwargs: Any) -> "ChatResponse":
        """Async :meth:`chat` (runs the blocking call in a worker thread)."""
        return await asyncio.to_thread(self.chat, messages, **kwargs)

    # — stream (honest shim) —

    def stream(self, messages: Any, **kwargs: Any) -> Iterator[str]:
        """Yield the answer in small text chunks.

        NOTE: this is a buffered shim, not token-by-token streaming — it
        makes ONE :meth:`chat` call and slices the finished text into
        ``_STREAM_CHUNK_CHARS``-character pieces. Use :meth:`chat` when you
        need the full metadata."""
        res = self.chat(messages, **kwargs)
        text = res.text
        for i in range(0, len(text), _STREAM_CHUNK_CHARS):
            yield text[i : i + _STREAM_CHUNK_CHARS]

    async def astream(self, messages: Any, **kwargs: Any) -> AsyncIterator[str]:
        """Async :meth:`stream` (same buffered-shim caveat)."""
        res = await self.achat(messages, **kwargs)
        text = res.text
        for i in range(0, len(text), _STREAM_CHUNK_CHARS):
            yield text[i : i + _STREAM_CHUNK_CHARS]

    # — status —

    def status(self) -> dict:
        """Gateway healthcheck — GET /api/status."""
        return _request_with_retries(
            f"{self.base_url}/api/status",
            method="GET",
            body=None,
            timeout_s=self._timeout_s,
            retries=self._retries,
            headers=self._headers,
        )

    async def astatus(self) -> dict:
        """Async :meth:`status` (runs the blocking call in a worker thread)."""
        return await asyncio.to_thread(self.status)
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def create_client(
    base_url: Optional[str] = None,
    *,
    application: Optional[str] = None,
    workflow: Optional[str] = None,
    department: Optional[str] = None,
    user_id: Optional[str] = None,
    api_key: Optional[str] = None,
    timeout_s: float = 60.0,
    retries: int = 2,
) -> "Client":
    """Create a gateway client.

    Convenience factory that lets ``base_url`` be passed positionally; every
    argument is forwarded unchanged to :class:`Client`. ``base_url`` falls
    back to $ARBR_GATEWAY_URL, ``api_key`` to $ARBR_API_KEY.
    """
    settings = {
        "base_url": base_url,
        "application": application,
        "workflow": workflow,
        "department": department,
        "user_id": user_id,
        "api_key": api_key,
        "timeout_s": timeout_s,
        "retries": retries,
    }
    return Client(**settings)
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
# ── LangChain-style adapter (duck-typed; no LangChain dependency) ─────────────
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
class _AiMessageShape:
|
|
413
|
+
"""AIMessage-shaped result: .content, .usage_metadata, .response_metadata,
|
|
414
|
+
.type == "ai". Attribute access only — not a real LangChain object."""
|
|
415
|
+
|
|
416
|
+
def __init__(self, res: ChatResponse) -> None:
|
|
417
|
+
u = res.usage or Usage()
|
|
418
|
+
self.content = res.text
|
|
419
|
+
self.usage_metadata = {
|
|
420
|
+
"input_tokens": u.input_tokens,
|
|
421
|
+
"output_tokens": u.output_tokens,
|
|
422
|
+
"total_tokens": u.total_tokens,
|
|
423
|
+
}
|
|
424
|
+
self.response_metadata = {
|
|
425
|
+
"model": res.model,
|
|
426
|
+
"provider": res.provider,
|
|
427
|
+
"routingDecision": res.routing_decision,
|
|
428
|
+
"classifiedBy": res.classified_by,
|
|
429
|
+
"modelRequested": res.model_requested,
|
|
430
|
+
"requestId": res.request_id,
|
|
431
|
+
"gateway": True,
|
|
432
|
+
}
|
|
433
|
+
self.additional_kwargs: dict = {}
|
|
434
|
+
self.type = "ai"
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def _coerce_lc_input(value: Any) -> Any:
|
|
438
|
+
# A LangChain PromptValue (from `prompt | model` chains) → messages.
|
|
439
|
+
if hasattr(value, "to_messages"):
|
|
440
|
+
return value.to_messages()
|
|
441
|
+
return value
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
class _LangChainishModel:
|
|
445
|
+
"""Minimal LangChain-style chat model backed by the gateway (duck-typed).
|
|
446
|
+
|
|
447
|
+
Supports .invoke() / .ainvoke() and is itself callable, so simple
|
|
448
|
+
`prompt | model` chains coerce it via RunnableLambda. For FULL Runnable
|
|
449
|
+
compatibility (callbacks, batch, with_structured_output), wrap the client
|
|
450
|
+
in a real BaseChatModel subclass in your app instead."""
|
|
451
|
+
|
|
452
|
+
def __init__(self, client: Client, meta: dict) -> None:
|
|
453
|
+
self._client = client
|
|
454
|
+
self._meta = meta
|
|
455
|
+
|
|
456
|
+
def invoke(self, messages: Any, _config: Any = None, **_: Any) -> _AiMessageShape:
|
|
457
|
+
res = self._client.chat(_coerce_lc_input(messages), **self._meta)
|
|
458
|
+
return _AiMessageShape(res)
|
|
459
|
+
|
|
460
|
+
async def ainvoke(self, messages: Any, _config: Any = None, **_: Any) -> _AiMessageShape:
|
|
461
|
+
res = await self._client.achat(_coerce_lc_input(messages), **self._meta)
|
|
462
|
+
return _AiMessageShape(res)
|
|
463
|
+
|
|
464
|
+
# Callable → coercible to RunnableLambda in `prompt | model` chains.
|
|
465
|
+
def __call__(self, messages: Any, **_: Any) -> _AiMessageShape:
|
|
466
|
+
return self.invoke(messages)
|
|
467
|
+
|
|
468
|
+
def stream(self, messages: Any, **_: Any) -> Iterator[_AiMessageShape]:
|
|
469
|
+
res = self._client.chat(_coerce_lc_input(messages), **self._meta)
|
|
470
|
+
text = res.text
|
|
471
|
+
for i in range(0, len(text), _STREAM_CHUNK_CHARS):
|
|
472
|
+
chunk = _AiMessageShape(res)
|
|
473
|
+
chunk.content = text[i : i + _STREAM_CHUNK_CHARS]
|
|
474
|
+
yield chunk
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def as_langchain_model(client: "Client", **meta: Any) -> "_LangChainishModel":
    """Wrap *client* in the duck-typed LangChain-style adapter.

    No LangChain dependency is needed. The keyword arguments in ``meta``
    (workflow, task_type, model, temperature, max_tokens, ...) are forwarded
    on every call the adapter makes.
    """
    adapter = _LangChainishModel(client, meta)
    return adapter
|
arbr_client/langchain.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Optional LangChain integration: the gateway as a real ``BaseChatModel``.
|
|
2
|
+
|
|
3
|
+
Requires ``langchain-core`` (install via ``pip install arbr-client[langchain]``).
|
|
4
|
+
The core ``arbr_client`` package stays zero-dependency; this module imports
|
|
5
|
+
LangChain lazily and fails with a clear message if it's missing.
|
|
6
|
+
|
|
7
|
+
from arbr_client import create_client
|
|
8
|
+
from arbr_client.langchain import ArbrChatModel
|
|
9
|
+
|
|
10
|
+
client = create_client("http://localhost:4100", application="my-app")
|
|
11
|
+
llm = ArbrChatModel(client=client, model_name="auto") # or a pinned model id
|
|
12
|
+
# Full Runnable compatibility: prompt | llm, .ainvoke(), batching, callbacks.
|
|
13
|
+
|
|
14
|
+
For apps that should NOT take a langchain-core dependency, use the zero-dep
|
|
15
|
+
duck-typed adapter instead: ``arbr_client.as_langchain_model(client, ...)``.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from typing import Any, List, Optional
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
from langchain_core.callbacks import (
|
|
24
|
+
AsyncCallbackManagerForLLMRun,
|
|
25
|
+
CallbackManagerForLLMRun,
|
|
26
|
+
)
|
|
27
|
+
from langchain_core.language_models.chat_models import BaseChatModel
|
|
28
|
+
from langchain_core.messages import AIMessage, BaseMessage
|
|
29
|
+
from langchain_core.outputs import ChatGeneration, ChatResult
|
|
30
|
+
except ImportError as _err: # pragma: no cover
|
|
31
|
+
raise ImportError(
|
|
32
|
+
"arbr_client.langchain requires langchain-core — "
|
|
33
|
+
"install it with: pip install arbr-client[langchain]"
|
|
34
|
+
) from _err
|
|
35
|
+
|
|
36
|
+
__all__ = ["ArbrChatModel", "KaryaChatModel"]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ArbrChatModel(BaseChatModel):
    """A LangChain chat model that routes completions through the gateway.

    ``model_name`` follows the gateway's semantics: an explicit model id is
    honored as-is when its provider is connected; ``"auto"`` (or ``None``)
    lets the gateway's router decide (rules → automated routing → default).

    ``stop`` sequences are applied client-side: the returned text is cut at
    the first occurrence of any stop string. The request itself is not
    changed, so reported token usage still reflects the untruncated
    completion.

    Out of gateway scope (keep on direct provider SDKs): tool calling /
    ``with_structured_output``, embeddings, token-level streaming.
    """

    client: Any  # arbr_client.Client
    model_name: Optional[str] = None
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
    workflow: Optional[str] = None
    task_type: Optional[str] = None

    @property
    def _llm_type(self) -> str:
        return "arbr-gateway"

    @property
    def _identifying_params(self) -> dict:
        return {"model_name": self.model_name, "gateway": self.client.base_url}

    def _call_kwargs(self) -> dict:
        """Keyword arguments forwarded to the client's chat/achat call."""
        return {
            "model": self.model_name,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
            "workflow": self.workflow,
            "task_type": self.task_type,
        }

    @staticmethod
    def _apply_stop(text: str, stop: Optional[List[str]]) -> str:
        """Cut *text* at the earliest occurrence of any non-empty stop string."""
        if not stop:
            return text
        hits = [pos for pos in (text.find(s) for s in stop if s) if pos != -1]
        return text[: min(hits)] if hits else text

    def _to_result(self, res: Any, stop: Optional[List[str]] = None) -> ChatResult:
        """Convert a gateway response into a single-generation ChatResult."""
        usage = None
        if res.usage is not None:
            usage = {
                "input_tokens": res.usage.input_tokens,
                "output_tokens": res.usage.output_tokens,
                "total_tokens": res.usage.total_tokens,
            }
        message = AIMessage(
            content=self._apply_stop(res.text, stop),
            usage_metadata=usage,
            response_metadata={
                "model": res.model,
                "provider": res.provider,
                "routingDecision": res.routing_decision,
                "classifiedBy": res.classified_by,
                "modelRequested": res.model_requested,
                "requestId": res.request_id,
                "gateway": True,
            },
        )
        return ChatResult(generations=[ChatGeneration(message=message)])

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Synchronous completion through the gateway."""
        res = self.client.chat(messages, **self._call_kwargs())
        return self._to_result(res, stop)

    async def _agenerate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Asynchronous completion through the gateway."""
        res = await self.client.achat(messages, **self._call_kwargs())
        return self._to_result(res, stop)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# Backward-compatibility alias (Karya → Arbr rename).
|
|
119
|
+
KaryaChatModel = ArbrChatModel
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: arbr-client
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Official Python client for the Arbr AI control-plane gateway — one function to route, observe, and govern every LLM call.
|
|
5
|
+
Author: Gyde
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: llm,ai,gateway,routing,control-plane,openai,anthropic,gemini,bedrock,cost
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Provides-Extra: langchain
|
|
19
|
+
Requires-Dist: langchain-core>=0.2; extra == "langchain"
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
# arbr-client (Python)
|
|
25
|
+
|
|
26
|
+
Official Python client for the **Arbr AI control plane** — one function to route, observe,
|
|
27
|
+
and govern every LLM call your app makes.
|
|
28
|
+
|
|
29
|
+
Your app calls the gateway instead of provider SDKs. The gateway holds the provider keys,
|
|
30
|
+
honors the model you pin (or picks one when you say `"auto"`), applies human-approved routing
|
|
31
|
+
rules and cost policies, and logs every call with full cost attribution — visible in the dashboard.
|
|
32
|
+
|
|
33
|
+
- **Zero dependencies** — Python ≥ 3.11, stdlib only. Sync *and* async (`achat`/`astream`).
|
|
34
|
+
- **One function for the 90% case** — `chat()`.
|
|
35
|
+
- **Robust by default** — per-attempt timeouts, retries with exponential backoff + jitter on
|
|
36
|
+
network errors / 429 / 5xx, typed errors.
|
|
37
|
+
- **Optional LangChain integration** — a real `BaseChatModel` via `arbr-client[langchain]`.
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
```sh
|
|
42
|
+
pip install arbr-client # core (zero deps)
|
|
43
|
+
pip install "arbr-client[langchain]" # + the LangChain BaseChatModel adapter
|
|
44
|
+
# (pre-release: pip install /path/to/arbr_client-0.1.0-py3-none-any.whl)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## 60-second quickstart
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from arbr_client import create_client
|
|
51
|
+
|
|
52
|
+
arbr = create_client(
|
|
53
|
+
"http://localhost:4100", # or set ARBR_GATEWAY_URL
|
|
54
|
+
application="my-app", # attribution — shows up in the dashboard
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
res = arbr.chat("Summarise this support ticket: ...", model="auto", max_tokens=300)
|
|
58
|
+
print(res.text)
|
|
59
|
+
print(res.model, res.routing_decision) # e.g. "gpt-4o-mini", "ai"
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Async (FastAPI, LangGraph, etc.):
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
res = await arbr.achat("Summarise this ticket: ...", model="auto")
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
That's a complete integration. No provider keys in your app, and every call is logged,
|
|
69
|
+
costed, and governable from the dashboard.
|
|
70
|
+
|
|
71
|
+
## How model choice works
|
|
72
|
+
|
|
73
|
+
| You send | What happens |
|
|
74
|
+
|---|---|
|
|
75
|
+
| `model="gpt-4o"` (provider connected) | Honored **as-is** — all routing policies skipped. `routing_decision == "explicit"` |
|
|
76
|
+
| `model="auto"` or omitted | The gateway decides: cache → operator rules → automated routing (cost guardrail or AI policy) → default model |
|
|
77
|
+
| a model whose provider isn't connected | Falls back to the router (same as `"auto"`) |
|
|
78
|
+
|
|
79
|
+
`res.model_requested` shows what you asked for, `res.model` what served it, `res.routing_decision`
|
|
80
|
+
why (`explicit / rule / auto / ai / cache / fallback / passthrough`), and `res.classified_by` how
|
|
81
|
+
the task type was determined (`provided / keyword / ai`).
|
|
82
|
+
|
|
83
|
+
## API
|
|
84
|
+
|
|
85
|
+
### `create_client(base_url=None, *, application=None, workflow=None, department=None, user_id=None, api_key=None, timeout_s=60, retries=2) → Client`
|
|
86
|
+
|
|
87
|
+
`base_url` falls back to `$ARBR_GATEWAY_URL`; `api_key` to `$ARBR_API_KEY`. A gateway API key
|
|
88
|
+
(`ab_…`, dashboard → Settings → API keys) is sent as `Authorization: Bearer` and binds attribution
|
|
89
|
+
server-side — required once the gateway has *Require API keys* on. The metadata kwargs are defaults
|
|
90
|
+
merged into every call (per-call kwargs override them).
|
|
91
|
+
|
|
92
|
+
### `Client.chat(messages, *, model=None, provider=None, task_type=None, temperature=None, max_tokens=None, ...) → ChatResponse`
|
|
93
|
+
|
|
94
|
+
`messages` accepts a bare string, `{"role", "content"}` dicts, or LangChain message objects.
|
|
95
|
+
`ChatResponse` is a frozen dataclass: `text`, `usage` (`input_tokens/output_tokens/total_tokens`),
|
|
96
|
+
`model`, `model_requested`, `provider`, `routing_decision`, `classified_by`, `cache_hit`,
|
|
97
|
+
`request_id`, plus `.raw` (the unmodified gateway payload).
|
|
98
|
+
|
|
99
|
+
### `Client.achat(...)` / `Client.astream(...)` / `Client.astatus()`
|
|
100
|
+
|
|
101
|
+
Async counterparts (the blocking call runs in a worker thread via `asyncio.to_thread`).
|
|
102
|
+
|
|
103
|
+
### Streaming
|
|
104
|
+
|
|
105
|
+
The gateway supports two streaming modes:
|
|
106
|
+
|
|
107
|
+
**Real SSE (token-by-token)** — use the OpenAI-compatible endpoint at `POST /v1/chat/completions`
|
|
108
|
+
with `stream=True`. Works with the OpenAI Python SDK, any chat UI, or a raw `httpx`/`requests` call:
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
from openai import OpenAI
|
|
112
|
+
|
|
113
|
+
client = OpenAI(api_key="ab_…", base_url="http://localhost:4100/v1")
|
|
114
|
+
stream = client.chat.completions.create(
|
|
115
|
+
model="gpt-4o-mini",
|
|
116
|
+
messages=[{"role": "user", "content": "Tell me a joke"}],
|
|
117
|
+
stream=True,
|
|
118
|
+
)
|
|
119
|
+
for chunk in stream:
|
|
120
|
+
print(chunk.choices[0].delta.content or "", end="", flush=True)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
**`Client.stream(messages, ...) → Iterator[str]`** — makes one buffered `chat()` call and yields
|
|
124
|
+
the text in small chunks. Useful when you want full routing metadata (`res.model`,
|
|
125
|
+
`res.routing_decision`, etc.) alongside a streaming-style emit:
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
for chunk in arbr.stream("Explain quantum entanglement simply"):
|
|
129
|
+
print(chunk, end="", flush=True)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
Use the OpenAI-compatible endpoint when you need real token-by-token delivery or are integrating with
|
|
133
|
+
chat UIs. Use `stream()` when you want the routing metadata the OpenAI endpoint doesn't expose.
|
|
134
|
+
|
|
135
|
+
### `Client.status() → dict`
|
|
136
|
+
|
|
137
|
+
Health check against `GET /api/status`; returns `demoMode`, `liveProviders`, `defaultProvider`,
|
|
138
|
+
`defaultModel`, `routingMode`, `breachedCaps`.
|
|
139
|
+
When the gateway has admin auth enabled (`ARBR_ADMIN_KEY` set server-side), this endpoint
|
|
140
|
+
requires a credential — your gateway `api_key` is accepted, so set it and `status()` keeps working.
|
|
141
|
+
|
|
142
|
+
## Error handling
|
|
143
|
+
|
|
144
|
+
All failures raise `GatewayError` with `.status`, `.code`, `.retryable`, `.request_id`:
|
|
145
|
+
|
|
146
|
+
| `code` | Meaning | Retried automatically? |
|
|
147
|
+
|---|---|---|
|
|
148
|
+
| `invalid_input` | Bad arguments (caught before any network call) | no |
|
|
149
|
+
| `bad_request` | Gateway rejected the request (HTTP 400) | no |
|
|
150
|
+
| `demo_mode` | Gateway has no provider keys configured (HTTP 503) | no |
|
|
151
|
+
| `provider_error` | All providers failed for this call (HTTP 502) | yes (5xx) |
|
|
152
|
+
| `http_error` | Other non-2xx | 429/5xx only |
|
|
153
|
+
| `invalid_api_key` | Missing/unknown/revoked gateway API key (HTTP 401) | no |
|
|
154
|
+
| `budget_exceeded` | A budget cap with action *Block* is breached for your scope (HTTP 429) | no — retrying won't help until the window rolls past |
|
|
155
|
+
| `rate_limited` | Your API key is over its requests/minute limit (HTTP 429) | yes |
|
|
156
|
+
| `network` | Connection failed | yes |
|
|
157
|
+
| `timeout` | Per-attempt timeout elapsed | yes |
|
|
158
|
+
|
|
159
|
+
## LangChain integration
|
|
160
|
+
|
|
161
|
+
Two options, by how deep your LangChain usage goes:
|
|
162
|
+
|
|
163
|
+
**1. Full `BaseChatModel` (recommended for LangChain/LangGraph apps)** — requires the optional LangChain extra (`langchain-core`):
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
from arbr_client import create_client
|
|
167
|
+
from arbr_client.langchain import ArbrChatModel
|
|
168
|
+
|
|
169
|
+
client = create_client("http://localhost:4100", application="my-app")
|
|
170
|
+
llm = ArbrChatModel(client=client, model_name="auto", max_tokens=1024)
|
|
171
|
+
|
|
172
|
+
chain = my_prompt | llm # full Runnable compatibility:
|
|
173
|
+
await chain.ainvoke({...}) # pipes, async, batching, callbacks
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
**2. Zero-dep duck-typed adapter** — when you don't want a langchain-core dependency:
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from arbr_client import as_langchain_model
|
|
180
|
+
llm = as_langchain_model(client, workflow="answer-drafting")
|
|
181
|
+
msg = llm.invoke(messages) # .invoke()/.ainvoke(); AIMessage-shaped result
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Out of scope for both adapters: tool calling / `with_structured_output`, embeddings, and
|
|
185
|
+
token-level streaming — keep those on direct provider SDKs.
|
|
186
|
+
|
|
187
|
+
## Gradual rollout pattern
|
|
188
|
+
|
|
189
|
+
Gate the swap at your app's LLM factory so nothing else changes:
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
def get_llm():
|
|
193
|
+
if os.environ.get("ARBR_GATEWAY_URL"):
|
|
194
|
+
return ArbrChatModel(client=_arbr_client(), model_name=settings.llm_model)
|
|
195
|
+
return build_direct_provider_model() # unchanged path
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
Unset `ARBR_GATEWAY_URL` to revert instantly.
|
|
199
|
+
|
|
200
|
+
## License
|
|
201
|
+
|
|
202
|
+
MIT
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
arbr_client/__init__.py,sha256=xt9PY_j7ZqGAXic4hcTDx5ckVVk-p_11-85FNkcd45Q,17786
|
|
2
|
+
arbr_client/langchain.py,sha256=KxcoAH8pbVj4kOKGiWDYgWUje7A9kfC_474IMGga7bI,4262
|
|
3
|
+
arbr_client-0.1.0.dist-info/licenses/LICENSE,sha256=JldKhrRTw7NX0Ez2qqITMbeIJ1bOJJlDmUK-U0ZW94M,1061
|
|
4
|
+
arbr_client-0.1.0.dist-info/METADATA,sha256=B5I308M42eIxFfHUWbdO8fIOwpMBfIeDI9EL4lLG_jc,8278
|
|
5
|
+
arbr_client-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
6
|
+
arbr_client-0.1.0.dist-info/top_level.txt,sha256=fyVHSgqGkJcdvCsgqfFKUiuUz6qqida3lD3jqXLwAXk,12
|
|
7
|
+
arbr_client-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Gyde
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
arbr_client
|