arbr-client 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,481 @@
1
+ """Official Python client for the AI control-plane gateway.
2
+
3
+ Zero dependencies — Python >= 3.11, stdlib only (urllib for HTTP; async via
4
+ ``asyncio.to_thread``). The gateway owns provider keys, routing policy, logging
5
+ and cost attribution; this client is a thin, robust pipe to it:
6
+
7
+ from arbr_client import create_client
8
+
9
+ arbr = create_client(base_url="http://localhost:4100", application="my-app")
10
+ res = arbr.chat("Summarise this ticket: ...") # sync
11
+ res = await arbr.achat("Summarise this ticket: ...") # async
12
+ # res.text, res.model, res.routing_decision ("explicit" | "rule" | "ai" | ...)
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ import json
19
+ import os
20
+ import random
21
+ import time
22
+ import urllib.error
23
+ import urllib.request
24
+ from dataclasses import dataclass, field
25
+ from typing import Any, AsyncIterator, Iterator, Optional
26
+
27
+ __all__ = [
28
+ "create_client",
29
+ "Client",
30
+ "ChatResponse",
31
+ "Usage",
32
+ "GatewayError",
33
+ "as_langchain_model",
34
+ ]
35
+
36
+ _RETRY_BASE_S = 0.25
37
+ _RETRY_CAP_S = 4.0
38
+ _STREAM_CHUNK_CHARS = 24
39
+
40
+
41
+ # ── errors ────────────────────────────────────────────────────────────────────
42
+
43
+
44
class GatewayError(Exception):
    """Single typed exception raised by this client.

    Attributes:
        status: HTTP status of the gateway response, or 0 when the error was
            raised without one (argument validation, network failure, timeout).
        code: Machine-readable category. Values produced by this module:
            "invalid_input" | "bad_request" | "demo_mode" | "provider_error"
            | "invalid_api_key" | "budget_exceeded" | "rate_limited"
            | "http_error" | "network" | "timeout".
        request_id: ``requestId`` from the gateway's error body, when present.
        retryable: Whether the retry loop may attempt the request again.
    """

    def __init__(
        self,
        message: str,
        *,
        status: int = 0,
        code: str = "http_error",
        request_id: Optional[str] = None,
        retryable: bool = False,
    ) -> None:
        # The human-readable message is carried by Exception itself (str(err)).
        super().__init__(message)
        self.status, self.code = status, code
        self.request_id, self.retryable = request_id, retryable
65
+
66
+
67
def _invalid(message: str) -> GatewayError:
    """Build (but do not raise) the error used for argument-validation failures.

    The result has ``code="invalid_input"``, ``status=0`` and is never retryable.
    """
    validation_error = GatewayError(message, status=0, code="invalid_input", retryable=False)
    return validation_error
69
+
70
+
71
+ # ── response shapes ───────────────────────────────────────────────────────────
72
+
73
+
74
@dataclass(frozen=True)
class Usage:
    """Immutable token counts for one completion; every count defaults to 0."""

    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0
79
+
80
+
81
@dataclass(frozen=True)
class ChatResponse:
    """Immutable view of one routed completion returned by the gateway.

    ``model`` is what actually served the call; ``model_requested`` is what the
    caller asked for ("auto" when deferred); ``routing_decision`` says why
    ("explicit" | "passthrough" | "rule" | "auto" | "ai" | "cache" | "fallback");
    ``classified_by`` says how the task type was determined
    ("provided" | "keyword" | "ai"). ``raw`` keeps the payload dict the
    response was built from and is left out of ``repr``.
    """

    text: str
    model: str
    model_requested: str
    provider: str
    routing_decision: str
    classified_by: str
    cache_hit: bool
    request_id: str
    usage: Optional[Usage] = None
    raw: dict = field(repr=False, default_factory=dict)

    @staticmethod
    def _from_dict(d: dict) -> "ChatResponse":
        """Build a response from the gateway's camelCase JSON payload.

        Missing or falsy string fields become ``""``; ``usage`` is populated
        only when the payload carries a non-empty ``usage`` object.
        """

        def text_field(key: str) -> Any:
            # Falsy values (missing key, None, "") all collapse to "".
            return d.get(key) or ""

        def count(source: dict, key: str) -> int:
            return int(source.get(key) or 0)

        usage_payload = d.get("usage") or None
        parsed_usage: Optional[Usage] = None
        if isinstance(usage_payload, dict):
            parsed_usage = Usage(
                input_tokens=count(usage_payload, "inputTokens"),
                output_tokens=count(usage_payload, "outputTokens"),
                total_tokens=count(usage_payload, "totalTokens"),
            )

        return ChatResponse(
            text=text_field("text"),
            model=text_field("model"),
            model_requested=text_field("modelRequested"),
            provider=text_field("provider"),
            routing_decision=text_field("routingDecision"),
            classified_by=text_field("classifiedBy"),
            cache_hit=bool(d.get("cacheHit")),
            request_id=text_field("requestId"),
            usage=parsed_usage,
            raw=d,
        )
126
+
127
+
128
+ # ── message normalization ─────────────────────────────────────────────────────
129
+
130
+
131
+ def _content_to_str(content: Any) -> str:
132
+ if isinstance(content, str):
133
+ return content
134
+ if isinstance(content, (list, tuple)):
135
+ parts = []
136
+ for c in content:
137
+ if isinstance(c, str):
138
+ parts.append(c)
139
+ elif isinstance(c, dict) and c.get("text"):
140
+ parts.append(str(c["text"]))
141
+ elif getattr(c, "text", None):
142
+ parts.append(str(c.text))
143
+ return "".join(parts)
144
+ return "" if content is None else str(content)
145
+
146
+
147
def _normalize_messages(messages: Any) -> list[dict]:
    """Convert caller input into a list of ``{"role", "content"}`` dicts.

    Accepts a bare string, a single message, or a list/tuple of messages. Each
    message may be a string (sent as role "user"), a dict (a missing or falsy
    role defaults to "user"), or an object exposing ``.type`` and ``.content``
    (LangChain-style: type "system" → "system", "ai" → "assistant", any other
    type → "user").

    Raises:
        GatewayError: code "invalid_input" for an empty sequence, a ``None``
            entry, or an entry of an unsupported type.
    """
    if isinstance(messages, str):
        return [{"role": "user", "content": messages}]

    items = messages if isinstance(messages, (list, tuple)) else [messages]
    if not items:
        raise _invalid("`messages` must not be empty")

    role_by_type = {"system": "system", "ai": "assistant"}
    normalized: list[dict] = []
    for index, item in enumerate(items):
        if item is None:
            raise _invalid(f"message at index {index} is None")
        if isinstance(item, str):
            role, content = "user", item
        elif isinstance(item, dict):
            role = item.get("role") or "user"
            content = _content_to_str(item.get("content"))
        elif hasattr(item, "type") and hasattr(item, "content"):
            role = role_by_type.get(str(getattr(item, "type")), "user")
            content = _content_to_str(item.content)
        else:
            raise _invalid(f"message at index {index} is not a str, dict, or LangChain-style message")
        normalized.append({"role": role, "content": content})
    return normalized
171
+
172
+
173
+ # ── HTTP plumbing (stdlib) ────────────────────────────────────────────────────
174
+
175
+
176
def _http_once(
    url: str, *, method: str, body: Optional[dict], timeout_s: float, headers: Optional[dict] = None
) -> tuple[int, Optional[dict]]:
    """Perform exactly one HTTP exchange and return ``(status, json_object)``.

    Args:
        url: Absolute request URL.
        method: HTTP method, e.g. "GET" or "POST".
        body: JSON-serialisable payload, or ``None`` to send no body.
        timeout_s: Timeout passed to ``urlopen`` for this single attempt.
        headers: Extra headers; they override the default JSON
            Content-Type / Accept headers on key collision.

    Returns:
        The HTTP status (2xx or not) and the decoded JSON object. The second
        element is ``None`` when the body is empty, is not valid JSON, or is
        valid JSON that is not an object.

    Raises:
        GatewayError: code "timeout" or "network" (both retryable) when no
            complete response could be read.
    """
    # Local import: the file's module-level imports do not bind the name `http`.
    import http.client

    def parse_json_object(payload: bytes) -> Optional[dict]:
        if not payload:
            return None
        try:
            parsed = json.loads(payload)
        except ValueError:  # covers JSONDecodeError and UnicodeDecodeError
            return None
        # Callers use dict methods on the result; anything else counts as "no body".
        return parsed if isinstance(parsed, dict) else None

    data = json.dumps(body).encode("utf-8") if body is not None else None
    req = urllib.request.Request(
        url,
        data=data,
        method=method,
        headers={"Content-Type": "application/json", "Accept": "application/json", **(headers or {})},
    )
    try:
        try:
            res = urllib.request.urlopen(req, timeout=timeout_s)
            status = res.status
        except urllib.error.HTTPError as http_err:  # non-2xx WITH a response
            # HTTPError doubles as the response object; read it below so that a
            # failure while reading its body is mapped like any other failure.
            res, status = http_err, http_err.code
        with res:  # closes the response (or the HTTPError) in every case
            payload = res.read()
    except (OSError, http.client.HTTPException) as err:
        # OSError covers URLError, TimeoutError and connection errors;
        # HTTPException covers protocol failures such as a truncated body.
        reason = getattr(err, "reason", err)
        if isinstance(err, TimeoutError) or isinstance(reason, TimeoutError) or "timed out" in str(reason).lower():
            raise GatewayError(
                f"request timed out after {timeout_s}s", code="timeout", retryable=True
            ) from err
        raise GatewayError(f"network error: {reason}", code="network", retryable=True) from err
    return status, parse_json_object(payload)
207
+
208
+
209
def _error_from_response(status: int, body: Optional[dict]) -> GatewayError:
    """Translate a non-2xx gateway response into a typed :class:`GatewayError`.

    Args:
        status: HTTP status code of the response.
        body: Decoded JSON body, or ``None``. A non-dict value is treated as
            an empty body instead of raising.

    Returns:
        A GatewayError (returned, not raised) whose fields are derived as:
        * message: the body's ``message``, else its ``error``, else a generic
          "gateway responded <status>".
        * code: the body's ``error`` field when it is one of the recognised
          codes, else "bad_request" for HTTP 400, else "http_error".
        * retryable: True for 429 and 5xx, except for the codes where an
          immediate retry cannot succeed.
    """
    payload = body if isinstance(body, dict) else {}
    err_field = payload.get("error")
    message = payload.get("message") or err_field or f"gateway responded {status}"

    recognised = ("demo_mode", "provider_error", "invalid_api_key", "budget_exceeded", "rate_limited")
    if isinstance(err_field, str) and err_field in recognised:
        code = err_field
    elif status == 400:
        code = "bad_request"
    else:
        code = "http_error"

    # budget_exceeded is a 429, but retrying won't help until the window rolls past.
    # demo_mode arrives as a 5xx, but the gateway has no provider keys configured,
    # so an immediate retry cannot succeed either.
    never_retry = ("budget_exceeded", "demo_mode")
    retryable = code not in never_retry and (status == 429 or status >= 500)

    return GatewayError(
        str(message),
        status=status,
        code=code,
        request_id=payload.get("requestId"),
        retryable=retryable,
    )
234
+
235
+
236
def _request_with_retries(
    url: str, *, method: str, body: Optional[dict], timeout_s: float, retries: int,
    headers: Optional[dict] = None,
) -> dict:
    """Send a request, retrying retryable failures with backoff and jitter.

    Args:
        url: Absolute request URL.
        method: HTTP method.
        body: JSON payload, or ``None``.
        timeout_s: Per-attempt timeout in seconds.
        retries: Extra attempts allowed after the first. Negative values are
            treated as 0, so at least one request is always sent.
        headers: Extra request headers.

    Returns:
        The decoded body of the first 2xx response (``{}`` when it is empty).

    Raises:
        GatewayError: the last failure, once it is non-retryable or the
            attempts are exhausted.
    """
    # Clamp here: a negative value would make range() empty, so no request
    # would be sent at all.
    last_attempt = max(0, retries)
    for attempt in range(last_attempt + 1):
        if attempt > 0:
            # Exponential backoff capped at _RETRY_CAP_S; sleep between half
            # and all of it so concurrent clients do not retry in lockstep.
            exp = min(_RETRY_CAP_S, _RETRY_BASE_S * (2 ** (attempt - 1)))
            time.sleep(exp / 2 + random.random() * (exp / 2))
        try:
            status, parsed = _http_once(url, method=method, body=body, timeout_s=timeout_s, headers=headers)
        except GatewayError as err:  # network / timeout
            if err.retryable and attempt < last_attempt:
                continue
            raise
        if 200 <= status < 300:
            return parsed or {}
        gerr = _error_from_response(status, parsed)
        if gerr.retryable and attempt < last_attempt:
            continue
        raise gerr
    # Unreachable: the final attempt always returns or raises above.
    raise GatewayError("request failed")  # pragma: no cover
260
+
261
+
262
+ # ── the client ────────────────────────────────────────────────────────────────
263
+
264
+
265
class Client:
    """Gateway client. Create via :func:`create_client`.

    Holds the gateway base URL, default attribution metadata
    (application / workflow / department / user id), an optional bearer API
    key, and the default per-attempt timeout and retry count.
    """

    def __init__(
        self,
        *,
        base_url: Optional[str] = None,
        application: Optional[str] = None,
        workflow: Optional[str] = None,
        department: Optional[str] = None,
        user_id: Optional[str] = None,
        api_key: Optional[str] = None,
        timeout_s: float = 60.0,
        retries: int = 2,
    ) -> None:
        """Configure the client.

        ``base_url`` falls back to ``$ARBR_GATEWAY_URL`` and ``api_key`` to
        ``$ARBR_API_KEY``. Trailing slashes are stripped from the URL.

        Raises:
            GatewayError: code "invalid_input" when no base URL is available.
        """
        url = (base_url or os.environ.get("ARBR_GATEWAY_URL") or "").rstrip("/")
        if not url:
            raise _invalid("`base_url` is required (or set ARBR_GATEWAY_URL)")
        self.base_url = url
        # Keys use the gateway's camelCase wire names; None means "not set".
        self._defaults = {
            "application": application,
            "workflow": workflow,
            "department": department,
            "userId": user_id,
        }
        # Gateway API key ("ab_…", Settings → API keys). Binds attribution server-side.
        key = api_key or os.environ.get("ARBR_API_KEY")
        self._headers = {"Authorization": f"Bearer {key}"} if key else {}
        self._timeout_s = timeout_s
        self._retries = max(0, retries)

    # — chat —

    def chat(
        self,
        messages: Any,
        *,
        model: Optional[str] = None,
        provider: Optional[str] = None,
        task_type: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        application: Optional[str] = None,
        workflow: Optional[str] = None,
        department: Optional[str] = None,
        user_id: Optional[str] = None,
        timeout_s: Optional[float] = None,
        retries: Optional[int] = None,
    ) -> ChatResponse:
        """One routed completion. ``model=None`` or ``"auto"`` → the gateway's
        router decides (rules → automated routing → default); an explicit model
        whose provider is connected is honored as-is.

        Per-call metadata overrides the client defaults; arguments left as
        ``None`` are omitted from the request body. ``timeout_s`` / ``retries``
        override the client defaults for this call only (a negative
        ``retries`` is treated as 0, like the constructor's).

        Raises:
            GatewayError: on invalid input, or when the request fails.
        """
        if messages is None:
            raise _invalid("`messages` is required")
        body: dict[str, Any] = {k: v for k, v in self._defaults.items() if v is not None}
        overrides = {
            "application": application,
            "workflow": workflow,
            "department": department,
            "userId": user_id,
            "model": model,
            "provider": provider,
            "taskType": task_type,
            "temperature": temperature,
            "maxTokens": max_tokens,
        }
        body.update({k: v for k, v in overrides.items() if v is not None})
        body["messages"] = _normalize_messages(messages)
        raw = _request_with_retries(
            f"{self.base_url}/v1/chat",
            method="POST",
            body=body,
            timeout_s=timeout_s if timeout_s is not None else self._timeout_s,
            # Clamp the override exactly as __init__ clamps the default.
            retries=max(0, retries) if retries is not None else self._retries,
            headers=self._headers,
        )
        return ChatResponse._from_dict(raw)

    async def achat(self, messages: Any, **kwargs: Any) -> ChatResponse:
        """Async :meth:`chat` (runs the blocking call in a worker thread)."""
        return await asyncio.to_thread(self.chat, messages, **kwargs)

    # — stream (honest shim) —

    @staticmethod
    def _chunks(text: str) -> Iterator[str]:
        """Yield ``text`` in consecutive slices of ``_STREAM_CHUNK_CHARS`` characters."""
        for start in range(0, len(text), _STREAM_CHUNK_CHARS):
            yield text[start : start + _STREAM_CHUNK_CHARS]

    def stream(self, messages: Any, **kwargs: Any) -> Iterator[str]:
        """Yield the answer in small text chunks.

        NOTE: this is not token-by-token streaming — it makes ONE buffered
        :meth:`chat` call and chunks the text out (near-streaming UX). Use
        :meth:`chat` when you need the full metadata."""
        res = self.chat(messages, **kwargs)
        yield from self._chunks(res.text)

    async def astream(self, messages: Any, **kwargs: Any) -> AsyncIterator[str]:
        """Async :meth:`stream` (same buffered-shim caveat)."""
        res = await self.achat(messages, **kwargs)
        for chunk in self._chunks(res.text):
            yield chunk

    # — status —

    def status(self) -> dict:
        """Gateway healthcheck — GET /api/status."""
        return _request_with_retries(
            f"{self.base_url}/api/status",
            method="GET",
            body=None,
            timeout_s=self._timeout_s,
            retries=self._retries,
            headers=self._headers,
        )

    async def astatus(self) -> dict:
        """Async :meth:`status` (runs the blocking call in a worker thread)."""
        return await asyncio.to_thread(self.status)
382
+
383
+
384
def create_client(
    base_url: Optional[str] = None,
    *,
    application: Optional[str] = None,
    workflow: Optional[str] = None,
    department: Optional[str] = None,
    user_id: Optional[str] = None,
    api_key: Optional[str] = None,
    timeout_s: float = 60.0,
    retries: int = 2,
) -> Client:
    """Factory for :class:`Client`; every argument is forwarded unchanged.

    ``base_url`` falls back to $ARBR_GATEWAY_URL and ``api_key`` to
    $ARBR_API_KEY. Unlike the ``Client`` constructor, ``base_url`` may be
    passed positionally.
    """
    options = {
        "base_url": base_url,
        "application": application,
        "workflow": workflow,
        "department": department,
        "user_id": user_id,
        "api_key": api_key,
        "timeout_s": timeout_s,
        "retries": retries,
    }
    return Client(**options)
407
+
408
+
409
+ # ── LangChain-style adapter (duck-typed; no LangChain dependency) ─────────────
410
+
411
+
412
+ class _AiMessageShape:
413
+ """AIMessage-shaped result: .content, .usage_metadata, .response_metadata,
414
+ .type == "ai". Attribute access only — not a real LangChain object."""
415
+
416
+ def __init__(self, res: ChatResponse) -> None:
417
+ u = res.usage or Usage()
418
+ self.content = res.text
419
+ self.usage_metadata = {
420
+ "input_tokens": u.input_tokens,
421
+ "output_tokens": u.output_tokens,
422
+ "total_tokens": u.total_tokens,
423
+ }
424
+ self.response_metadata = {
425
+ "model": res.model,
426
+ "provider": res.provider,
427
+ "routingDecision": res.routing_decision,
428
+ "classifiedBy": res.classified_by,
429
+ "modelRequested": res.model_requested,
430
+ "requestId": res.request_id,
431
+ "gateway": True,
432
+ }
433
+ self.additional_kwargs: dict = {}
434
+ self.type = "ai"
435
+
436
+
437
+ def _coerce_lc_input(value: Any) -> Any:
438
+ # A LangChain PromptValue (from `prompt | model` chains) → messages.
439
+ if hasattr(value, "to_messages"):
440
+ return value.to_messages()
441
+ return value
442
+
443
+
444
class _LangChainishModel:
    """Duck-typed, minimal LangChain-style chat model backed by the gateway.

    Provides ``.invoke()``, ``.ainvoke()`` and ``.stream()``, and is itself
    callable so simple `prompt | model` chains can coerce it via
    RunnableLambda. For FULL Runnable compatibility (callbacks, batch,
    with_structured_output), wrap the client in a real BaseChatModel subclass
    in your app instead.
    """

    def __init__(self, client: Client, meta: dict) -> None:
        self._client = client
        # Keyword arguments forwarded to every Client.chat / Client.achat call.
        self._meta = meta

    def invoke(self, messages: Any, _config: Any = None, **_: Any) -> _AiMessageShape:
        """Run one completion; ``_config`` and extra kwargs are accepted but ignored."""
        prepared = _coerce_lc_input(messages)
        return _AiMessageShape(self._client.chat(prepared, **self._meta))

    async def ainvoke(self, messages: Any, _config: Any = None, **_: Any) -> _AiMessageShape:
        """Async :meth:`invoke`, backed by ``Client.achat``."""
        prepared = _coerce_lc_input(messages)
        response = await self._client.achat(prepared, **self._meta)
        return _AiMessageShape(response)

    # Callable → coercible to RunnableLambda in `prompt | model` chains.
    def __call__(self, messages: Any, **_: Any) -> _AiMessageShape:
        return self.invoke(messages)

    def stream(self, messages: Any, **_: Any) -> Iterator[_AiMessageShape]:
        """Yield message-shaped chunks sliced from ONE buffered chat call.

        Every chunk carries the full response's metadata; only ``content``
        differs from chunk to chunk.
        """
        response = self._client.chat(_coerce_lc_input(messages), **self._meta)
        full_text = response.text
        for start in range(0, len(full_text), _STREAM_CHUNK_CHARS):
            piece = _AiMessageShape(response)
            piece.content = full_text[start : start + _STREAM_CHUNK_CHARS]
            yield piece
475
+
476
+
477
def as_langchain_model(client: Client, **meta: Any) -> _LangChainishModel:
    """Expose ``client`` as a minimal LangChain-style chat model.

    No LangChain dependency is involved. The keyword arguments in ``meta``
    (workflow, task_type, model, temperature, max_tokens, ...) are stored and
    merged into every call the returned model makes.
    """
    adapter = _LangChainishModel(client, meta)
    return adapter
@@ -0,0 +1,119 @@
1
+ """Optional LangChain integration: the gateway as a real ``BaseChatModel``.
2
+
3
+ Requires ``langchain-core`` (install via ``pip install arbr-client[langchain]``).
4
+ The core ``arbr_client`` package stays zero-dependency; this module imports
5
+ LangChain lazily and fails with a clear message if it's missing.
6
+
7
+ from arbr_client import create_client
8
+ from arbr_client.langchain import ArbrChatModel
9
+
10
+ client = create_client("http://localhost:4100", application="my-app")
11
+ llm = ArbrChatModel(client=client, model_name="auto") # or a pinned model id
12
+ # Full Runnable compatibility: prompt | llm, .ainvoke(), batching, callbacks.
13
+
14
+ For apps that should NOT take a langchain-core dependency, use the zero-dep
15
+ duck-typed adapter instead: ``arbr_client.as_langchain_model(client, ...)``.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from typing import Any, List, Optional
21
+
22
+ try:
23
+ from langchain_core.callbacks import (
24
+ AsyncCallbackManagerForLLMRun,
25
+ CallbackManagerForLLMRun,
26
+ )
27
+ from langchain_core.language_models.chat_models import BaseChatModel
28
+ from langchain_core.messages import AIMessage, BaseMessage
29
+ from langchain_core.outputs import ChatGeneration, ChatResult
30
+ except ImportError as _err: # pragma: no cover
31
+ raise ImportError(
32
+ "arbr_client.langchain requires langchain-core — "
33
+ "install it with: pip install arbr-client[langchain]"
34
+ ) from _err
35
+
36
+ __all__ = ["ArbrChatModel", "KaryaChatModel"]
37
+
38
+
39
class ArbrChatModel(BaseChatModel):
    """LangChain ``BaseChatModel`` that sends every completion through the gateway.

    ``model_name`` follows the gateway's semantics: an explicit model id is
    honored as-is when its provider is connected; ``"auto"`` (or ``None``)
    lets the gateway's router decide (rules → automated routing → default).

    Out of gateway scope (keep on direct provider SDKs): tool calling /
    ``with_structured_output``, embeddings, token-level streaming.
    """

    client: Any  # arbr_client.Client
    model_name: Optional[str] = None
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
    workflow: Optional[str] = None
    task_type: Optional[str] = None

    @property
    def _llm_type(self) -> str:
        return "arbr-gateway"

    @property
    def _identifying_params(self) -> dict:
        return {"model_name": self.model_name, "gateway": self.client.base_url}

    def _call_kwargs(self) -> dict:
        """Keyword arguments forwarded to ``client.chat`` / ``client.achat``."""
        return dict(
            model=self.model_name,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
            workflow=self.workflow,
            task_type=self.task_type,
        )

    def _to_result(self, res: Any) -> ChatResult:
        """Wrap a gateway response in a single-generation ``ChatResult``."""
        token_usage = (
            None
            if res.usage is None
            else {
                "input_tokens": res.usage.input_tokens,
                "output_tokens": res.usage.output_tokens,
                "total_tokens": res.usage.total_tokens,
            }
        )
        # Routing metadata keeps the gateway's camelCase keys.
        routing_info = {
            "model": res.model,
            "provider": res.provider,
            "routingDecision": res.routing_decision,
            "classifiedBy": res.classified_by,
            "modelRequested": res.model_requested,
            "requestId": res.request_id,
            "gateway": True,
        }
        ai_message = AIMessage(
            content=res.text,
            usage_metadata=token_usage,
            response_metadata=routing_info,
        )
        return ChatResult(generations=[ChatGeneration(message=ai_message)])

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        # NOTE: `stop`, `run_manager` and extra kwargs are accepted for
        # interface compatibility but are not forwarded to the gateway.
        response = self.client.chat(messages, **self._call_kwargs())
        return self._to_result(response)

    async def _agenerate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        # Same contract as _generate, using the client's async call.
        response = await self.client.achat(messages, **self._call_kwargs())
        return self._to_result(response)
116
+
117
+
118
+ # Backward-compatibility alias (Karya → Arbr rename).
119
+ KaryaChatModel = ArbrChatModel
@@ -0,0 +1,202 @@
1
+ Metadata-Version: 2.4
2
+ Name: arbr-client
3
+ Version: 0.1.0
4
+ Summary: Official Python client for the Arbr AI control-plane gateway — one function to route, observe, and govern every LLM call.
5
+ Author: Gyde
6
+ License: MIT
7
+ Keywords: llm,ai,gateway,routing,control-plane,openai,anthropic,gemini,bedrock,cost
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3 :: Only
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Requires-Python: >=3.11
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Provides-Extra: langchain
19
+ Requires-Dist: langchain-core>=0.2; extra == "langchain"
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
22
+ Dynamic: license-file
23
+
24
+ # arbr-client (Python)
25
+
26
+ Official Python client for the **Arbr AI control plane** — one function to route, observe,
27
+ and govern every LLM call your app makes.
28
+
29
+ Your app calls the gateway instead of provider SDKs. The gateway holds the provider keys,
30
+ honors the model you pin (or picks one when you say `"auto"`), applies human-approved routing
31
+ rules and cost policies, and logs every call with full cost attribution — visible in the dashboard.
32
+
33
+ - **Zero dependencies** — Python ≥ 3.11, stdlib only. Sync *and* async (`achat`/`astream`).
34
+ - **One function for the 90% case** — `chat()`.
35
+ - **Robust by default** — per-attempt timeouts, retries with exponential backoff + jitter on
36
+ network errors / 429 / 5xx, typed errors.
37
+ - **Optional LangChain integration** — a real `BaseChatModel` via `arbr-client[langchain]`.
38
+
39
+ ## Install
40
+
41
+ ```sh
42
+ pip install arbr-client # core (zero deps)
43
+ pip install "arbr-client[langchain]" # + the LangChain BaseChatModel adapter
44
+ # (pre-release: pip install /path/to/arbr_client-0.1.0-py3-none-any.whl)
45
+ ```
46
+
47
+ ## 60-second quickstart
48
+
49
+ ```python
50
+ from arbr_client import create_client
51
+
52
+ arbr = create_client(
53
+ "http://localhost:4100", # or set ARBR_GATEWAY_URL
54
+ application="my-app", # attribution — shows up in the dashboard
55
+ )
56
+
57
+ res = arbr.chat("Summarise this support ticket: ...", model="auto", max_tokens=300)
58
+ print(res.text)
59
+ print(res.model, res.routing_decision) # e.g. "gpt-4o-mini", "ai"
60
+ ```
61
+
62
+ Async (FastAPI, LangGraph, etc.):
63
+
64
+ ```python
65
+ res = await arbr.achat("Summarise this ticket: ...", model="auto")
66
+ ```
67
+
68
+ That's a complete integration. No provider keys in your app, and every call is logged,
69
+ costed, and governable from the dashboard.
70
+
71
+ ## How model choice works
72
+
73
+ | You send | What happens |
74
+ |---|---|
75
+ | `model="gpt-4o"` (provider connected) | Honored **as-is** — all routing policies skipped. `routing_decision == "explicit"` |
76
+ | `model="auto"` or omitted | The gateway decides: cache → operator rules → automated routing (cost guardrail or AI policy) → default model |
77
+ | a model whose provider isn't connected | Falls back to the router (same as `"auto"`) |
78
+
79
+ `res.model_requested` shows what you asked for, `res.model` what served it, `res.routing_decision`
80
+ why (`explicit / rule / auto / ai / cache / fallback / passthrough`), and `res.classified_by` how
81
+ the task type was determined (`provided / keyword / ai`).
82
+
83
+ ## API
84
+
85
+ ### `create_client(base_url=None, *, application=None, workflow=None, department=None, user_id=None, api_key=None, timeout_s=60, retries=2) → Client`
86
+
87
+ `base_url` falls back to `$ARBR_GATEWAY_URL`; `api_key` to `$ARBR_API_KEY`. A gateway API key
88
+ (`ab_…`, dashboard → Settings → API keys) is sent as `Authorization: Bearer` and binds attribution
89
+ server-side — required once the gateway has *Require API keys* on. The metadata kwargs are defaults
90
+ merged into every call (per-call kwargs override them).
91
+
92
+ ### `Client.chat(messages, *, model=None, provider=None, task_type=None, temperature=None, max_tokens=None, ...) → ChatResponse`
93
+
94
+ `messages` accepts a bare string, `{"role", "content"}` dicts, or LangChain message objects.
95
+ `ChatResponse` is a frozen dataclass: `text`, `usage` (`input_tokens/output_tokens/total_tokens`),
96
+ `model`, `model_requested`, `provider`, `routing_decision`, `classified_by`, `cache_hit`,
97
+ `request_id`, plus `.raw` (the unmodified gateway payload).
98
+
99
+ ### `Client.achat(...)` / `Client.astream(...)` / `Client.astatus()`
100
+
101
+ Async counterparts (the blocking call runs in a worker thread via `asyncio.to_thread`).
102
+
103
+ ### Streaming
104
+
105
+ The gateway supports two streaming modes:
106
+
107
+ **Real SSE (token-by-token)** — use the OpenAI-compatible endpoint at `POST /v1/chat/completions`
108
+ with `stream=True`. Works with the OpenAI Python SDK, any chat UI, or a raw `httpx`/`requests` call:
109
+
110
+ ```python
111
+ from openai import OpenAI
112
+
113
+ client = OpenAI(api_key="ab_…", base_url="http://localhost:4100/v1")
114
+ stream = client.chat.completions.create(
115
+ model="gpt-4o-mini",
116
+ messages=[{"role": "user", "content": "Tell me a joke"}],
117
+ stream=True,
118
+ )
119
+ for chunk in stream:
120
+ print(chunk.choices[0].delta.content or "", end="", flush=True)
121
+ ```
122
+
123
+ **`Client.stream(messages, ...) → Iterator[str]`** — makes one buffered `chat()` call and yields
124
+ the text in small chunks. Useful when you want full routing metadata (`res.model`,
125
+ `res.routing_decision`, etc.) alongside a streaming-style emit:
126
+
127
+ ```python
128
+ for chunk in arbr.stream("Explain quantum entanglement simply"):
129
+ print(chunk, end="", flush=True)
130
+ ```
131
+
132
+ Use the OpenAI-compat endpoint when you need real token-by-token delivery or are integrating with
133
+ chat UIs. Use `stream()` when you want the routing metadata the OpenAI endpoint doesn't expose.
134
+
135
+ ### `Client.status() → dict`
136
+
137
+ Healthcheck against `GET /api/status` — `demoMode`, `liveProviders`, `defaultProvider`,
138
+ `defaultModel`, `routingMode`, `breachedCaps`.
139
+ When the gateway has admin auth enabled (`ARBR_ADMIN_KEY` set server-side), this endpoint
140
+ requires a credential — your gateway `api_key` is accepted, so set it and `status()` keeps working.
141
+
142
+ ## Error handling
143
+
144
+ All failures raise `GatewayError` with `.status`, `.code`, `.retryable`, `.request_id`:
145
+
146
+ | `code` | Meaning | Retried automatically? |
147
+ |---|---|---|
148
+ | `invalid_input` | Bad arguments (caught before any network call) | no |
149
+ | `bad_request` | Gateway rejected the request (HTTP 400) | no |
150
+ | `demo_mode` | Gateway has no provider keys configured (HTTP 503) | no |
151
+ | `provider_error` | All providers failed for this call (HTTP 502) | yes (5xx) |
152
+ | `http_error` | Other non-2xx | 429/5xx only |
153
+ | `invalid_api_key` | Missing/unknown/revoked gateway API key (HTTP 401) | no |
154
+ | `budget_exceeded` | A budget cap with action *Block* is breached for your scope (HTTP 429) | no — retrying won't help until the window rolls past |
155
+ | `rate_limited` | Your API key is over its requests/minute limit (HTTP 429) | yes |
156
+ | `network` | Connection failed | yes |
157
+ | `timeout` | Per-attempt timeout elapsed | yes |
158
+
159
+ ## LangChain integration
160
+
161
+ Two options, by how deep your LangChain usage goes:
162
+
163
+ **1. Full `BaseChatModel` (recommended for LangChain/LangGraph apps)** — requires the extra:
164
+
165
+ ```python
166
+ from arbr_client import create_client
167
+ from arbr_client.langchain import ArbrChatModel
168
+
169
+ client = create_client("http://localhost:4100", application="my-app")
170
+ llm = ArbrChatModel(client=client, model_name="auto", max_tokens=1024)
171
+
172
+ chain = my_prompt | llm # full Runnable compatibility:
173
+ await chain.ainvoke({...}) # pipes, async, batching, callbacks
174
+ ```
175
+
176
+ **2. Zero-dep duck-typed adapter** — when you don't want a langchain-core dependency:
177
+
178
+ ```python
179
+ from arbr_client import as_langchain_model
180
+ llm = as_langchain_model(client, workflow="answer-drafting")
181
+ msg = llm.invoke(messages) # .invoke()/.ainvoke(); AIMessage-shaped result
182
+ ```
183
+
184
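+ Out of scope for both adapters: tool calling / `with_structured_output`, embeddings, and
185
+ token-level streaming. Keep tool calling and embeddings on direct provider SDKs; for token-by-token streaming, use the OpenAI-compatible endpoint described under "Streaming".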
186
+
187
+ ## Gradual rollout pattern
188
+
189
+ Gate the swap at your app's LLM factory so nothing else changes:
190
+
191
+ ```python
192
+ def get_llm():
193
+ if os.environ.get("ARBR_GATEWAY_URL"):
194
+ return ArbrChatModel(client=_arbr_client(), model_name=settings.llm_model)
195
+ return build_direct_provider_model() # unchanged path
196
+ ```
197
+
198
+ Unset `ARBR_GATEWAY_URL` to revert instantly.
199
+
200
+ ## License
201
+
202
+ MIT
@@ -0,0 +1,7 @@
1
+ arbr_client/__init__.py,sha256=xt9PY_j7ZqGAXic4hcTDx5ckVVk-p_11-85FNkcd45Q,17786
2
+ arbr_client/langchain.py,sha256=KxcoAH8pbVj4kOKGiWDYgWUje7A9kfC_474IMGga7bI,4262
3
+ arbr_client-0.1.0.dist-info/licenses/LICENSE,sha256=JldKhrRTw7NX0Ez2qqITMbeIJ1bOJJlDmUK-U0ZW94M,1061
4
+ arbr_client-0.1.0.dist-info/METADATA,sha256=B5I308M42eIxFfHUWbdO8fIOwpMBfIeDI9EL4lLG_jc,8278
5
+ arbr_client-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
6
+ arbr_client-0.1.0.dist-info/top_level.txt,sha256=fyVHSgqGkJcdvCsgqfFKUiuUz6qqida3lD3jqXLwAXk,12
7
+ arbr_client-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Gyde
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ arbr_client