PyPI - astra-ai-sdk - Versions diffs - 0.2.0__tar.gz - Mend

astra-ai-sdk 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

astra_ai_sdk-0.2.0/LICENSE +21 -0
astra_ai_sdk-0.2.0/PKG-INFO +141 -0
astra_ai_sdk-0.2.0/README.md +82 -0
astra_ai_sdk-0.2.0/astra_ai_sdk.egg-info/PKG-INFO +141 -0
astra_ai_sdk-0.2.0/astra_ai_sdk.egg-info/SOURCES.txt +21 -0
astra_ai_sdk-0.2.0/astra_ai_sdk.egg-info/dependency_links.txt +1 -0
astra_ai_sdk-0.2.0/astra_ai_sdk.egg-info/entry_points.txt +2 -0
astra_ai_sdk-0.2.0/astra_ai_sdk.egg-info/requires.txt +17 -0
astra_ai_sdk-0.2.0/astra_ai_sdk.egg-info/top_level.txt +1 -0
astra_ai_sdk-0.2.0/astra_sdk/__init__.py +41 -0
astra_ai_sdk-0.2.0/astra_sdk/_http.py +104 -0
astra_ai_sdk-0.2.0/astra_sdk/cli.py +150 -0
astra_ai_sdk-0.2.0/astra_sdk/client.py +80 -0
astra_ai_sdk-0.2.0/astra_sdk/py.typed +0 -0
astra_ai_sdk-0.2.0/astra_sdk/runner.py +275 -0
astra_ai_sdk-0.2.0/astra_sdk/stats.py +151 -0
astra_ai_sdk-0.2.0/astra_sdk/system.py +239 -0
astra_ai_sdk-0.2.0/astra_sdk/telemetry.py +255 -0
astra_ai_sdk-0.2.0/pyproject.toml +46 -0
astra_ai_sdk-0.2.0/setup.cfg +4 -0
astra_ai_sdk-0.2.0/tests/test_client.py +79 -0
astra_ai_sdk-0.2.0/tests/test_stats.py +56 -0
astra_ai_sdk-0.2.0/tests/test_telemetry.py +117 -0

astra_ai_sdk-0.2.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Astra
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

astra_ai_sdk-0.2.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,141 @@
+Metadata-Version: 2.4
+Name: astra-ai-sdk
+Version: 0.2.0
+Summary: Serve Astra-compressed models anywhere - hosted or local ONNX serving with built-in telemetry that feeds the Astra dashboard
+Author: Astra
+License: MIT License
+        Copyright (c) 2026 Astra
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+Project-URL: Homepage, https://github.com/AstraFoundation/Astra-Back
+Project-URL: Documentation, https://github.com/AstraFoundation/Astra-Back#readme
+Project-URL: Changelog, https://github.com/AstraFoundation/Astra-Back/blob/main/clients/python/CHANGELOG.md
+Keywords: onnx,model-compression,inference,telemetry,mlops
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: httpx>=0.27
+Provides-Extra: serve
+Requires-Dist: onnxruntime>=1.18; extra == "serve"
+Requires-Dist: numpy>=1.26; extra == "serve"
+Provides-Extra: system
+Requires-Dist: psutil>=5.9; extra == "system"
+Provides-Extra: gpu
+Requires-Dist: nvidia-ml-py>=12; extra == "gpu"
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0; extra == "dev"
+Requires-Dist: numpy>=1.26; extra == "dev"
+Requires-Dist: onnx>=1.16; extra == "dev"
+Requires-Dist: onnxruntime>=1.18; extra == "dev"
+Dynamic: license-file
+# astra-ai-sdk
+Serve **Astra-compressed models** anywhere — and keep the Astra dashboard
+monitoring them while they run on your hardware.
+```bash
+pip install astra-ai-sdk                # hosted inference client
+pip install 'astra-ai-sdk[serve]'       # + local ONNX serving (onnxruntime, numpy)
+pip install 'astra-ai-sdk[serve,system]'  # + precise CPU/RSS metrics (psutil)
+```
+## Hosted inference
+Calls the Astra-hosted endpoint; telemetry is recorded server-side.
+```python
+from astra_sdk import AstraClient
+# base_url defaults to the hosted Astra origin (override with ASTRA_BASE_URL).
+client = AstraClient("dep_ab12cd34ef", "astra_sk_live_...")
+out = client.infer({"input": [[0.1, 0.2, 0.3]]})
+print(out["latencyMs"], out["outputs"])
+```
+## Local serving (the headline)
+Pulls the deployed, compressed artifact once (sha256-cached under
+`~/.cache/astra`) and serves it with onnxruntime in your process:
+```python
+from astra_sdk import LocalRunner
+# base_url defaults to the hosted Astra origin (override with ASTRA_BASE_URL).
+runner = LocalRunner.from_deployment("dep_ab12cd34ef", "astra_sk_live_...")
+out = runner.run({"input": my_numpy_array})   # local inference
+print(out["latencyMs"], out["raw"][0].shape)
+runner.close()
+```
+### Run a file you already have
+Downloaded the artifact (SDK Hub → **Download Artifact**) or have an `.onnx` on
+disk? Skip the deployment — serve the file directly:
+```python
+from astra_sdk import LocalRunner
+runner = LocalRunner.from_file("compressed.onnx")
+out = runner.run({"input": my_numpy_array})
+runner.close()
+```
+Telemetry is off for a bare file; pass `deployment_id=` + `api_key=` to still
+report local runs to that deployment.
+### What gets reported to the dashboard
+A background thread batches telemetry to Astra (never blocks or breaks your
+serving path; bounded queue with drop-oldest under pressure):
+| Stream | Cadence | Fields |
+|---|---|---|
+| **Request events** | per inference | timestamp, latency breakdown (preprocess / inference / postprocess ms), success / error code, batch size, region tag, input shape signature |
+| **System snapshots** | ~30 s | CPU %, RSS MB, throughput req/min, dropped-event count, SDK / Python / onnxruntime versions, OS, arch, execution provider, hostname |
+| **Window stats** | ~60 s or 200 requests | per-input tensor mean/std/min/max/NaN%, output class distribution (top-10), 16-bin confidence histogram, mean entropy, mean top-1 confidence |
+Window stats power the dashboard's **prediction drift** (PSI vs the
+deployment's reference distribution) and **input distribution shift** alerts.
+Opt out any time: `LocalRunner.from_deployment(..., report_telemetry=False)`
+or `ASTRA_SDK_TELEMETRY=0`.
+## CLI
+```bash
+astra pull  --deployment dep_x --api-key KEY
+astra serve --deployment dep_x --api-key KEY --port 8765
+astra bench --deployment dep_x --api-key KEY -n 200
+```
+Options can also come from `ASTRA_BASE_URL`, `ASTRA_DEPLOYMENT_ID`,
+`ASTRA_API_KEY`.

astra_ai_sdk-0.2.0/README.md ADDED Viewed

@@ -0,0 +1,82 @@
+# astra-ai-sdk
+Serve **Astra-compressed models** anywhere — and keep the Astra dashboard
+monitoring them while they run on your hardware.
+```bash
+pip install astra-ai-sdk                # hosted inference client
+pip install 'astra-ai-sdk[serve]'       # + local ONNX serving (onnxruntime, numpy)
+pip install 'astra-ai-sdk[serve,system]'  # + precise CPU/RSS metrics (psutil)
+```
+## Hosted inference
+Calls the Astra-hosted endpoint; telemetry is recorded server-side.
+```python
+from astra_sdk import AstraClient
+# base_url defaults to the hosted Astra origin (override with ASTRA_BASE_URL).
+client = AstraClient("dep_ab12cd34ef", "astra_sk_live_...")
+out = client.infer({"input": [[0.1, 0.2, 0.3]]})
+print(out["latencyMs"], out["outputs"])
+```
+## Local serving (the headline)
+Pulls the deployed, compressed artifact once (sha256-cached under
+`~/.cache/astra`) and serves it with onnxruntime in your process:
+```python
+from astra_sdk import LocalRunner
+# base_url defaults to the hosted Astra origin (override with ASTRA_BASE_URL).
+runner = LocalRunner.from_deployment("dep_ab12cd34ef", "astra_sk_live_...")
+out = runner.run({"input": my_numpy_array})   # local inference
+print(out["latencyMs"], out["raw"][0].shape)
+runner.close()
+```
+### Run a file you already have
+Downloaded the artifact (SDK Hub → **Download Artifact**) or have an `.onnx` on
+disk? Skip the deployment — serve the file directly:
+```python
+from astra_sdk import LocalRunner
+runner = LocalRunner.from_file("compressed.onnx")
+out = runner.run({"input": my_numpy_array})
+runner.close()
+```
+Telemetry is off for a bare file; pass `deployment_id=` + `api_key=` to still
+report local runs to that deployment.
+### What gets reported to the dashboard
+A background thread batches telemetry to Astra (never blocks or breaks your
+serving path; bounded queue with drop-oldest under pressure):
+| Stream | Cadence | Fields |
+|---|---|---|
+| **Request events** | per inference | timestamp, latency breakdown (preprocess / inference / postprocess ms), success / error code, batch size, region tag, input shape signature |
+| **System snapshots** | ~30 s | CPU %, RSS MB, throughput req/min, dropped-event count, SDK / Python / onnxruntime versions, OS, arch, execution provider, hostname |
+| **Window stats** | ~60 s or 200 requests | per-input tensor mean/std/min/max/NaN%, output class distribution (top-10), 16-bin confidence histogram, mean entropy, mean top-1 confidence |
+Window stats power the dashboard's **prediction drift** (PSI vs the
+deployment's reference distribution) and **input distribution shift** alerts.
+Opt out any time: `LocalRunner.from_deployment(..., report_telemetry=False)`
+or `ASTRA_SDK_TELEMETRY=0`.
+## CLI
+```bash
+astra pull  --deployment dep_x --api-key KEY
+astra serve --deployment dep_x --api-key KEY --port 8765
+astra bench --deployment dep_x --api-key KEY -n 200
+```
+Options can also come from `ASTRA_BASE_URL`, `ASTRA_DEPLOYMENT_ID`,
+`ASTRA_API_KEY`.

astra_ai_sdk-0.2.0/astra_ai_sdk.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,141 @@
+Metadata-Version: 2.4
+Name: astra-ai-sdk
+Version: 0.2.0
+Summary: Serve Astra-compressed models anywhere - hosted or local ONNX serving with built-in telemetry that feeds the Astra dashboard
+Author: Astra
+License: MIT License
+        Copyright (c) 2026 Astra
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+Project-URL: Homepage, https://github.com/AstraFoundation/Astra-Back
+Project-URL: Documentation, https://github.com/AstraFoundation/Astra-Back#readme
+Project-URL: Changelog, https://github.com/AstraFoundation/Astra-Back/blob/main/clients/python/CHANGELOG.md
+Keywords: onnx,model-compression,inference,telemetry,mlops
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: httpx>=0.27
+Provides-Extra: serve
+Requires-Dist: onnxruntime>=1.18; extra == "serve"
+Requires-Dist: numpy>=1.26; extra == "serve"
+Provides-Extra: system
+Requires-Dist: psutil>=5.9; extra == "system"
+Provides-Extra: gpu
+Requires-Dist: nvidia-ml-py>=12; extra == "gpu"
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0; extra == "dev"
+Requires-Dist: numpy>=1.26; extra == "dev"
+Requires-Dist: onnx>=1.16; extra == "dev"
+Requires-Dist: onnxruntime>=1.18; extra == "dev"
+Dynamic: license-file
+# astra-ai-sdk
+Serve **Astra-compressed models** anywhere — and keep the Astra dashboard
+monitoring them while they run on your hardware.
+```bash
+pip install astra-ai-sdk                # hosted inference client
+pip install 'astra-ai-sdk[serve]'       # + local ONNX serving (onnxruntime, numpy)
+pip install 'astra-ai-sdk[serve,system]'  # + precise CPU/RSS metrics (psutil)
+```
+## Hosted inference
+Calls the Astra-hosted endpoint; telemetry is recorded server-side.
+```python
+from astra_sdk import AstraClient
+# base_url defaults to the hosted Astra origin (override with ASTRA_BASE_URL).
+client = AstraClient("dep_ab12cd34ef", "astra_sk_live_...")
+out = client.infer({"input": [[0.1, 0.2, 0.3]]})
+print(out["latencyMs"], out["outputs"])
+```
+## Local serving (the headline)
+Pulls the deployed, compressed artifact once (sha256-cached under
+`~/.cache/astra`) and serves it with onnxruntime in your process:
+```python
+from astra_sdk import LocalRunner
+# base_url defaults to the hosted Astra origin (override with ASTRA_BASE_URL).
+runner = LocalRunner.from_deployment("dep_ab12cd34ef", "astra_sk_live_...")
+out = runner.run({"input": my_numpy_array})   # local inference
+print(out["latencyMs"], out["raw"][0].shape)
+runner.close()
+```
+### Run a file you already have
+Downloaded the artifact (SDK Hub → **Download Artifact**) or have an `.onnx` on
+disk? Skip the deployment — serve the file directly:
+```python
+from astra_sdk import LocalRunner
+runner = LocalRunner.from_file("compressed.onnx")
+out = runner.run({"input": my_numpy_array})
+runner.close()
+```
+Telemetry is off for a bare file; pass `deployment_id=` + `api_key=` to still
+report local runs to that deployment.
+### What gets reported to the dashboard
+A background thread batches telemetry to Astra (never blocks or breaks your
+serving path; bounded queue with drop-oldest under pressure):
+| Stream | Cadence | Fields |
+|---|---|---|
+| **Request events** | per inference | timestamp, latency breakdown (preprocess / inference / postprocess ms), success / error code, batch size, region tag, input shape signature |
+| **System snapshots** | ~30 s | CPU %, RSS MB, throughput req/min, dropped-event count, SDK / Python / onnxruntime versions, OS, arch, execution provider, hostname |
+| **Window stats** | ~60 s or 200 requests | per-input tensor mean/std/min/max/NaN%, output class distribution (top-10), 16-bin confidence histogram, mean entropy, mean top-1 confidence |
+Window stats power the dashboard's **prediction drift** (PSI vs the
+deployment's reference distribution) and **input distribution shift** alerts.
+Opt out any time: `LocalRunner.from_deployment(..., report_telemetry=False)`
+or `ASTRA_SDK_TELEMETRY=0`.
+## CLI
+```bash
+astra pull  --deployment dep_x --api-key KEY
+astra serve --deployment dep_x --api-key KEY --port 8765
+astra bench --deployment dep_x --api-key KEY -n 200
+```
+Options can also come from `ASTRA_BASE_URL`, `ASTRA_DEPLOYMENT_ID`,
+`ASTRA_API_KEY`.

astra_ai_sdk-0.2.0/astra_ai_sdk.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,21 @@
+LICENSE
+README.md
+pyproject.toml
+astra_ai_sdk.egg-info/PKG-INFO
+astra_ai_sdk.egg-info/SOURCES.txt
+astra_ai_sdk.egg-info/dependency_links.txt
+astra_ai_sdk.egg-info/entry_points.txt
+astra_ai_sdk.egg-info/requires.txt
+astra_ai_sdk.egg-info/top_level.txt
+astra_sdk/__init__.py
+astra_sdk/_http.py
+astra_sdk/cli.py
+astra_sdk/client.py
+astra_sdk/py.typed
+astra_sdk/runner.py
+astra_sdk/stats.py
+astra_sdk/system.py
+astra_sdk/telemetry.py
+tests/test_client.py
+tests/test_stats.py
+tests/test_telemetry.py

astra_ai_sdk-0.2.0/astra_ai_sdk.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

astra_ai_sdk-0.2.0/astra_ai_sdk.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ astra = astra_sdk.cli:main

astra_ai_sdk-0.2.0/astra_ai_sdk.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,17 @@
+httpx>=0.27
+[dev]
+pytest>=8.0
+numpy>=1.26
+onnx>=1.16
+onnxruntime>=1.18
+[gpu]
+nvidia-ml-py>=12
+[serve]
+onnxruntime>=1.18
+numpy>=1.26
+[system]
+psutil>=5.9

astra_ai_sdk-0.2.0/astra_ai_sdk.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ astra_sdk

astra_ai_sdk-0.2.0/astra_sdk/__init__.py ADDED Viewed

@@ -0,0 +1,41 @@
+"""Astra SDK — serve Astra-compressed models anywhere, with telemetry built in.
+Hosted inference (server-side telemetry, zero extra deps):
+    from astra_sdk import AstraClient
+    client = AstraClient(deployment_id, api_key)   # base_url defaults to the hosted origin
+    out = client.infer({"input": [[0.1, 0.2, ...]]})
+Local serving (pip install 'astra-ai-sdk[serve]') — pulls the compressed artifact
+and runs it on YOUR hardware while the dashboard keeps monitoring it:
+    from astra_sdk import LocalRunner
+    runner = LocalRunner.from_deployment(deployment_id, api_key)
+    out = runner.run({"input": my_array})
+    runner.close()
+Every locally-served request ships latency breakdown, system snapshots and
+windowed input/output stats to Astra — powering the live Telemetry tab and
+prediction/input drift alerts. Opt out: report_telemetry=False or
+ASTRA_SDK_TELEMETRY=0.
+"""
+from __future__ import annotations
+from ._http import ApiError
+from .client import InferenceError, AstraClient
+from .runner import LocalRunner, RunnerError, pull_artifact
+from .telemetry import TelemetryReporter
+# Public API of the package: every name listed here is imported from a
+# submodule (_http, client, runner, telemetry) by the import lines above.
+__all__ = [
+    "ApiError",
+    "InferenceError",
+    "LocalRunner",
+    "AstraClient",
+    "RunnerError",
+    "TelemetryReporter",
+    "pull_artifact",
+]
+# Version string exposed at runtime as astra_sdk.__version__.
+__version__ = "0.2.0"

astra_ai_sdk-0.2.0/astra_sdk/_http.py ADDED Viewed

@@ -0,0 +1,104 @@
+"""Shared HTTP plumbing: bearer auth, retry with backoff, error mapping."""
+from __future__ import annotations
+import os
+import random
+import time
+from typing import Any
+import httpx
+# Response statuses that HttpSession.request treats as transient and retries.
+# Any other status >= 400 is raised immediately as an ApiError; note that 500
+# is not in this set, so a plain 500 is never retried.
+_RETRYABLE_STATUS = {429, 502, 503, 504}
+# The hosted Astra origin every deployment lives behind. Baked in so SDK code
+# never has to carry a base URL; override with the ASTRA_BASE_URL env var or an
+# explicit base_url argument (e.g. for self-host / testing).
+DEFAULT_BASE_URL = "https://astra.kwon5700.kr"
+def resolve_base_url(base_url: str | None) -> str:
+    """The base URL to use: explicit arg → ASTRA_BASE_URL env → hosted default."""
+    return (base_url or os.environ.get("ASTRA_BASE_URL") or DEFAULT_BASE_URL).rstrip("/")
+class ApiError(Exception):
+    """Non-2xx response from the Astra backend."""
+    def __init__(self, status: int, code: str, message: str) -> None:
+        super().__init__(f"[{status}] {code}: {message}")
+        self.status = status
+        self.code = code
+        self.message = message
+def error_from_response(resp: httpx.Response) -> ApiError:
+    detail: dict[str, Any] = {}
+    try:
+        body = resp.json()
+        detail = body.get("detail", {}) if isinstance(body, dict) else {}
+        if not isinstance(detail, dict):
+            detail = {"message": str(detail)}
+    except ValueError:
+        pass
+    return ApiError(
+        resp.status_code,
+        detail.get("code", "error"),
+        detail.get("message", resp.text[:500]),
+    )
+class HttpSession:
+    """httpx.Client wrapper with bearer auth + bounded retry/backoff.
+    Retries transient failures (connect errors, 429/5xx) with exponential
+    backoff capped at `max_backoff`; gives up after `max_attempts` and raises
+    the last error. 4xx (except 429) never retries."""
+    def __init__(
+        self,
+        base_url: str,
+        api_key: str,
+        *,
+        timeout: float = 30.0,
+        max_attempts: int = 3,
+        max_backoff: float = 60.0,
+    ) -> None:
+        self.base_url = base_url.rstrip("/")
+        self._headers = {"Authorization": f"Bearer {api_key}"}
+        self._client = httpx.Client(timeout=timeout)
+        self._max_attempts = max(1, max_attempts)
+        self._max_backoff = max_backoff
+    def request(
+        self,
+        method: str,
+        path: str,
+        *,
+        json: Any | None = None,
+        headers: dict[str, str] | None = None,
+    ) -> httpx.Response:
+        url = f"{self.base_url}{path}"
+        merged = dict(self._headers)
+        if headers:
+            merged.update(headers)
+        last_exc: Exception | None = None
+        for attempt in range(self._max_attempts):
+            try:
+                resp = self._client.request(method, url, json=json, headers=merged)
+            except httpx.HTTPError as exc:
+                last_exc = exc
+            else:
+                if resp.status_code < 400 or resp.status_code == 304:
+                    return resp
+                if resp.status_code not in _RETRYABLE_STATUS:
+                    raise error_from_response(resp)
+                last_exc = error_from_response(resp)
+            if attempt < self._max_attempts - 1:
+                backoff = min(self._max_backoff, (2 ** attempt) + random.random())
+                time.sleep(backoff)
+        assert last_exc is not None
+        raise last_exc
+    def close(self) -> None:
+        self._client.close()