astra-ai-sdk 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Astra
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,141 @@
1
+ Metadata-Version: 2.4
2
+ Name: astra-ai-sdk
3
+ Version: 0.2.0
4
+ Summary: Serve Astra-compressed models anywhere - hosted or local ONNX serving with built-in telemetry that feeds the Astra dashboard
5
+ Author: Astra
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Astra
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/AstraFoundation/Astra-Back
29
+ Project-URL: Documentation, https://github.com/AstraFoundation/Astra-Back#readme
30
+ Project-URL: Changelog, https://github.com/AstraFoundation/Astra-Back/blob/main/clients/python/CHANGELOG.md
31
+ Keywords: onnx,model-compression,inference,telemetry,mlops
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Operating System :: OS Independent
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3.10
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Programming Language :: Python :: 3.13
41
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
42
+ Requires-Python: >=3.10
43
+ Description-Content-Type: text/markdown
44
+ License-File: LICENSE
45
+ Requires-Dist: httpx>=0.27
46
+ Provides-Extra: serve
47
+ Requires-Dist: onnxruntime>=1.18; extra == "serve"
48
+ Requires-Dist: numpy>=1.26; extra == "serve"
49
+ Provides-Extra: system
50
+ Requires-Dist: psutil>=5.9; extra == "system"
51
+ Provides-Extra: gpu
52
+ Requires-Dist: nvidia-ml-py>=12; extra == "gpu"
53
+ Provides-Extra: dev
54
+ Requires-Dist: pytest>=8.0; extra == "dev"
55
+ Requires-Dist: numpy>=1.26; extra == "dev"
56
+ Requires-Dist: onnx>=1.16; extra == "dev"
57
+ Requires-Dist: onnxruntime>=1.18; extra == "dev"
58
+ Dynamic: license-file
59
+
60
+ # astra-ai-sdk
61
+
62
+ Serve **Astra-compressed models** anywhere — and keep the Astra dashboard
63
+ monitoring them while they run on your hardware.
64
+
65
+ ```bash
66
+ pip install astra-ai-sdk # hosted inference client
67
+ pip install 'astra-ai-sdk[serve]' # + local ONNX serving (onnxruntime, numpy)
68
+ pip install 'astra-ai-sdk[serve,system]' # + precise CPU/RSS metrics (psutil)
69
+ ```
70
+
71
+ ## Hosted inference
72
+
73
+ Calls the Astra-hosted endpoint; telemetry is recorded server-side.
74
+
75
+ ```python
76
+ from astra_sdk import AstraClient
77
+
78
+ # base_url defaults to the hosted Astra origin (override with ASTRA_BASE_URL).
79
+ client = AstraClient("dep_ab12cd34ef", "astra_sk_live_...")
80
+ out = client.infer({"input": [[0.1, 0.2, 0.3]]})
81
+ print(out["latencyMs"], out["outputs"])
82
+ ```
83
+
84
+ ## Local serving (the headline)
85
+
86
+ Pulls the deployed, compressed artifact once (sha256-cached under
87
+ `~/.cache/astra`) and serves it with onnxruntime in your process:
88
+
89
+ ```python
90
+ from astra_sdk import LocalRunner
91
+
92
+ # base_url defaults to the hosted Astra origin (override with ASTRA_BASE_URL).
93
+ runner = LocalRunner.from_deployment("dep_ab12cd34ef", "astra_sk_live_...")
94
+ out = runner.run({"input": my_numpy_array}) # local inference
95
+ print(out["latencyMs"], out["raw"][0].shape)
96
+ runner.close()
97
+ ```
98
+
99
+ ### Run a file you already have
100
+
101
+ Downloaded the artifact (SDK Hub → **Download Artifact**) or have an `.onnx` on
102
+ disk? Skip the deployment — serve the file directly:
103
+
104
+ ```python
105
+ from astra_sdk import LocalRunner
106
+
107
+ runner = LocalRunner.from_file("compressed.onnx")
108
+ out = runner.run({"input": my_numpy_array})
109
+ runner.close()
110
+ ```
111
+
112
+ Telemetry is off for a bare file; pass `deployment_id=` + `api_key=` to still
113
+ report local runs to that deployment.
114
+
115
+ ### What gets reported to the dashboard
116
+
117
+ A background thread batches telemetry to Astra (never blocks or breaks your
118
+ serving path; bounded queue with drop-oldest under pressure):
119
+
120
+ | Stream | Cadence | Fields |
121
+ |---|---|---|
122
+ | **Request events** | per inference | timestamp, latency breakdown (preprocess / inference / postprocess ms), success / error code, batch size, region tag, input shape signature |
123
+ | **System snapshots** | ~30 s | CPU %, RSS MB, throughput req/min, dropped-event count, SDK / Python / onnxruntime versions, OS, arch, execution provider, hostname |
124
+ | **Window stats** | ~60 s or 200 requests | per-input tensor mean/std/min/max/NaN%, output class distribution (top-10), 16-bin confidence histogram, mean entropy, mean top-1 confidence |
125
+
126
+ Window stats power the dashboard's **prediction drift** (PSI vs the
127
+ deployment's reference distribution) and **input distribution shift** alerts.
128
+
129
+ Opt out any time: `LocalRunner.from_deployment(..., report_telemetry=False)`
130
+ or `ASTRA_SDK_TELEMETRY=0`.
131
+
132
+ ## CLI
133
+
134
+ ```bash
135
+ astra pull --deployment dep_x --api-key KEY
136
+ astra serve --deployment dep_x --api-key KEY --port 8765
137
+ astra bench --deployment dep_x --api-key KEY -n 200
138
+ ```
139
+
140
+ Options can also come from `ASTRA_BASE_URL`, `ASTRA_DEPLOYMENT_ID`,
141
+ `ASTRA_API_KEY`.
@@ -0,0 +1,82 @@
1
+ # astra-ai-sdk
2
+
3
+ Serve **Astra-compressed models** anywhere — and keep the Astra dashboard
4
+ monitoring them while they run on your hardware.
5
+
6
+ ```bash
7
+ pip install astra-ai-sdk # hosted inference client
8
+ pip install 'astra-ai-sdk[serve]' # + local ONNX serving (onnxruntime, numpy)
9
+ pip install 'astra-ai-sdk[serve,system]' # + precise CPU/RSS metrics (psutil)
10
+ ```
11
+
12
+ ## Hosted inference
13
+
14
+ Calls the Astra-hosted endpoint; telemetry is recorded server-side.
15
+
16
+ ```python
17
+ from astra_sdk import AstraClient
18
+
19
+ # base_url defaults to the hosted Astra origin (override with ASTRA_BASE_URL).
20
+ client = AstraClient("dep_ab12cd34ef", "astra_sk_live_...")
21
+ out = client.infer({"input": [[0.1, 0.2, 0.3]]})
22
+ print(out["latencyMs"], out["outputs"])
23
+ ```
24
+
25
+ ## Local serving (the headline)
26
+
27
+ Pulls the deployed, compressed artifact once (sha256-cached under
28
+ `~/.cache/astra`) and serves it with onnxruntime in your process:
29
+
30
+ ```python
31
+ from astra_sdk import LocalRunner
32
+
33
+ # base_url defaults to the hosted Astra origin (override with ASTRA_BASE_URL).
34
+ runner = LocalRunner.from_deployment("dep_ab12cd34ef", "astra_sk_live_...")
35
+ out = runner.run({"input": my_numpy_array}) # local inference
36
+ print(out["latencyMs"], out["raw"][0].shape)
37
+ runner.close()
38
+ ```
39
+
40
+ ### Run a file you already have
41
+
42
+ Downloaded the artifact (SDK Hub → **Download Artifact**) or have an `.onnx` on
43
+ disk? Skip the deployment — serve the file directly:
44
+
45
+ ```python
46
+ from astra_sdk import LocalRunner
47
+
48
+ runner = LocalRunner.from_file("compressed.onnx")
49
+ out = runner.run({"input": my_numpy_array})
50
+ runner.close()
51
+ ```
52
+
53
+ Telemetry is off for a bare file; pass `deployment_id=` + `api_key=` to still
54
+ report local runs to that deployment.
55
+
56
+ ### What gets reported to the dashboard
57
+
58
+ A background thread batches telemetry to Astra (never blocks or breaks your
59
+ serving path; bounded queue with drop-oldest under pressure):
60
+
61
+ | Stream | Cadence | Fields |
62
+ |---|---|---|
63
+ | **Request events** | per inference | timestamp, latency breakdown (preprocess / inference / postprocess ms), success / error code, batch size, region tag, input shape signature |
64
+ | **System snapshots** | ~30 s | CPU %, RSS MB, throughput req/min, dropped-event count, SDK / Python / onnxruntime versions, OS, arch, execution provider, hostname |
65
+ | **Window stats** | ~60 s or 200 requests | per-input tensor mean/std/min/max/NaN%, output class distribution (top-10), 16-bin confidence histogram, mean entropy, mean top-1 confidence |
66
+
67
+ Window stats power the dashboard's **prediction drift** (PSI vs the
68
+ deployment's reference distribution) and **input distribution shift** alerts.
69
+
70
+ Opt out any time: `LocalRunner.from_deployment(..., report_telemetry=False)`
71
+ or `ASTRA_SDK_TELEMETRY=0`.
72
+
73
+ ## CLI
74
+
75
+ ```bash
76
+ astra pull --deployment dep_x --api-key KEY
77
+ astra serve --deployment dep_x --api-key KEY --port 8765
78
+ astra bench --deployment dep_x --api-key KEY -n 200
79
+ ```
80
+
81
+ Options can also come from `ASTRA_BASE_URL`, `ASTRA_DEPLOYMENT_ID`,
82
+ `ASTRA_API_KEY`.
@@ -0,0 +1,141 @@
1
+ Metadata-Version: 2.4
2
+ Name: astra-ai-sdk
3
+ Version: 0.2.0
4
+ Summary: Serve Astra-compressed models anywhere - hosted or local ONNX serving with built-in telemetry that feeds the Astra dashboard
5
+ Author: Astra
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Astra
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/AstraFoundation/Astra-Back
29
+ Project-URL: Documentation, https://github.com/AstraFoundation/Astra-Back#readme
30
+ Project-URL: Changelog, https://github.com/AstraFoundation/Astra-Back/blob/main/clients/python/CHANGELOG.md
31
+ Keywords: onnx,model-compression,inference,telemetry,mlops
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Operating System :: OS Independent
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3.10
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Programming Language :: Python :: 3.13
41
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
42
+ Requires-Python: >=3.10
43
+ Description-Content-Type: text/markdown
44
+ License-File: LICENSE
45
+ Requires-Dist: httpx>=0.27
46
+ Provides-Extra: serve
47
+ Requires-Dist: onnxruntime>=1.18; extra == "serve"
48
+ Requires-Dist: numpy>=1.26; extra == "serve"
49
+ Provides-Extra: system
50
+ Requires-Dist: psutil>=5.9; extra == "system"
51
+ Provides-Extra: gpu
52
+ Requires-Dist: nvidia-ml-py>=12; extra == "gpu"
53
+ Provides-Extra: dev
54
+ Requires-Dist: pytest>=8.0; extra == "dev"
55
+ Requires-Dist: numpy>=1.26; extra == "dev"
56
+ Requires-Dist: onnx>=1.16; extra == "dev"
57
+ Requires-Dist: onnxruntime>=1.18; extra == "dev"
58
+ Dynamic: license-file
59
+
60
+ # astra-ai-sdk
61
+
62
+ Serve **Astra-compressed models** anywhere — and keep the Astra dashboard
63
+ monitoring them while they run on your hardware.
64
+
65
+ ```bash
66
+ pip install astra-ai-sdk # hosted inference client
67
+ pip install 'astra-ai-sdk[serve]' # + local ONNX serving (onnxruntime, numpy)
68
+ pip install 'astra-ai-sdk[serve,system]' # + precise CPU/RSS metrics (psutil)
69
+ ```
70
+
71
+ ## Hosted inference
72
+
73
+ Calls the Astra-hosted endpoint; telemetry is recorded server-side.
74
+
75
+ ```python
76
+ from astra_sdk import AstraClient
77
+
78
+ # base_url defaults to the hosted Astra origin (override with ASTRA_BASE_URL).
79
+ client = AstraClient("dep_ab12cd34ef", "astra_sk_live_...")
80
+ out = client.infer({"input": [[0.1, 0.2, 0.3]]})
81
+ print(out["latencyMs"], out["outputs"])
82
+ ```
83
+
84
+ ## Local serving (the headline)
85
+
86
+ Pulls the deployed, compressed artifact once (sha256-cached under
87
+ `~/.cache/astra`) and serves it with onnxruntime in your process:
88
+
89
+ ```python
90
+ from astra_sdk import LocalRunner
91
+
92
+ # base_url defaults to the hosted Astra origin (override with ASTRA_BASE_URL).
93
+ runner = LocalRunner.from_deployment("dep_ab12cd34ef", "astra_sk_live_...")
94
+ out = runner.run({"input": my_numpy_array}) # local inference
95
+ print(out["latencyMs"], out["raw"][0].shape)
96
+ runner.close()
97
+ ```
98
+
99
+ ### Run a file you already have
100
+
101
+ Downloaded the artifact (SDK Hub → **Download Artifact**) or have an `.onnx` on
102
+ disk? Skip the deployment — serve the file directly:
103
+
104
+ ```python
105
+ from astra_sdk import LocalRunner
106
+
107
+ runner = LocalRunner.from_file("compressed.onnx")
108
+ out = runner.run({"input": my_numpy_array})
109
+ runner.close()
110
+ ```
111
+
112
+ Telemetry is off for a bare file; pass `deployment_id=` + `api_key=` to still
113
+ report local runs to that deployment.
114
+
115
+ ### What gets reported to the dashboard
116
+
117
+ A background thread batches telemetry to Astra (never blocks or breaks your
118
+ serving path; bounded queue with drop-oldest under pressure):
119
+
120
+ | Stream | Cadence | Fields |
121
+ |---|---|---|
122
+ | **Request events** | per inference | timestamp, latency breakdown (preprocess / inference / postprocess ms), success / error code, batch size, region tag, input shape signature |
123
+ | **System snapshots** | ~30 s | CPU %, RSS MB, throughput req/min, dropped-event count, SDK / Python / onnxruntime versions, OS, arch, execution provider, hostname |
124
+ | **Window stats** | ~60 s or 200 requests | per-input tensor mean/std/min/max/NaN%, output class distribution (top-10), 16-bin confidence histogram, mean entropy, mean top-1 confidence |
125
+
126
+ Window stats power the dashboard's **prediction drift** (PSI vs the
127
+ deployment's reference distribution) and **input distribution shift** alerts.
128
+
129
+ Opt out any time: `LocalRunner.from_deployment(..., report_telemetry=False)`
130
+ or `ASTRA_SDK_TELEMETRY=0`.
131
+
132
+ ## CLI
133
+
134
+ ```bash
135
+ astra pull --deployment dep_x --api-key KEY
136
+ astra serve --deployment dep_x --api-key KEY --port 8765
137
+ astra bench --deployment dep_x --api-key KEY -n 200
138
+ ```
139
+
140
+ Options can also come from `ASTRA_BASE_URL`, `ASTRA_DEPLOYMENT_ID`,
141
+ `ASTRA_API_KEY`.
@@ -0,0 +1,21 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ astra_ai_sdk.egg-info/PKG-INFO
5
+ astra_ai_sdk.egg-info/SOURCES.txt
6
+ astra_ai_sdk.egg-info/dependency_links.txt
7
+ astra_ai_sdk.egg-info/entry_points.txt
8
+ astra_ai_sdk.egg-info/requires.txt
9
+ astra_ai_sdk.egg-info/top_level.txt
10
+ astra_sdk/__init__.py
11
+ astra_sdk/_http.py
12
+ astra_sdk/cli.py
13
+ astra_sdk/client.py
14
+ astra_sdk/py.typed
15
+ astra_sdk/runner.py
16
+ astra_sdk/stats.py
17
+ astra_sdk/system.py
18
+ astra_sdk/telemetry.py
19
+ tests/test_client.py
20
+ tests/test_stats.py
21
+ tests/test_telemetry.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ astra = astra_sdk.cli:main
@@ -0,0 +1,17 @@
1
+ httpx>=0.27
2
+
3
+ [dev]
4
+ pytest>=8.0
5
+ numpy>=1.26
6
+ onnx>=1.16
7
+ onnxruntime>=1.18
8
+
9
+ [gpu]
10
+ nvidia-ml-py>=12
11
+
12
+ [serve]
13
+ onnxruntime>=1.18
14
+ numpy>=1.26
15
+
16
+ [system]
17
+ psutil>=5.9
@@ -0,0 +1 @@
1
+ astra_sdk
@@ -0,0 +1,41 @@
1
+ """Astra SDK — serve Astra-compressed models anywhere, with telemetry built in.
2
+
3
+ Hosted inference (server-side telemetry, zero extra deps):
4
+
5
+ from astra_sdk import AstraClient
6
+
7
+ client = AstraClient(deployment_id, api_key) # base_url defaults to the hosted origin
8
+ out = client.infer({"input": [[0.1, 0.2, ...]]})
9
+
10
+ Local serving (pip install 'astra-ai-sdk[serve]') — pulls the compressed artifact
11
+ and runs it on YOUR hardware while the dashboard keeps monitoring it:
12
+
13
+ from astra_sdk import LocalRunner
14
+
15
+ runner = LocalRunner.from_deployment(deployment_id, api_key)
16
+ out = runner.run({"input": my_array})
17
+ runner.close()
18
+
19
+ Every locally-served request ships latency breakdown, system snapshots and
20
+ windowed input/output stats to Astra — powering the live Telemetry tab and
21
+ prediction/input drift alerts. Opt out: report_telemetry=False or
22
+ ASTRA_SDK_TELEMETRY=0.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ from ._http import ApiError
28
+ from .client import InferenceError, AstraClient
29
+ from .runner import LocalRunner, RunnerError, pull_artifact
30
+ from .telemetry import TelemetryReporter
31
+
32
+ __all__ = [
33
+ "ApiError",
34
+ "InferenceError",
35
+ "LocalRunner",
36
+ "AstraClient",
37
+ "RunnerError",
38
+ "TelemetryReporter",
39
+ "pull_artifact",
40
+ ]
41
+ __version__ = "0.2.0"
@@ -0,0 +1,104 @@
1
+ """Shared HTTP plumbing: bearer auth, retry with backoff, error mapping."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import random
7
+ import time
8
+ from typing import Any
9
+
10
+ import httpx
11
+
12
+ _RETRYABLE_STATUS = {429, 502, 503, 504}
13
+
14
+ # The hosted Astra origin every deployment lives behind. Baked in so SDK code
15
+ # never has to carry a base URL; override with the ASTRA_BASE_URL env var or an
16
+ # explicit base_url argument (e.g. for self-host / testing).
17
+ DEFAULT_BASE_URL = "https://astra.kwon5700.kr"
18
+
19
+
20
+ def resolve_base_url(base_url: str | None) -> str:
21
+ """The base URL to use: explicit arg → ASTRA_BASE_URL env → hosted default."""
22
+ return (base_url or os.environ.get("ASTRA_BASE_URL") or DEFAULT_BASE_URL).rstrip("/")
23
+
24
+
25
class ApiError(Exception):
    """Non-2xx response from the Astra backend.

    Attributes:
        status: HTTP status code of the response.
        code: Error code supplied by the caller that built the error.
        message: Human-readable description of the failure.

    ``str(err)`` renders as ``"[<status>] <code>: <message>"``.
    """

    def __init__(self, status: int, code: str, message: str) -> None:
        super().__init__(f"[{status}] {code}: {message}")
        self.status = status
        self.code = code
        self.message = message

    def __reduce__(self):
        # ``args`` holds only the formatted string, so the default
        # BaseException.__reduce__ would rebuild the error as
        # ``ApiError("<formatted>")`` and fail with a TypeError. Rebuild from
        # the three real constructor arguments so copy/pickle round-trips work
        # (e.g. when an error crosses a process boundary).
        return (
            type(self),
            (self.status, self.code, self.message),
            dict(self.__dict__),
        )
33
+
34
+
35
def error_from_response(resp: httpx.Response) -> ApiError:
    """Build an ``ApiError`` describing a failed response.

    The JSON body's ``detail`` entry is used when present: a dict supplies
    ``code`` and ``message``; any other non-null value is stringified and used
    as the message. When the body is not JSON, is not an object, or carries no
    usable ``detail``, the code defaults to ``"error"`` and the message to the
    first 500 characters of the response text.

    Args:
        resp: The response to describe.

    Returns:
        An ``ApiError`` carrying the response status code, the error code and
        the message. The error is returned, not raised.
    """
    detail: dict[str, Any] = {}
    try:
        body = resp.json()
    except ValueError:
        # Body is not valid JSON; fall back to the raw text below.
        body = None
    if isinstance(body, dict):
        raw = body.get("detail")
        if isinstance(raw, dict):
            detail = raw
        elif raw is not None:
            # e.g. a plain string or a list of validation problems.
            detail = {"message": str(raw)}
    # Treat null/empty values like missing ones so the error never reads
    # "None: None" when the backend sends placeholders.
    code = detail.get("code") or "error"
    message = detail.get("message") or resp.text[:500]
    return ApiError(resp.status_code, code, message)
49
+
50
+
51
class HttpSession:
    """httpx.Client wrapper with bearer auth + bounded retry/backoff.

    Failures raised by the client as ``httpx.HTTPError`` and responses whose
    status is in ``_RETRYABLE_STATUS`` (429, 502, 503, 504) are retried with
    exponential backoff plus jitter, capped at ``max_backoff`` seconds. After
    ``max_attempts`` tries the last error is raised. Any other status >= 400
    (including 500) raises immediately and is never retried.

    The session is also a context manager, which closes the underlying client
    on exit::

        with HttpSession(base_url, api_key) as session:
            session.request("GET", "/some/path")
    """

    def __init__(
        self,
        base_url: str,
        api_key: str,
        *,
        timeout: float = 30.0,
        max_attempts: int = 3,
        max_backoff: float = 60.0,
    ) -> None:
        """Create the session.

        Args:
            base_url: Origin that every request path is appended to; trailing
                slashes are stripped.
            api_key: Sent as ``Authorization: Bearer <api_key>`` on each request.
            timeout: Timeout handed to ``httpx.Client``.
            max_attempts: Total tries per request; values below 1 become 1.
            max_backoff: Upper bound, in seconds, on the sleep between tries.
        """
        self.base_url = base_url.rstrip("/")
        self._headers = {"Authorization": f"Bearer {api_key}"}
        self._client = httpx.Client(timeout=timeout)
        self._max_attempts = max(1, max_attempts)
        self._max_backoff = max_backoff

    def __enter__(self) -> HttpSession:
        return self

    def __exit__(self, *exc_info: object) -> None:
        self.close()

    def request(
        self,
        method: str,
        path: str,
        *,
        json: Any | None = None,
        headers: dict[str, str] | None = None,
    ) -> httpx.Response:
        """Send one request, retrying transient failures.

        Args:
            method: HTTP method name.
            path: Path appended verbatim to ``base_url``.
            json: Optional JSON-serialisable request body.
            headers: Extra headers; they override the session defaults for
                this call only.

        Returns:
            The first response whose status code is below 400.

        Raises:
            ApiError: On a non-retryable status >= 400, or when a retryable
                status is still returned on the final attempt.
            httpx.HTTPError: When the final attempt fails inside the client.
        """
        url = f"{self.base_url}{path}"
        # Build a fresh dict so per-call headers never leak into the session.
        merged = {**self._headers, **(headers or {})}
        last_exc: Exception | None = None
        for attempt in range(self._max_attempts):
            try:
                resp = self._client.request(method, url, json=json, headers=merged)
            except httpx.HTTPError as exc:
                last_exc = exc
            else:
                if resp.status_code < 400:
                    return resp
                if resp.status_code not in _RETRYABLE_STATUS:
                    raise error_from_response(resp)
                last_exc = error_from_response(resp)
            if attempt < self._max_attempts - 1:
                # 1s, 2s, 4s, ... plus up to 1s of jitter, capped at
                # max_backoff. Clamp at zero so a negative cap cannot make
                # time.sleep raise.
                backoff = min(self._max_backoff, (2 ** attempt) + random.random())
                time.sleep(max(0.0, backoff))
        if last_exc is None:
            # Unreachable while max_attempts is clamped to >= 1; an explicit
            # raise (not assert) so the guard survives ``python -O``.
            raise RuntimeError("HttpSession.request made no attempts")
        raise last_exc

    def close(self) -> None:
        """Close the underlying ``httpx.Client``."""
        self._client.close()