anycloud-sdk 5.17.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anycloud_sdk-5.17.2/.gitignore +7 -0
- anycloud_sdk-5.17.2/PKG-INFO +79 -0
- anycloud_sdk-5.17.2/README.md +65 -0
- anycloud_sdk-5.17.2/anycloud/__init__.py +50 -0
- anycloud_sdk-5.17.2/anycloud/client.py +342 -0
- anycloud_sdk-5.17.2/anycloud/errors.py +52 -0
- anycloud_sdk-5.17.2/anycloud/image.py +31 -0
- anycloud_sdk-5.17.2/anycloud/job.py +206 -0
- anycloud_sdk-5.17.2/anycloud/py.typed +0 -0
- anycloud_sdk-5.17.2/anycloud/remote.py +115 -0
- anycloud_sdk-5.17.2/anycloud/types.py +184 -0
- anycloud_sdk-5.17.2/pyproject.toml +25 -0
- anycloud_sdk-5.17.2/tests/__init__.py +0 -0
- anycloud_sdk-5.17.2/tests/test_client.py +527 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: anycloud-sdk
|
|
3
|
+
Version: 5.17.2
|
|
4
|
+
Summary: Python SDK for anycloud — submit jobs, build DAGs, run workloads on any cloud
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: httpx>=0.27
|
|
8
|
+
Requires-Dist: pydantic>=2.0
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
11
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
12
|
+
Requires-Dist: respx>=0.21; extra == 'dev'
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# anycloud Python SDK
|
|
16
|
+
|
|
17
|
+
Submit jobs, build DAGs, run workloads on any cloud.
|
|
18
|
+
|
|
19
|
+
## Install
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install anycloud
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Define jobs as functions
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
import anycloud
|
|
29
|
+
|
|
30
|
+
ac = anycloud.Client()
|
|
31
|
+
IMG = anycloud.image("my-training:latest")
|
|
32
|
+
|
|
33
|
+
@ac.job(image=IMG, gpu="h100:8")
|
|
34
|
+
def train(lr: float = 0.001, batch_size: int = 32):
|
|
35
|
+
...
|
|
36
|
+
|
|
37
|
+
# Submit — function params become env vars (LR=0.01, BATCH_SIZE=32)
|
|
38
|
+
job = train.submit(lr=0.01)
|
|
39
|
+
job.wait()
|
|
40
|
+
print(job.logs())
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## DAGs via `after`
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
@ac.job(image=anycloud.image("prep:latest"))
|
|
47
|
+
def preprocess():
|
|
48
|
+
...
|
|
49
|
+
|
|
50
|
+
@ac.job(image=IMG, gpu="h100:8")
|
|
51
|
+
def train(lr: float = 0.001):
|
|
52
|
+
...
|
|
53
|
+
|
|
54
|
+
@ac.job(image=anycloud.image("eval:latest"))
|
|
55
|
+
def evaluate():
|
|
56
|
+
...
|
|
57
|
+
|
|
58
|
+
prep = preprocess()
|
|
59
|
+
t = train.submit(lr=0.01, after=[prep])
|
|
60
|
+
e = evaluate.submit(after=[t])
|
|
61
|
+
e.wait() # preprocess → train → evaluate
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Fan-out / fan-in
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
split = preprocess()
|
|
68
|
+
shards = [train.submit(lr=lr, after=[split]) for lr in [0.1, 0.01, 0.001]]
|
|
69
|
+
best = evaluate.submit(after=shards)
|
|
70
|
+
best.wait() # preprocess → 3× train (parallel) → evaluate
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Low-level API
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
# submit() works without the decorator too
|
|
77
|
+
job = ac.submit("my-image:latest", cloud="aws", gpu="h100:8", env={"LR": "0.01"})
|
|
78
|
+
job.wait()
|
|
79
|
+
```
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# anycloud Python SDK
|
|
2
|
+
|
|
3
|
+
Submit jobs, build DAGs, run workloads on any cloud.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install anycloud
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Define jobs as functions
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
import anycloud
|
|
15
|
+
|
|
16
|
+
ac = anycloud.Client()
|
|
17
|
+
IMG = anycloud.image("my-training:latest")
|
|
18
|
+
|
|
19
|
+
@ac.job(image=IMG, gpu="h100:8")
|
|
20
|
+
def train(lr: float = 0.001, batch_size: int = 32):
|
|
21
|
+
...
|
|
22
|
+
|
|
23
|
+
# Submit — function params become env vars (LR=0.01, BATCH_SIZE=32)
|
|
24
|
+
job = train.submit(lr=0.01)
|
|
25
|
+
job.wait()
|
|
26
|
+
print(job.logs())
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## DAGs via `after`
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
@ac.job(image=anycloud.image("prep:latest"))
|
|
33
|
+
def preprocess():
|
|
34
|
+
...
|
|
35
|
+
|
|
36
|
+
@ac.job(image=IMG, gpu="h100:8")
|
|
37
|
+
def train(lr: float = 0.001):
|
|
38
|
+
...
|
|
39
|
+
|
|
40
|
+
@ac.job(image=anycloud.image("eval:latest"))
|
|
41
|
+
def evaluate():
|
|
42
|
+
...
|
|
43
|
+
|
|
44
|
+
prep = preprocess()
|
|
45
|
+
t = train.submit(lr=0.01, after=[prep])
|
|
46
|
+
e = evaluate.submit(after=[t])
|
|
47
|
+
e.wait() # preprocess → train → evaluate
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Fan-out / fan-in
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
split = preprocess()
|
|
54
|
+
shards = [train.submit(lr=lr, after=[split]) for lr in [0.1, 0.01, 0.001]]
|
|
55
|
+
best = evaluate.submit(after=shards)
|
|
56
|
+
best.wait() # preprocess → 3× train (parallel) → evaluate
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Low-level API
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
# submit() works without the decorator too
|
|
63
|
+
job = ac.submit("my-image:latest", cloud="aws", gpu="h100:8", env={"LR": "0.01"})
|
|
64
|
+
job.wait()
|
|
65
|
+
```
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""anycloud Python SDK — submit jobs, build DAGs, run workloads on any cloud."""
|
|
2
|
+
|
|
3
|
+
from anycloud.client import Client
|
|
4
|
+
from anycloud.errors import (
|
|
5
|
+
AnyCloudError,
|
|
6
|
+
APIError,
|
|
7
|
+
ConflictError,
|
|
8
|
+
JobFailedError,
|
|
9
|
+
NotFoundError,
|
|
10
|
+
TimeoutError,
|
|
11
|
+
)
|
|
12
|
+
from anycloud.image import Image, image
|
|
13
|
+
from anycloud.job import Job
|
|
14
|
+
from anycloud.remote import RemoteFunction
|
|
15
|
+
from anycloud.types import (
|
|
16
|
+
AWSCredentials,
|
|
17
|
+
AzureCredentials,
|
|
18
|
+
CloudConfig,
|
|
19
|
+
CloudType,
|
|
20
|
+
Deployment,
|
|
21
|
+
DeploymentState,
|
|
22
|
+
DockerOptions,
|
|
23
|
+
GCPCredentials,
|
|
24
|
+
LambdaCredentials,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# Public re-export surface of the ``anycloud`` package.
# Note: DAGError and CycleError are deliberately not re-exported here;
# import them from anycloud.errors directly if needed.
__all__ = [
    "Client",
    "Image",
    "image",
    "Job",
    "RemoteFunction",
    # Types
    "CloudConfig",
    "CloudType",
    "Deployment",
    "DeploymentState",
    "DockerOptions",
    "AWSCredentials",
    "GCPCredentials",
    "AzureCredentials",
    "LambdaCredentials",
    # Errors
    "AnyCloudError",
    "APIError",
    "ConflictError",
    "JobFailedError",
    "NotFoundError",
    "TimeoutError",
]
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
"""anycloud Client — submit jobs, manage deployments."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
from importlib.metadata import version as _pkg_version, PackageNotFoundError
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Callable
|
|
10
|
+
|
|
11
|
+
import httpx
|
|
12
|
+
|
|
13
|
+
from anycloud.errors import APIError, ConflictError, NotFoundError
|
|
14
|
+
from anycloud.image import Image
|
|
15
|
+
from anycloud.job import Job
|
|
16
|
+
from anycloud.remote import RemoteFunction
|
|
17
|
+
from anycloud.types import (
|
|
18
|
+
CloudConfig,
|
|
19
|
+
Deployment,
|
|
20
|
+
StatusResponse,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _read_version() -> str:
    """Resolve SDK version: installed package metadata first, then monorepo package.json.

    Returns:
        The version string (e.g. ``"5.17.2"``).

    Raises:
        RuntimeError: If neither the installed package metadata nor the
            monorepo root ``package.json`` yields a version.
    """
    try:
        return _pkg_version("anycloud-sdk")
    except PackageNotFoundError:
        pass
    # Development fallback: read from monorepo root package.json
    # (four ``parent`` hops up from this file — TODO confirm repo layout).
    pkg = Path(__file__).resolve().parent.parent.parent.parent / "package.json"
    try:
        with open(pkg) as f:
            return json.load(f)["version"]
    except (OSError, KeyError, json.JSONDecodeError) as exc:
        # OSError covers FileNotFoundError plus permission/IO failures;
        # JSONDecodeError (a malformed package.json) was previously uncaught
        # and leaked past this friendly error. Chain the cause for debugging.
        raise RuntimeError(
            "Could not determine anycloud-sdk version. "
            "Install the package (pip install anycloud-sdk) or run from the monorepo."
        ) from exc
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Resolved once at import time; included in request bodies as "version".
_SDK_VERSION = _read_version()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class Client:
    """anycloud Python client.

    Usage::

        from anycloud import Client
        from anycloud.types import CloudConfig, AWSCredentials

        cc = CloudConfig(
            cloudProvider="AWS",
            credentials=AWSCredentials(
                accessKeyId="...", secretAccessKey="...",
            ),
            region="us-west-2",
            spot=True,
        )

        ac = Client(access_token="ghp_...", cloud_config=cc)

        # Submit a job — returns a Job (promise/future)
        job = ac.submit("train:latest", gpu="h100:8")
        job.wait()

        # Chain jobs into a DAG
        prep = ac.submit("prep:latest")
        train = ac.submit("train:latest", gpu="h100:8", after=[prep])
        eval = ac.submit("eval:latest", after=[train])
        eval.wait()  # waits for the entire chain

    Args:
        access_token: GitHub token for authentication.
            Falls back to ``ANYCLOUD_ACCESS_TOKEN`` env var.
        conductor_url: Base URL of the conductor API.
            Falls back to ``ANYCLOUD_CONDUCTOR_URL``, then ``http://localhost:8080``.
        cloud_config: Default ``CloudConfig`` applied to every ``submit()`` call.
            Can be overridden per-submit via ``submit(cloud_config=...)``.
    """

    def __init__(
        self,
        *,
        access_token: str | None = None,
        conductor_url: str | None = None,
        cloud_config: CloudConfig | None = None,
    ):
        # No client-side validation: an empty token is accepted here and only
        # rejected (if at all) by the conductor.
        self._access_token = access_token or os.environ.get("ANYCLOUD_ACCESS_TOKEN", "")
        # Strip trailing slash so httpx base_url + path joins stay clean.
        self._base_url = (
            conductor_url
            or os.environ.get("ANYCLOUD_CONDUCTOR_URL", "http://localhost:8080")
        ).rstrip("/")
        self._default_cloud_config = cloud_config
        self._http = httpx.Client(base_url=self._base_url, timeout=30.0)

    # ------------------------------------------------------------------
    # Decorator: @client.job()
    # ------------------------------------------------------------------

    def job(
        self,
        *,
        image: Image | str,
        cloud_config: CloudConfig | None = None,
        gpu: str | None = None,
        docker_options: dict[str, Any] | None = None,
        command: list[str] | None = None,
    ) -> Callable[[Callable], RemoteFunction]:
        """Decorator that turns a function into a submittable anycloud job.

        The function's parameters (with defaults) become environment variables
        when submitted. The function body is not executed — the Docker image
        is what runs on the cloud.

        Usage::

            IMG = anycloud.image("train:latest")

            @ac.job(image=IMG, cloud_config=my_config, gpu="h100:8")
            def train(lr: float = 0.001, batch_size: int = 32):
                ...

            job = train.submit(lr=0.01)
            job = train(lr=0.01)  # shorthand
            job = train.submit(after=[prep_job])  # DAG
        """
        def decorator(fn: Callable) -> RemoteFunction:
            # The wrapped function is never invoked locally; RemoteFunction
            # presumably only inspects its signature to derive env vars —
            # confirm in remote.py.
            return RemoteFunction(
                self,
                fn,
                image=image,
                cloud_config=cloud_config,
                gpu=gpu,
                docker_options=docker_options,
                command=command,
            )
        return decorator

    # ------------------------------------------------------------------
    # Core: submit
    # ------------------------------------------------------------------

    def submit(
        self,
        image: str,
        *,
        cloud_config: CloudConfig | None = None,
        gpu: str | None = None,
        env: dict[str, str] | None = None,
        docker_options: dict[str, Any] | None = None,
        command: list[str] | None = None,
        persist: bool = False,
        deployment_id: str | None = None,
        image_digest: str | None = None,
        after: list[Job] | None = None,
    ) -> Job:
        """Submit a job and return a ``Job`` promise.

        If ``after`` is provided, the job is **deferred**: it won't be
        submitted to the conductor until all upstream jobs complete.
        Calling ``wait()`` on a deferred job blocks on the full chain.

        This lets you build arbitrary DAGs::

            prep = client.submit("prep:latest", cloud_config=cc)
            train = client.submit("train:latest", cloud_config=cc, gpu="h100:8", after=[prep])
            eval = client.submit("eval:latest", cloud_config=cc, after=[train])
            eval.wait()  # prep → train → eval

        Fan-out / fan-in::

            split = client.submit("split:latest", cloud_config=cc)
            shards = [client.submit("worker:latest", cloud_config=cc, after=[split]) for _ in range(4)]
            merge = client.submit("merge:latest", cloud_config=cc, after=shards)
            merge.wait()  # split → 4× worker → merge

        Args:
            image: Docker image reference (e.g. ``"train:latest"``).
            cloud_config: ``CloudConfig`` specifying provider, credentials, region, etc.
                Falls back to the default set on ``Client(cloud_config=...)``.
            gpu: GPU type shorthand (e.g. ``"h100:8"``).
            env: Environment variables passed to the container.
            docker_options: Docker runtime options (shmSize, gpus, etc.).
            command: Override container CMD.
            persist: Keep VM alive after job completion.
            deployment_id: Custom deployment ID (auto-generated if omitted).
            image_digest: Docker image digest (e.g. ``"sha256:abc..."``).
            after: List of upstream ``Job`` objects that must complete first.

        Returns:
            A ``Job`` handle you can poll, wait on, or pass as a dependency.
        """
        # Build the body eagerly so config errors (missing cloud config)
        # surface at submit() time even for deferred jobs.
        body = self._build_request_body(
            image=image,
            gpu=gpu,
            env=env,
            docker_options=docker_options,
            command=command,
            persist=persist,
            deployment_id=deployment_id,
            cloud_config=cloud_config,
            image_digest=image_digest,
        )

        if after:
            # Deferred: don't submit yet, wait for deps on .wait()
            # "(deferred)" is a placeholder id — the real conductor id only
            # exists once _submit_kwargs is POSTed (presumably done by Job
            # after its upstream jobs finish; confirm in job.py).
            job = Job(self, deployment_id or "(deferred)", after=after)
            job._submit_kwargs = body
            return job

        # Immediate: submit now
        data = self._submit_raw(body)
        return Job(self, data["id"])

    # ------------------------------------------------------------------
    # Deployment management
    # ------------------------------------------------------------------

    def list(self, *, limit: int = 20) -> list[Deployment]:
        """List recent deployments."""
        data = self._post("/v1/list", {
            "accessToken": self._access_token,
            "version": _SDK_VERSION,
            "limit": limit,
        })
        return [Deployment.model_validate(d) for d in data]

    def get(self, deployment_id: str) -> Job:
        """Get a ``Job`` handle for an existing deployment."""
        # No existence check here — a bad id surfaces later as NotFoundError
        # when the Job polls status.
        return Job(self, deployment_id)

    # ------------------------------------------------------------------
    # Internal API calls (used by Job)
    # ------------------------------------------------------------------

    def _submit_raw(self, body: dict[str, Any]) -> dict[str, Any]:
        """POST /v1/new and return the response dict."""
        return self._post("/v1/new", body)

    def _status(self, deployment_id: str, *, verbose: bool = False) -> StatusResponse:
        # Fetch deployment status; "verbose" is only sent when truthy.
        body: dict[str, Any] = {
            "id": deployment_id,
            "accessToken": self._access_token,
            "version": _SDK_VERSION,
        }
        if verbose:
            body["verbose"] = True
        data = self._post("/v1/status", body)
        return StatusResponse.model_validate(data)

    def _terminate(self, deployment_id: str) -> None:
        # Request termination of a deployment; response body is ignored.
        self._post("/v1/terminate", {
            "id": deployment_id,
            "accessToken": self._access_token,
            "version": _SDK_VERSION,
        })

    def _resubmit(self, deployment_id: str) -> None:
        # Re-run an existing deployment by id; response body is ignored.
        self._post("/v1/resubmit", {
            "id": deployment_id,
            "accessToken": self._access_token,
            "version": _SDK_VERSION,
        })

    # ------------------------------------------------------------------
    # HTTP
    # ------------------------------------------------------------------

    def _post(self, path: str, body: dict[str, Any]) -> Any:
        # Single choke point for HTTP: maps conductor error responses onto
        # the SDK exception hierarchy (409 -> ConflictError, 404 -> NotFoundError,
        # other 4xx/5xx -> APIError).
        resp = self._http.post(path, json=body)
        if resp.status_code == 409:
            raise ConflictError(resp.text)
        if resp.status_code == 404:
            raise NotFoundError(resp.text)
        if resp.status_code >= 400:
            # /v1/status returns 400 (not 404) for missing deployments
            if "not found" in resp.text.lower():
                raise NotFoundError(resp.text)
            raise APIError(resp.status_code, resp.text)
        return resp.json()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _build_request_body(
        self,
        *,
        image: str,
        gpu: str | None,
        env: dict[str, str] | None,
        docker_options: dict[str, Any] | None,
        command: list[str] | None,
        persist: bool,
        deployment_id: str | None,
        cloud_config: CloudConfig | None,
        image_digest: str | None = None,
    ) -> dict[str, Any]:
        # Assemble the /v1/new request body; per-call cloud_config wins over
        # the client-level default.
        cc = cloud_config or self._default_cloud_config
        if cc is None:
            raise ValueError(
                "A cloud config is required. Pass cloud_config=CloudConfig(...) "
                "or set a default via Client(cloud_config=...)."
            )

        body: dict[str, Any] = {
            "image": image,
            "deploymentType": "job",
            "version": _SDK_VERSION,
            "accessToken": self._access_token,
            # by_alias + exclude_none: emit the wire-format field names and
            # omit unset optionals.
            "cloudConfig": cc.model_dump(by_alias=True, exclude_none=True),
        }

        # Optional fields are only included when provided, keeping the
        # request minimal.
        if deployment_id is not None:
            body["id"] = deployment_id
        if env is not None:
            body["env"] = env
        if persist:
            body["persist"] = True
        if docker_options is not None:
            body["dockerOptions"] = docker_options
        if command is not None:
            body["command"] = command
        if gpu is not None:
            body["gpuType"] = gpu
        if image_digest is not None:
            body["imageDigest"] = image_digest

        return body

    def close(self) -> None:
        # Release the underlying httpx connection pool.
        self._http.close()

    def __enter__(self) -> Client:
        return self

    def __exit__(self, *exc: Any) -> None:
        # Exception info is ignored; exceptions propagate after cleanup.
        self.close()
|
|
341
|
+
|
|
342
|
+
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""anycloud SDK exceptions."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class AnyCloudError(Exception):
|
|
5
|
+
"""Base exception for anycloud SDK."""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class APIError(AnyCloudError):
|
|
9
|
+
"""HTTP error from the conductor API."""
|
|
10
|
+
|
|
11
|
+
def __init__(self, status_code: int, message: str):
|
|
12
|
+
self.status_code = status_code
|
|
13
|
+
super().__init__(f"[{status_code}] {message}")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ConflictError(APIError):
|
|
17
|
+
"""Deployment ID already exists (409)."""
|
|
18
|
+
|
|
19
|
+
def __init__(self, message: str = "Deployment ID already exists"):
|
|
20
|
+
super().__init__(409, message)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class NotFoundError(APIError):
|
|
24
|
+
"""Deployment not found (404)."""
|
|
25
|
+
|
|
26
|
+
def __init__(self, message: str = "Deployment not found"):
|
|
27
|
+
super().__init__(404, message)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class JobFailedError(AnyCloudError):
|
|
31
|
+
"""Job reached a terminal failure state."""
|
|
32
|
+
|
|
33
|
+
def __init__(self, job_id: str, state: str, message: str | None = None):
|
|
34
|
+
self.job_id = job_id
|
|
35
|
+
self.state = state
|
|
36
|
+
detail = f": {message}" if message else ""
|
|
37
|
+
super().__init__(f"Job {job_id} {state}{detail}")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class DAGError(AnyCloudError):
|
|
41
|
+
"""Error in DAG construction or execution."""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class CycleError(DAGError):
|
|
45
|
+
"""DAG contains a cycle."""
|
|
46
|
+
|
|
47
|
+
def __init__(self):
|
|
48
|
+
super().__init__("DAG contains a cycle")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class TimeoutError(AnyCloudError):
|
|
52
|
+
"""Operation timed out."""
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Image reference for anycloud jobs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Image:
    """A reference to a Docker image.

    Instances have value semantics: two ``Image`` objects with the same
    ``ref`` compare equal and hash identically, so they can be deduplicated
    in sets or used as dict keys.

    Usage::

        IMG = anycloud.image("pytorch/pytorch:2.1.0-cuda12.1-cudnn8-runtime")

        @ac.job(image=IMG, gpu="h100:8")
        def train(lr: float = 0.001):
            ...

    Attributes:
        ref: The Docker image reference string.
    """

    def __init__(self, ref: str):
        self.ref = ref

    def __repr__(self) -> str:
        return f"Image({self.ref!r})"

    def __eq__(self, other: object) -> bool:
        # Previously equality was identity-based, so Image("x") != Image("x");
        # compare by the wrapped reference string instead.
        if isinstance(other, Image):
            return self.ref == other.ref
        return NotImplemented

    def __hash__(self) -> int:
        # Paired with __eq__ so Image remains hashable (sets, dict keys).
        return hash(self.ref)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def image(ref: str) -> Image:
    """Build an :class:`Image` from a Docker image reference string.

    Args:
        ref: Docker image reference (e.g. ``"pytorch/pytorch:2.1.0"``).

    Returns:
        An ``Image`` wrapping ``ref``.
    """
    return Image(ref)
|