anycloud-sdk 5.17.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .pytest_cache/
@@ -0,0 +1,79 @@
1
+ Metadata-Version: 2.4
2
+ Name: anycloud-sdk
3
+ Version: 5.17.2
4
+ Summary: Python SDK for anycloud — submit jobs, build DAGs, run workloads on any cloud
5
+ License-Expression: MIT
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: httpx>=0.27
8
+ Requires-Dist: pydantic>=2.0
9
+ Provides-Extra: dev
10
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
11
+ Requires-Dist: pytest>=8.0; extra == 'dev'
12
+ Requires-Dist: respx>=0.21; extra == 'dev'
13
+ Description-Content-Type: text/markdown
14
+
15
+ # anycloud Python SDK
16
+
17
+ Submit jobs, build DAGs, run workloads on any cloud.
18
+
19
+ ## Install
20
+
21
+ ```bash
22
+ pip install anycloud-sdk
23
+ ```
24
+
25
+ ## Define jobs as functions
26
+
27
+ ```python
28
+ import anycloud
29
+
30
+ ac = anycloud.Client()
31
+ IMG = anycloud.image("my-training:latest")
32
+
33
+ @ac.job(image=IMG, gpu="h100:8")
34
+ def train(lr: float = 0.001, batch_size: int = 32):
35
+ ...
36
+
37
+ # Submit — function params become env vars (LR=0.01, BATCH_SIZE=32)
38
+ job = train.submit(lr=0.01)
39
+ job.wait()
40
+ print(job.logs())
41
+ ```
42
+
43
+ ## DAGs via `after`
44
+
45
+ ```python
46
+ @ac.job(image=anycloud.image("prep:latest"))
47
+ def preprocess():
48
+ ...
49
+
50
+ @ac.job(image=IMG, gpu="h100:8")
51
+ def train(lr: float = 0.001):
52
+ ...
53
+
54
+ @ac.job(image=anycloud.image("eval:latest"))
55
+ def evaluate():
56
+ ...
57
+
58
+ prep = preprocess()
59
+ t = train.submit(lr=0.01, after=[prep])
60
+ e = evaluate.submit(after=[t])
61
+ e.wait() # preprocess → train → evaluate
62
+ ```
63
+
64
+ ## Fan-out / fan-in
65
+
66
+ ```python
67
+ split = preprocess()
68
+ shards = [train.submit(lr=lr, after=[split]) for lr in [0.1, 0.01, 0.001]]
69
+ best = evaluate.submit(after=shards)
70
+ best.wait() # preprocess → 3× train (parallel) → evaluate
71
+ ```
72
+
73
+ ## Low-level API
74
+
75
+ ```python
76
+ # submit() works without the decorator too
77
+ job = ac.submit("my-image:latest", cloud="aws", gpu="h100:8", env={"LR": "0.01"})
78
+ job.wait()
79
+ ```
@@ -0,0 +1,65 @@
1
+ # anycloud Python SDK
2
+
3
+ Submit jobs, build DAGs, run workloads on any cloud.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install anycloud-sdk
9
+ ```
10
+
11
+ ## Define jobs as functions
12
+
13
+ ```python
14
+ import anycloud
15
+
16
+ ac = anycloud.Client()
17
+ IMG = anycloud.image("my-training:latest")
18
+
19
+ @ac.job(image=IMG, gpu="h100:8")
20
+ def train(lr: float = 0.001, batch_size: int = 32):
21
+ ...
22
+
23
+ # Submit — function params become env vars (LR=0.01, BATCH_SIZE=32)
24
+ job = train.submit(lr=0.01)
25
+ job.wait()
26
+ print(job.logs())
27
+ ```
28
+
29
+ ## DAGs via `after`
30
+
31
+ ```python
32
+ @ac.job(image=anycloud.image("prep:latest"))
33
+ def preprocess():
34
+ ...
35
+
36
+ @ac.job(image=IMG, gpu="h100:8")
37
+ def train(lr: float = 0.001):
38
+ ...
39
+
40
+ @ac.job(image=anycloud.image("eval:latest"))
41
+ def evaluate():
42
+ ...
43
+
44
+ prep = preprocess()
45
+ t = train.submit(lr=0.01, after=[prep])
46
+ e = evaluate.submit(after=[t])
47
+ e.wait() # preprocess → train → evaluate
48
+ ```
49
+
50
+ ## Fan-out / fan-in
51
+
52
+ ```python
53
+ split = preprocess()
54
+ shards = [train.submit(lr=lr, after=[split]) for lr in [0.1, 0.01, 0.001]]
55
+ best = evaluate.submit(after=shards)
56
+ best.wait() # preprocess → 3× train (parallel) → evaluate
57
+ ```
58
+
59
+ ## Low-level API
60
+
61
+ ```python
62
+ # submit() works without the decorator too
63
+ job = ac.submit("my-image:latest", cloud="aws", gpu="h100:8", env={"LR": "0.01"})
64
+ job.wait()
65
+ ```
@@ -0,0 +1,50 @@
1
"""anycloud Python SDK — submit jobs, build DAGs, run workloads on any cloud."""

from anycloud.client import Client
from anycloud.errors import (
    AnyCloudError,
    APIError,
    ConflictError,
    CycleError,
    DAGError,
    JobFailedError,
    NotFoundError,
    TimeoutError,
)
from anycloud.image import Image, image
from anycloud.job import Job
from anycloud.remote import RemoteFunction
from anycloud.types import (
    AWSCredentials,
    AzureCredentials,
    CloudConfig,
    CloudType,
    Deployment,
    DeploymentState,
    DockerOptions,
    GCPCredentials,
    LambdaCredentials,
)

# Public API surface re-exported at the package top level.
# DAGError/CycleError are part of the public exception hierarchy in
# anycloud.errors and are raised during DAG construction, so they are
# exported alongside the other errors.
__all__ = [
    "Client",
    "Image",
    "image",
    "Job",
    "RemoteFunction",
    # Types
    "CloudConfig",
    "CloudType",
    "Deployment",
    "DeploymentState",
    "DockerOptions",
    "AWSCredentials",
    "GCPCredentials",
    "AzureCredentials",
    "LambdaCredentials",
    # Errors
    "AnyCloudError",
    "APIError",
    "ConflictError",
    "CycleError",
    "DAGError",
    "JobFailedError",
    "NotFoundError",
    "TimeoutError",
]
@@ -0,0 +1,342 @@
1
+ """anycloud Client — submit jobs, manage deployments."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ from importlib.metadata import version as _pkg_version, PackageNotFoundError
8
+ from pathlib import Path
9
+ from typing import Any, Callable
10
+
11
+ import httpx
12
+
13
+ from anycloud.errors import APIError, ConflictError, NotFoundError
14
+ from anycloud.image import Image
15
+ from anycloud.job import Job
16
+ from anycloud.remote import RemoteFunction
17
+ from anycloud.types import (
18
+ CloudConfig,
19
+ Deployment,
20
+ StatusResponse,
21
+ )
22
+
23
+
24
+ def _read_version() -> str:
25
+ """Resolve SDK version: installed package metadata first, then monorepo package.json."""
26
+ try:
27
+ return _pkg_version("anycloud-sdk")
28
+ except PackageNotFoundError:
29
+ pass
30
+ # Development fallback: read from monorepo root package.json
31
+ pkg = Path(__file__).resolve().parent.parent.parent.parent / "package.json"
32
+ try:
33
+ with open(pkg) as f:
34
+ return json.load(f)["version"]
35
+ except (FileNotFoundError, KeyError):
36
+ raise RuntimeError(
37
+ "Could not determine anycloud-sdk version. "
38
+ "Install the package (pip install anycloud-sdk) or run from the monorepo."
39
+ )
40
+
41
+
42
# Resolved once at import time; attached to every conductor request body
# (see _build_request_body / _status / _terminate / _resubmit below).
_SDK_VERSION = _read_version()
43
+
44
+
45
class Client:
    """anycloud Python client.

    Usage::

        from anycloud import Client
        from anycloud.types import CloudConfig, AWSCredentials

        cc = CloudConfig(
            cloudProvider="AWS",
            credentials=AWSCredentials(
                accessKeyId="...", secretAccessKey="...",
            ),
            region="us-west-2",
            spot=True,
        )

        ac = Client(access_token="ghp_...", cloud_config=cc)

        # Submit a job — returns a Job (promise/future)
        job = ac.submit("train:latest", gpu="h100:8")
        job.wait()

        # Chain jobs into a DAG
        prep = ac.submit("prep:latest")
        train = ac.submit("train:latest", gpu="h100:8", after=[prep])
        eval = ac.submit("eval:latest", after=[train])
        eval.wait()  # waits for the entire chain

    Args:
        access_token: GitHub token for authentication.
            Falls back to ``ANYCLOUD_ACCESS_TOKEN`` env var.
        conductor_url: Base URL of the conductor API.
            Falls back to ``ANYCLOUD_CONDUCTOR_URL``, then ``http://localhost:8080``.
        cloud_config: Default ``CloudConfig`` applied to every ``submit()`` call.
            Can be overridden per-submit via ``submit(cloud_config=...)``.
    """

    def __init__(
        self,
        *,
        access_token: str | None = None,
        conductor_url: str | None = None,
        cloud_config: CloudConfig | None = None,
    ) -> None:
        # An empty token is tolerated here; authentication failures surface
        # later as APIError when the conductor rejects the request.
        self._access_token = access_token or os.environ.get("ANYCLOUD_ACCESS_TOKEN", "")
        # Strip any trailing slash so httpx path joining stays predictable.
        self._base_url = (
            conductor_url
            or os.environ.get("ANYCLOUD_CONDUCTOR_URL", "http://localhost:8080")
        ).rstrip("/")
        self._default_cloud_config = cloud_config
        # One shared connection pool for all API calls; closed via close().
        self._http = httpx.Client(base_url=self._base_url, timeout=30.0)

    # ------------------------------------------------------------------
    # Decorator: @client.job()
    # ------------------------------------------------------------------

    def job(
        self,
        *,
        image: Image | str,
        cloud_config: CloudConfig | None = None,
        gpu: str | None = None,
        docker_options: dict[str, Any] | None = None,
        command: list[str] | None = None,
    ) -> Callable:
        """Decorator that turns a function into a submittable anycloud job.

        The function's parameters (with defaults) become environment variables
        when submitted. The function body is not executed — the Docker image
        is what runs on the cloud.

        Usage::

            IMG = anycloud.image("train:latest")

            @ac.job(image=IMG, cloud_config=my_config, gpu="h100:8")
            def train(lr: float = 0.001, batch_size: int = 32):
                ...

            job = train.submit(lr=0.01)
            job = train(lr=0.01)  # shorthand
            job = train.submit(after=[prep_job])  # DAG
        """
        # The decorator merely binds config; RemoteFunction (anycloud.remote)
        # does the parameter→env-var mapping and calls back into submit().
        def decorator(fn: Callable) -> RemoteFunction:
            return RemoteFunction(
                self,
                fn,
                image=image,
                cloud_config=cloud_config,
                gpu=gpu,
                docker_options=docker_options,
                command=command,
            )
        return decorator

    # ------------------------------------------------------------------
    # Core: submit
    # ------------------------------------------------------------------

    def submit(
        self,
        image: str,
        *,
        cloud_config: CloudConfig | None = None,
        gpu: str | None = None,
        env: dict[str, str] | None = None,
        docker_options: dict[str, Any] | None = None,
        command: list[str] | None = None,
        persist: bool = False,
        deployment_id: str | None = None,
        image_digest: str | None = None,
        after: list[Job] | None = None,
    ) -> Job:
        """Submit a job and return a ``Job`` promise.

        If ``after`` is provided, the job is **deferred**: it won't be
        submitted to the conductor until all upstream jobs complete.
        Calling ``wait()`` on a deferred job blocks on the full chain.

        This lets you build arbitrary DAGs::

            prep = client.submit("prep:latest", cloud_config=cc)
            train = client.submit("train:latest", cloud_config=cc, gpu="h100:8", after=[prep])
            eval = client.submit("eval:latest", cloud_config=cc, after=[train])
            eval.wait()  # prep → train → eval

        Fan-out / fan-in::

            split = client.submit("split:latest", cloud_config=cc)
            shards = [client.submit("worker:latest", cloud_config=cc, after=[split]) for _ in range(4)]
            merge = client.submit("merge:latest", cloud_config=cc, after=shards)
            merge.wait()  # split → 4× worker → merge

        Args:
            image: Docker image reference (e.g. ``"train:latest"``).
            cloud_config: ``CloudConfig`` specifying provider, credentials, region, etc.
                Falls back to the default set on ``Client(cloud_config=...)``.
            gpu: GPU type shorthand (e.g. ``"h100:8"``).
            env: Environment variables passed to the container.
            docker_options: Docker runtime options (shmSize, gpus, etc.).
            command: Override container CMD.
            persist: Keep VM alive after job completion.
            deployment_id: Custom deployment ID (auto-generated if omitted).
            image_digest: Docker image digest (e.g. ``"sha256:abc..."``).
            after: List of upstream ``Job`` objects that must complete first.

        Returns:
            A ``Job`` handle you can poll, wait on, or pass as a dependency.

        Raises:
            ValueError: If no cloud config is available (neither argument
                nor client default) — raised by ``_build_request_body``.
        """
        # Build the request body eagerly in both paths so config errors
        # (e.g. missing cloud_config) surface at submit() time, not later.
        body = self._build_request_body(
            image=image,
            gpu=gpu,
            env=env,
            docker_options=docker_options,
            command=command,
            persist=persist,
            deployment_id=deployment_id,
            cloud_config=cloud_config,
            image_digest=image_digest,
        )

        if after:
            # Deferred: don't submit yet, wait for deps on .wait()
            # NOTE(review): the Job id is a "(deferred)" placeholder unless
            # the caller supplied one; presumably Job swaps in the real id
            # when it finally posts _submit_kwargs — confirm in anycloud.job.
            job = Job(self, deployment_id or "(deferred)", after=after)
            job._submit_kwargs = body
            return job

        # Immediate: submit now
        data = self._submit_raw(body)
        return Job(self, data["id"])

    # ------------------------------------------------------------------
    # Deployment management
    # ------------------------------------------------------------------

    def list(self, *, limit: int = 20) -> list[Deployment]:
        """List recent deployments."""
        data = self._post("/v1/list", {
            "accessToken": self._access_token,
            "version": _SDK_VERSION,
            "limit": limit,
        })
        # Response is expected to be a JSON array of deployment objects.
        return [Deployment.model_validate(d) for d in data]

    def get(self, deployment_id: str) -> Job:
        """Get a ``Job`` handle for an existing deployment.

        No API call is made here; the handle is lazy and the id is not
        validated until the Job is first polled.
        """
        return Job(self, deployment_id)

    # ------------------------------------------------------------------
    # Internal API calls (used by Job)
    # ------------------------------------------------------------------

    def _submit_raw(self, body: dict[str, Any]) -> dict[str, Any]:
        """POST /v1/new and return the response dict."""
        return self._post("/v1/new", body)

    def _status(self, deployment_id: str, *, verbose: bool = False) -> StatusResponse:
        # Poll deployment status; Job.wait() is built on top of this.
        body: dict[str, Any] = {
            "id": deployment_id,
            "accessToken": self._access_token,
            "version": _SDK_VERSION,
        }
        if verbose:
            body["verbose"] = True
        data = self._post("/v1/status", body)
        return StatusResponse.model_validate(data)

    def _terminate(self, deployment_id: str) -> None:
        # Request termination of a running deployment.
        self._post("/v1/terminate", {
            "id": deployment_id,
            "accessToken": self._access_token,
            "version": _SDK_VERSION,
        })

    def _resubmit(self, deployment_id: str) -> None:
        # Re-run an existing deployment under the same id.
        self._post("/v1/resubmit", {
            "id": deployment_id,
            "accessToken": self._access_token,
            "version": _SDK_VERSION,
        })

    # ------------------------------------------------------------------
    # HTTP
    # ------------------------------------------------------------------

    def _post(self, path: str, body: dict[str, Any]) -> Any:
        """POST *body* as JSON and map HTTP errors to SDK exceptions.

        409 → ConflictError, 404 → NotFoundError, any other >=400 → APIError
        (except 400s whose text mentions "not found", which the conductor
        uses for missing deployments on /v1/status).
        """
        resp = self._http.post(path, json=body)
        if resp.status_code == 409:
            raise ConflictError(resp.text)
        if resp.status_code == 404:
            raise NotFoundError(resp.text)
        if resp.status_code >= 400:
            # /v1/status returns 400 (not 404) for missing deployments
            if "not found" in resp.text.lower():
                raise NotFoundError(resp.text)
            raise APIError(resp.status_code, resp.text)
        return resp.json()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _build_request_body(
        self,
        *,
        image: str,
        gpu: str | None,
        env: dict[str, str] | None,
        docker_options: dict[str, Any] | None,
        command: list[str] | None,
        persist: bool,
        deployment_id: str | None,
        cloud_config: CloudConfig | None,
        image_digest: str | None = None,
    ) -> dict[str, Any]:
        """Assemble the /v1/new request body, applying the default cloud config.

        Raises:
            ValueError: If neither *cloud_config* nor the client default is set.
        """
        cc = cloud_config or self._default_cloud_config
        if cc is None:
            raise ValueError(
                "A cloud config is required. Pass cloud_config=CloudConfig(...) "
                "or set a default via Client(cloud_config=...)."
            )

        body: dict[str, Any] = {
            "image": image,
            "deploymentType": "job",
            "version": _SDK_VERSION,
            "accessToken": self._access_token,
            # by_alias keeps the conductor's camelCase field names.
            "cloudConfig": cc.model_dump(by_alias=True, exclude_none=True),
        }

        # Optional fields are omitted entirely rather than sent as null.
        if deployment_id is not None:
            body["id"] = deployment_id
        if env is not None:
            body["env"] = env
        if persist:
            body["persist"] = True
        if docker_options is not None:
            body["dockerOptions"] = docker_options
        if command is not None:
            body["command"] = command
        if gpu is not None:
            body["gpuType"] = gpu
        if image_digest is not None:
            body["imageDigest"] = image_digest

        return body

    def close(self) -> None:
        """Close the underlying HTTP connection pool."""
        self._http.close()

    def __enter__(self) -> Client:
        return self

    def __exit__(self, *exc: Any) -> None:
        # Context-manager support: always release the HTTP pool on exit.
        self.close()
341
+
342
+
@@ -0,0 +1,52 @@
1
+ """anycloud SDK exceptions."""
2
+
3
+
4
+ class AnyCloudError(Exception):
5
+ """Base exception for anycloud SDK."""
6
+
7
+
8
+ class APIError(AnyCloudError):
9
+ """HTTP error from the conductor API."""
10
+
11
+ def __init__(self, status_code: int, message: str):
12
+ self.status_code = status_code
13
+ super().__init__(f"[{status_code}] {message}")
14
+
15
+
16
+ class ConflictError(APIError):
17
+ """Deployment ID already exists (409)."""
18
+
19
+ def __init__(self, message: str = "Deployment ID already exists"):
20
+ super().__init__(409, message)
21
+
22
+
23
+ class NotFoundError(APIError):
24
+ """Deployment not found (404)."""
25
+
26
+ def __init__(self, message: str = "Deployment not found"):
27
+ super().__init__(404, message)
28
+
29
+
30
+ class JobFailedError(AnyCloudError):
31
+ """Job reached a terminal failure state."""
32
+
33
+ def __init__(self, job_id: str, state: str, message: str | None = None):
34
+ self.job_id = job_id
35
+ self.state = state
36
+ detail = f": {message}" if message else ""
37
+ super().__init__(f"Job {job_id} {state}{detail}")
38
+
39
+
40
+ class DAGError(AnyCloudError):
41
+ """Error in DAG construction or execution."""
42
+
43
+
44
+ class CycleError(DAGError):
45
+ """DAG contains a cycle."""
46
+
47
+ def __init__(self):
48
+ super().__init__("DAG contains a cycle")
49
+
50
+
51
+ class TimeoutError(AnyCloudError):
52
+ """Operation timed out."""
@@ -0,0 +1,31 @@
1
"""Image reference for anycloud jobs."""

from __future__ import annotations


class Image:
    """A reference to a Docker image.

    Instances compare equal when they wrap the same reference string and
    are hashable, so images can be deduplicated or used as dict keys.

    Usage::

        IMG = anycloud.image("pytorch/pytorch:2.1.0-cuda12.1-cudnn8-runtime")

        @ac.job(image=IMG, gpu="h100:8")
        def train(lr: float = 0.001):
            ...
    """

    def __init__(self, ref: str):
        # ref: Docker image reference, e.g. "repo/name:tag".
        self.ref = ref

    def __repr__(self) -> str:
        return f"Image({self.ref!r})"

    def __eq__(self, other: object) -> bool:
        # Value equality on the reference string. Returning NotImplemented
        # (not False) lets Python fall back correctly for non-Image operands.
        if not isinstance(other, Image):
            return NotImplemented
        return self.ref == other.ref

    def __hash__(self) -> int:
        # Defined alongside __eq__ so equal images hash identically.
        return hash(self.ref)


def image(ref: str) -> Image:
    """Create an image reference.

    Args:
        ref: Docker image reference (e.g. ``"pytorch/pytorch:2.1.0"``).

    Returns:
        An ``Image`` wrapping *ref*.
    """
    return Image(ref)