beaker-py 2.0.0.dev0__tar.gz → 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. beaker_py-2.0.1/PKG-INFO +133 -0
  2. beaker_py-2.0.1/README.md +97 -0
  3. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/pyproject.toml +4 -5
  4. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/__init__.py +34 -0
  5. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/_cluster.py +1 -1
  6. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/_dataset.py +117 -8
  7. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/_experiment.py +8 -5
  8. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/_group.py +1 -1
  9. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/_image.py +1 -1
  10. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/_job.py +21 -9
  11. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/_node.py +1 -1
  12. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/_secret.py +1 -1
  13. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/_service_client.py +108 -35
  14. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/_user.py +1 -1
  15. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/_workload.py +30 -12
  16. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/_workspace.py +1 -1
  17. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/client.py +82 -49
  18. beaker_py-2.0.1/src/beaker/common.py +71 -0
  19. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/exceptions.py +7 -2
  20. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/types.py +95 -7
  21. beaker_py-2.0.1/src/beaker/version.py +1 -0
  22. beaker_py-2.0.1/src/beaker_py.egg-info/PKG-INFO +133 -0
  23. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker_py.egg-info/requires.txt +2 -2
  24. beaker_py-2.0.0.dev0/PKG-INFO +0 -80
  25. beaker_py-2.0.0.dev0/README.md +0 -42
  26. beaker_py-2.0.0.dev0/src/beaker/common.py +0 -66
  27. beaker_py-2.0.0.dev0/src/beaker/version.py +0 -1
  28. beaker_py-2.0.0.dev0/src/beaker_py.egg-info/PKG-INFO +0 -80
  29. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/setup.cfg +0 -0
  30. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/_organization.py +0 -0
  31. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/beaker_pb2.py +0 -0
  32. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/beaker_pb2.pyi +0 -0
  33. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/beaker_pb2_grpc.py +0 -0
  34. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/config.py +0 -0
  35. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker/py.typed +0 -0
  36. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker_py.egg-info/SOURCES.txt +0 -0
  37. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker_py.egg-info/dependency_links.txt +0 -0
  38. {beaker_py-2.0.0.dev0 → beaker_py-2.0.1}/src/beaker_py.egg-info/top_level.txt +0 -0
@@ -0,0 +1,133 @@
1
+ Metadata-Version: 2.4
2
+ Name: beaker-py
3
+ Version: 2.0.1
4
+ Summary: A Python Beaker client
5
+ Author-email: Allen Institute for Artificial Intelligence <contact@allenai.org>, Pete Walsh <petew@allenai.org>
6
+ Project-URL: Homepage, https://github.com/allenai/beaker
7
+ Project-URL: Repository, https://github.com/allenai/beaker
8
+ Project-URL: Changelog, https://github.com/allenai/beaker/blob/main/bindings/python/CHANGELOG.md
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: requests
15
+ Requires-Dist: packaging
16
+ Requires-Dist: PyYAML
17
+ Requires-Dist: grpcio>=1.70.0
18
+ Requires-Dist: protobuf<6.0,>=5.0
19
+ Requires-Dist: google-crc32c
20
+ Provides-Extra: dev
21
+ Requires-Dist: ruff; extra == "dev"
22
+ Requires-Dist: mypy<1.6,>=1.0; extra == "dev"
23
+ Requires-Dist: types-requests; extra == "dev"
24
+ Requires-Dist: types-cachetools; extra == "dev"
25
+ Requires-Dist: types-PyYAML; extra == "dev"
26
+ Requires-Dist: types-protobuf; extra == "dev"
27
+ Requires-Dist: black<24.0,>=23.0; extra == "dev"
28
+ Requires-Dist: isort<5.13,>=5.12; extra == "dev"
29
+ Requires-Dist: pytest<8.0; extra == "dev"
30
+ Requires-Dist: twine>=1.11.0; extra == "dev"
31
+ Requires-Dist: build; extra == "dev"
32
+ Requires-Dist: setuptools; extra == "dev"
33
+ Requires-Dist: wheel; extra == "dev"
34
+ Requires-Dist: petname==2.6; extra == "dev"
35
+ Requires-Dist: grpcio-tools; extra == "dev"
36
+
37
+ # Beaker-py
38
+
39
+ A lightweight pure-Python client for Beaker.
40
+
41
+ ## Installing
42
+
43
+ ### Installing with `pip`
44
+
45
+ **beaker-py** is available [on PyPI](https://pypi.org/project/beaker-py/). Just run:
46
+
47
+ ```bash
48
+ pip install beaker-py
49
+ ```
50
+
51
+ ### Installing from source
52
+
53
+ To install **beaker-py** from source, first clone [the repository](https://github.com/allenai/beaker):
54
+
55
+ ```bash
56
+ git clone https://github.com/allenai/beaker.git
57
+ ```
58
+
59
+ Then create or activate a Python virtual environment, and run:
60
+
61
+ ```bash
62
+ cd beaker/bindings/python
63
+ make dev-install
64
+ ```
65
+
66
+ ## Quick start
67
+
68
+ If you've already configured the [Beaker command-line client](https://github.com/allenai/beaker/),
69
+ **beaker-py** will find and use the existing configuration file (usually located at `$HOME/.beaker/config.yml`) or the `BEAKER_TOKEN` environment variable.
70
+
71
+ Then you can instantiate the Beaker client with the `.from_env()` class method:
72
+
73
+ ```python
74
+ from beaker import Beaker
75
+
76
+ with Beaker.from_env() as beaker:
77
+ ...
78
+ ```
79
+
80
+ With the Python client, you can:
81
+ - Query [**Clusters**](https://beaker-docs.apps.allenai.org/concept/clusters.html) with `beaker.cluster.*` methods, e.g. `beaker.cluster.get("ai2/jupiter-cirrascale-2")`.
82
+ - Manage [**Datasets**](https://beaker-docs.apps.allenai.org/concept/datasets.html) with `beaker.dataset.*` methods, e.g. `beaker.dataset.create(dataset_name, source_dir)`.
83
+ - Submit, track, and find [**Experiments**](https://beaker-docs.apps.allenai.org/concept/experiments.html) with `beaker.experiment.*`, `beaker.workload.*`, and `beaker.job.*` methods, e.g. `beaker.experiment.create(spec=spec, name=name)`.
84
+ - Manage [**Workspaces**](https://beaker-docs.apps.allenai.org/concept/workspaces.html) with `beaker.workspace.*` methods, e.g. `beaker.workspace.create("ai2/new_workspace")`.
85
+ - Manage [**Secrets**](https://beaker-docs.apps.allenai.org/concept/secrets.html) with `beaker.secret.*` methods, e.g. `beaker.secret.write(name, value)`.
86
+
87
+ ### Example workflow
88
+
89
+ Launch and follow an experiment like [beaker-gantry](https://github.com/allenai/beaker-gantry) does:
90
+
91
+ ```python
92
+ import time
93
+ from beaker import Beaker, BeakerExperimentSpec, BeakerJobPriority
94
+
95
+
96
+ with Beaker.from_env() as beaker:
97
+ # Build experiment spec...
98
+ spec = BeakerExperimentSpec.new(
99
+ description="beaker-py test run",
100
+ beaker_image="petew/hello-world",
101
+ priority=BeakerJobPriority.low,
102
+ preemptible=True,
103
+ )
104
+
105
+ # Create experiment workload...
106
+ workload = beaker.experiment.create(spec=spec)
107
+
108
+ # Wait for job to be created...
109
+ while (job := beaker.workload.get_latest_job(workload)) is None:
110
+ print("waiting for job to start...")
111
+ time.sleep(1.0)
112
+
113
+ # Follow logs...
114
+ print("Job logs:")
115
+ for job_log in beaker.job.logs(job, follow=True):
116
+ print(job_log.message.decode())
117
+ ```
118
+
119
+ ## Development
120
+
121
+ After [installing from source](#installing-from-source), you can run checks and tests locally with:
122
+
123
+ ```bash
124
+ make checks
125
+ ```
126
+
127
+ ### Releases
128
+
129
+ At the moment releases need to be published manually by following these steps:
130
+
131
+ 1. Ensure you've authenticated with [PyPI](https://pypi.org/) through a `~/.pypirc` file and have write permissions to the [beaker-py project](https://pypi.org/project/beaker-py/).
132
+ 2. Ensure the target release version defined in `src/beaker/version.py` is correct, or change the version on the fly by adding the `Make` argument `BEAKER_PY_VERSION=X.X.X` to the command in the next step.
133
+ 3. Run `make publish` for a stable release or `make publish-nightly` for a nightly pre-release.
@@ -0,0 +1,97 @@
1
+ # Beaker-py
2
+
3
+ A lightweight pure-Python client for Beaker.
4
+
5
+ ## Installing
6
+
7
+ ### Installing with `pip`
8
+
9
+ **beaker-py** is available [on PyPI](https://pypi.org/project/beaker-py/). Just run:
10
+
11
+ ```bash
12
+ pip install beaker-py
13
+ ```
14
+
15
+ ### Installing from source
16
+
17
+ To install **beaker-py** from source, first clone [the repository](https://github.com/allenai/beaker):
18
+
19
+ ```bash
20
+ git clone https://github.com/allenai/beaker.git
21
+ ```
22
+
23
+ Then create or activate a Python virtual environment, and run:
24
+
25
+ ```bash
26
+ cd beaker/bindings/python
27
+ make dev-install
28
+ ```
29
+
30
+ ## Quick start
31
+
32
+ If you've already configured the [Beaker command-line client](https://github.com/allenai/beaker/),
33
+ **beaker-py** will find and use the existing configuration file (usually located at `$HOME/.beaker/config.yml`) or the `BEAKER_TOKEN` environment variable.
34
+
35
+ Then you can instantiate the Beaker client with the `.from_env()` class method:
36
+
37
+ ```python
38
+ from beaker import Beaker
39
+
40
+ with Beaker.from_env() as beaker:
41
+ ...
42
+ ```
43
+
44
+ With the Python client, you can:
45
+ - Query [**Clusters**](https://beaker-docs.apps.allenai.org/concept/clusters.html) with `beaker.cluster.*` methods, e.g. `beaker.cluster.get("ai2/jupiter-cirrascale-2")`.
46
+ - Manage [**Datasets**](https://beaker-docs.apps.allenai.org/concept/datasets.html) with `beaker.dataset.*` methods, e.g. `beaker.dataset.create(dataset_name, source_dir)`.
47
+ - Submit, track, and find [**Experiments**](https://beaker-docs.apps.allenai.org/concept/experiments.html) with `beaker.experiment.*`, `beaker.workload.*`, and `beaker.job.*` methods, e.g. `beaker.experiment.create(spec=spec, name=name)`.
48
+ - Manage [**Workspaces**](https://beaker-docs.apps.allenai.org/concept/workspaces.html) with `beaker.workspace.*` methods, e.g. `beaker.workspace.create("ai2/new_workspace")`.
49
+ - Manage [**Secrets**](https://beaker-docs.apps.allenai.org/concept/secrets.html) with `beaker.secret.*` methods, e.g. `beaker.secret.write(name, value)`.
50
+
51
+ ### Example workflow
52
+
53
+ Launch and follow an experiment like [beaker-gantry](https://github.com/allenai/beaker-gantry) does:
54
+
55
+ ```python
56
+ import time
57
+ from beaker import Beaker, BeakerExperimentSpec, BeakerJobPriority
58
+
59
+
60
+ with Beaker.from_env() as beaker:
61
+ # Build experiment spec...
62
+ spec = BeakerExperimentSpec.new(
63
+ description="beaker-py test run",
64
+ beaker_image="petew/hello-world",
65
+ priority=BeakerJobPriority.low,
66
+ preemptible=True,
67
+ )
68
+
69
+ # Create experiment workload...
70
+ workload = beaker.experiment.create(spec=spec)
71
+
72
+ # Wait for job to be created...
73
+ while (job := beaker.workload.get_latest_job(workload)) is None:
74
+ print("waiting for job to start...")
75
+ time.sleep(1.0)
76
+
77
+ # Follow logs...
78
+ print("Job logs:")
79
+ for job_log in beaker.job.logs(job, follow=True):
80
+ print(job_log.message.decode())
81
+ ```
82
+
83
+ ## Development
84
+
85
+ After [installing from source](#installing-from-source), you can run checks and tests locally with:
86
+
87
+ ```bash
88
+ make checks
89
+ ```
90
+
91
+ ### Releases
92
+
93
+ At the moment releases need to be published manually by following these steps:
94
+
95
+ 1. Ensure you've authenticated with [PyPI](https://pypi.org/) through a `~/.pypirc` file and have write permissions to the [beaker-py project](https://pypi.org/project/beaker-py/).
96
+ 2. Ensure the target release version defined in `src/beaker/version.py` is correct, or change the version on the fly by adding the `Make` argument `BEAKER_PY_VERSION=X.X.X` to the command in the next step.
97
+ 3. Run `make publish` for a stable release or `make publish-nightly` for a nightly pre-release.
@@ -9,7 +9,6 @@ readme = "README.md"
9
9
  description = "A Python Beaker client"
10
10
  classifiers = [
11
11
  "Intended Audience :: Science/Research",
12
- "License :: OSI Approved :: Apache Software License",
13
12
  "Programming Language :: Python :: 3",
14
13
  "Topic :: Scientific/Engineering :: Artificial Intelligence",
15
14
  ]
@@ -22,10 +21,10 @@ requires-python = ">=3.10"
22
21
  dependencies = [
23
22
  "requests",
24
23
  "packaging",
25
- "rich>=12.3,<14.0",
26
24
  "PyYAML",
27
25
  "grpcio>=1.70.0", # NOTE: when updating min version, make sure to also update it in the Makefile
28
- "protobuf>=5.0",
26
+ "protobuf>=5.0,<6.0",
27
+ "google-crc32c",
29
28
  ]
30
29
 
31
30
  [project.optional-dependencies]
@@ -50,8 +49,8 @@ dev = [
50
49
  [project.urls]
51
50
  Homepage = "https://github.com/allenai/beaker"
52
51
  Repository = "https://github.com/allenai/beaker"
53
- Changelog = "https://github.com/allenai/beaker-py/blob/main/CHANGELOG.md"
54
- Documentation = "https://beaker-py.readthedocs.io/"
52
+ Changelog = "https://github.com/allenai/beaker/blob/main/bindings/python/CHANGELOG.md"
53
+ #Documentation = "https://beaker-py.readthedocs.io/"
55
54
 
56
55
  [tool.setuptools]
57
56
  include-package-data = true
@@ -1,22 +1,39 @@
1
1
  from .client import Beaker
2
2
  from .types import (
3
3
  BeakerAuthRole,
4
+ BeakerCancelationCode,
5
+ BeakerCluster,
4
6
  BeakerConstraints,
5
7
  BeakerDataMount,
8
+ BeakerDataset,
9
+ BeakerDatasetFileAlgorithmType,
6
10
  BeakerDataSource,
11
+ BeakerEnvironment,
7
12
  BeakerEnvVar,
13
+ BeakerExperiment,
8
14
  BeakerExperimentSpec,
15
+ BeakerGpuType,
16
+ BeakerGroup,
17
+ BeakerImage,
9
18
  BeakerImageSource,
19
+ BeakerJob,
10
20
  BeakerJobPriority,
21
+ BeakerNode,
22
+ BeakerOrganization,
11
23
  BeakerResultSpec,
12
24
  BeakerRetrySpec,
25
+ BeakerSecret,
13
26
  BeakerSortOrder,
14
27
  BeakerSpecVersion,
28
+ BeakerTask,
15
29
  BeakerTaskContext,
16
30
  BeakerTaskResources,
17
31
  BeakerTaskSpec,
32
+ BeakerUser,
33
+ BeakerWorkload,
18
34
  BeakerWorkloadStatus,
19
35
  BeakerWorkloadType,
36
+ BeakerWorkspace,
20
37
  )
21
38
 
22
39
  __all__ = [
@@ -26,6 +43,9 @@ __all__ = [
26
43
  "BeakerJobPriority",
27
44
  "BeakerWorkloadType",
28
45
  "BeakerWorkloadStatus",
46
+ "BeakerDatasetFileAlgorithmType",
47
+ "BeakerCancelationCode",
48
+ "BeakerGpuType",
29
49
  "BeakerImageSource",
30
50
  "BeakerEnvVar",
31
51
  "BeakerDataSource",
@@ -38,4 +58,18 @@ __all__ = [
38
58
  "BeakerRetrySpec",
39
59
  "BeakerExperimentSpec",
40
60
  "BeakerConstraints",
61
+ "BeakerJob",
62
+ "BeakerWorkload",
63
+ "BeakerExperiment",
64
+ "BeakerTask",
65
+ "BeakerImage",
66
+ "BeakerCluster",
67
+ "BeakerNode",
68
+ "BeakerDataset",
69
+ "BeakerGroup",
70
+ "BeakerSecret",
71
+ "BeakerOrganization",
72
+ "BeakerUser",
73
+ "BeakerEnvironment",
74
+ "BeakerWorkspace",
41
75
  ]
@@ -61,7 +61,7 @@ class ClusterClient(ServiceClient):
61
61
  count += 1
62
62
  yield cluster
63
63
  if limit is not None and count >= limit:
64
- break
64
+ return
65
65
 
66
66
  def url(self, cluster: pb2.Cluster) -> str:
67
67
  return f"{self.config.agent_address}/orgs/{self.beaker.org_name}/clusters/{cluster.name}"
@@ -3,7 +3,7 @@ import os
3
3
  from dataclasses import dataclass
4
4
  from datetime import datetime
5
5
  from pathlib import Path
6
- from typing import Iterable, Literal
6
+ from typing import Generator, Iterable, Literal
7
7
  from urllib.parse import urlparse
8
8
 
9
9
  import grpc
@@ -55,13 +55,20 @@ class DatasetClient(ServiceClient):
55
55
  REQUEST_SIZE_LIMIT = 32 * 1024 * 1024
56
56
  DOWNLOAD_CHUNK_SIZE = 10 * 1024
57
57
 
58
- def get(self, dataset_id: str) -> pb2.Dataset:
58
+ def get(self, dataset: str) -> pb2.Dataset:
59
59
  return self.rpc_request(
60
60
  RpcMethod[pb2.GetDatasetResponse](self.service.GetDataset),
61
- pb2.GetDatasetRequest(dataset_id=self.resolve_dataset_id(dataset_id)),
62
- exceptions_for_status={grpc.StatusCode.NOT_FOUND: BeakerDatasetNotFound(dataset_id)},
61
+ pb2.GetDatasetRequest(dataset_id=self.resolve_dataset_id(dataset)),
62
+ exceptions_for_status={grpc.StatusCode.NOT_FOUND: BeakerDatasetNotFound(dataset)},
63
63
  ).dataset
64
64
 
65
+ def _get_storage(self, dataset: pb2.Dataset) -> _DatasetStorage:
66
+ dataset_info = self.http_request(
67
+ f"datasets/{self._url_quote(dataset.id)}",
68
+ exceptions_for_status={404: BeakerDatasetNotFound(dataset.id)},
69
+ ).json()
70
+ return _DatasetStorage(**dataset_info["storage"])
71
+
65
72
  def create(
66
73
  self,
67
74
  name: str,
@@ -136,7 +143,7 @@ class DatasetClient(ServiceClient):
136
143
  return dataset
137
144
 
138
145
  def commit(self, dataset: pb2.Dataset) -> pb2.Dataset:
139
- if dataset.committed is not None and dataset.committed.ByteSize() > 0:
146
+ if dataset.HasField("committed"):
140
147
  return dataset
141
148
 
142
149
  @self._retriable()
@@ -164,8 +171,8 @@ class DatasetClient(ServiceClient):
164
171
  max_workers: int | None = None,
165
172
  strip_paths: bool = False,
166
173
  ) -> int:
167
- if dataset.committed is not None and dataset.committed.ByteSize() > 0:
168
- raise BeakerDatasetWriteError(dataset.id)
174
+ if dataset.HasField("committed"):
175
+ raise BeakerDatasetWriteError(f"Dataset '{dataset.id}' has already been committed")
169
176
 
170
177
  total_bytes = 0
171
178
  # map source path to (target_path, size)
@@ -224,6 +231,23 @@ class DatasetClient(ServiceClient):
224
231
 
225
232
  return total_bytes
226
233
 
234
+ def upload(self, dataset: pb2.Dataset, source: PathOrStr | bytes, target: PathOrStr) -> int:
235
+ """
236
+ Upload a file to a dataset.
237
+
238
+ :param dataset: The dataset to upload to (must be uncommitted).
239
+ :param source: Path to the local source file or the contents as bytes.
240
+ :param target: The path within the dataset to upload the file to.
241
+
242
+ :returns: The number of bytes uploaded.
243
+ """
244
+ if dataset.HasField("committed"):
245
+ raise BeakerDatasetWriteError(f"Dataset '{dataset.id}' has already been committed")
246
+
247
+ size = len(source) if isinstance(source, bytes) else Path(source).stat().st_size
248
+ storage = self._get_storage(dataset)
249
+ return self._upload_file(dataset, storage=storage, size=size, source=source, target=target)
250
+
227
251
  def _upload_file(
228
252
  self,
229
253
  dataset: pb2.Dataset,
@@ -325,6 +349,91 @@ class DatasetClient(ServiceClient):
325
349
  finally:
326
350
  source_file.close()
327
351
 
352
+ def stream_file(
353
+ self,
354
+ dataset: pb2.Dataset,
355
+ file_path: str,
356
+ *,
357
+ offset: int = 0,
358
+ length: int = -1,
359
+ chunk_size: int | None = None,
360
+ validate_checksum: bool = True,
361
+ ) -> Generator[bytes, None, None]:
362
+ prefix = os.path.dirname(file_path)
363
+ file: pb2.DatasetFile | None = None
364
+ for f in self.list_files(dataset, prefix=prefix):
365
+ if f.path == file_path:
366
+ file = f
367
+ break
368
+ else:
369
+ raise FileNotFoundError(file_path)
370
+
371
+ yield from self._stream_file(
372
+ dataset,
373
+ file,
374
+ offset=offset,
375
+ length=length,
376
+ chunk_size=chunk_size,
377
+ validate_checksum=validate_checksum,
378
+ )
379
+
380
+ def _stream_file(
381
+ self,
382
+ dataset: pb2.Dataset,
383
+ file: pb2.DatasetFile,
384
+ chunk_size: int | None = None,
385
+ offset: int = 0,
386
+ length: int = -1,
387
+ validate_checksum: bool = True,
388
+ ) -> Generator[bytes, None, None]:
389
+ def stream_file() -> Generator[bytes, None, None]:
390
+ headers = {}
391
+ if offset > 0 and length > 0:
392
+ headers["Range"] = f"bytes={offset}-{offset + length - 1}"
393
+ elif offset > 0:
394
+ headers["Range"] = f"bytes={offset}-"
395
+ response = self.http_request(
396
+ f"datasets/{dataset.id}/files/{self._url_quote(file.path)}",
397
+ method="GET",
398
+ stream=True,
399
+ headers=headers,
400
+ exceptions_for_status={404: FileNotFoundError(file.path)},
401
+ )
402
+ for chunk in response.iter_content(chunk_size=chunk_size or self.DOWNLOAD_CHUNK_SIZE):
403
+ yield chunk
404
+
405
+ contents_hash = None
406
+ if offset == 0 and validate_checksum and file.HasField("digest"):
407
+ contents_hash = BeakerDatasetFileAlgorithmType(file.digest.algorithm).hasher()
408
+
409
+ retries = 0
410
+ while True:
411
+ try:
412
+ for chunk in stream_file():
413
+ offset += len(chunk)
414
+ if contents_hash is not None:
415
+ contents_hash.update(chunk)
416
+ yield chunk
417
+ break
418
+ except RequestException as err:
419
+ if retries < self.beaker.MAX_RETRIES:
420
+ self._log_and_wait(retries, err)
421
+ retries += 1
422
+ else:
423
+ raise
424
+
425
+ # Validate digest.
426
+ if file.HasField("digest") and contents_hash is not None:
427
+ import binascii
428
+
429
+ actual_digest = binascii.hexlify(contents_hash.digest()).decode()
430
+ expected_digest = binascii.hexlify(file.digest.value).decode()
431
+ if actual_digest != expected_digest:
432
+ raise BeakerChecksumFailedError(
433
+ f"Checksum for '{file.path}' failed. "
434
+ f"Expected '{expected_digest}', got '{actual_digest}'."
435
+ )
436
+
328
437
  def list_files(
329
438
  self, dataset: pb2.Dataset, *, prefix: str | None = None
330
439
  ) -> Iterable[pb2.DatasetFile]:
@@ -426,7 +535,7 @@ class DatasetClient(ServiceClient):
426
535
  count += 1
427
536
  yield dataset
428
537
  if limit is not None and count >= limit:
429
- break
538
+ return
430
539
 
431
540
  def url(self, dataset: pb2.Dataset) -> str:
432
541
  dataset_id = self.resolve_dataset_id(dataset)
@@ -1,4 +1,5 @@
1
1
  import grpc
2
+ import yaml
2
3
 
3
4
  from . import beaker_pb2 as pb2
4
5
  from ._service_client import RpcMethod, ServiceClient
@@ -33,19 +34,21 @@ class ExperimentClient(ServiceClient):
33
34
 
34
35
  return self.beaker.workload.get(data["id"])
35
36
 
36
- def get_spec(self, experiment: str | pb2.Experiment) -> str:
37
- return self.rpc_request(
37
+ def get_spec(self, experiment: pb2.Experiment | pb2.Workload) -> BeakerExperimentSpec:
38
+ yaml_str = self.rpc_request(
38
39
  RpcMethod[pb2.GetExperimentYamlSpecResponse](self.service.GetExperimentYamlSpec),
39
40
  pb2.GetExperimentYamlSpecRequest(experiment_id=self.resolve_experiment_id(experiment)),
40
41
  exceptions_for_status={grpc.StatusCode.NOT_FOUND: BeakerExperimentNotFound(experiment)},
41
42
  ).experiment_spec
43
+ json_dict = yaml.safe_load(yaml_str)
44
+ return BeakerExperimentSpec.from_json(json_dict)
42
45
 
43
- def restart_tasks(self, experiment: str | pb2.Experiment) -> pb2.Workload:
46
+ def restart_tasks(self, experiment: pb2.Experiment | pb2.Workload) -> pb2.Workload:
44
47
  return self.rpc_request(
45
48
  RpcMethod[pb2.RestartExperimentTasksResponse](self.service.RestartExperimentTasks),
46
49
  pb2.RestartExperimentTasksRequest(experiment_id=self.resolve_experiment_id(experiment)),
47
50
  exceptions_for_status={grpc.StatusCode.NOT_FOUND: BeakerExperimentNotFound(experiment)},
48
51
  ).workload
49
52
 
50
- def url(self, experiment: pb2.Experiment) -> str:
51
- return f"{self.config.agent_address}/ex/{self._url_quote(experiment.id)}"
53
+ def url(self, experiment: pb2.Experiment | pb2.Workload) -> str:
54
+ return f"{self.config.agent_address}/ex/{self._url_quote(self.resolve_experiment_id(experiment))}"
@@ -106,7 +106,7 @@ class GroupClient(ServiceClient):
106
106
  count += 1
107
107
  yield group
108
108
  if limit is not None and count >= limit:
109
- break
109
+ return
110
110
 
111
111
  def url(self, group: pb2.Group) -> str:
112
112
  group_id = self.resolve_group_id(group)
@@ -72,7 +72,7 @@ class ImageClient(ServiceClient):
72
72
  count += 1
73
73
  yield image
74
74
  if limit is not None and count >= limit:
75
- break
75
+ return
76
76
 
77
77
  def url(self, image: pb2.Image) -> str:
78
78
  image_id = self.resolve_image_id(image)
@@ -25,16 +25,26 @@ class JobClient(ServiceClient):
25
25
  follow: bool | None = None,
26
26
  since: datetime | None = None,
27
27
  ) -> Iterable[pb2.JobLog]:
28
- yield from self.rpc_streaming_request(
28
+ request = pb2.StreamJobLogsRequest(
29
+ job_id=job.id,
30
+ tail_lines=tail_lines,
31
+ follow=follow, # type: ignore
32
+ since=since, # type: ignore
33
+ )
34
+
35
+ def on_failure():
36
+ nonlocal request, since
37
+ request.MergeFrom(pb2.StreamJobLogsRequest(since=since)) # type: ignore[arg-type]
38
+
39
+ for job_log in self.rpc_streaming_request(
29
40
  RpcStreamingMethod[pb2.JobLog](self.service.StreamJobLogs),
30
- pb2.StreamJobLogsRequest(
31
- job_id=job.id,
32
- tail_lines=tail_lines,
33
- follow=follow, # type: ignore
34
- since=since, # type: ignore
35
- ),
41
+ request,
36
42
  exceptions_for_status={grpc.StatusCode.NOT_FOUND: BeakerJobNotFound(job)},
37
- )
43
+ on_failure=on_failure,
44
+ retriable=True,
45
+ ):
46
+ since = job_log.timestamp.ToDatetime()
47
+ yield job_log
38
48
 
39
49
  def list_summarized_events(
40
50
  self,
@@ -61,6 +71,7 @@ class JobClient(ServiceClient):
61
71
  *,
62
72
  org: pb2.Organization | None = None,
63
73
  task: pb2.Task | None = None,
74
+ environment: pb2.Environment | None = None,
64
75
  finalized: bool | None = None,
65
76
  elegible_for_cluster: pb2.Cluster | None = None,
66
77
  scheduled_on_node: pb2.Node | None = None,
@@ -85,6 +96,7 @@ class JobClient(ServiceClient):
85
96
  ),
86
97
  organization_id=self.resolve_org_id(org),
87
98
  task_id=None if task is None else task.id,
99
+ environment_id=None if environment is None else environment.id,
88
100
  finalized=finalized, # type: ignore
89
101
  eligible_for_cluster_id=None
90
102
  if elegible_for_cluster is None
@@ -106,7 +118,7 @@ class JobClient(ServiceClient):
106
118
  count += 1
107
119
  yield job
108
120
  if limit is not None and count >= limit:
109
- break
121
+ return
110
122
 
111
123
  def url(self, job: pb2.Job) -> str:
112
124
  job_id = job.id
@@ -65,4 +65,4 @@ class NodeClient(ServiceClient):
65
65
  count += 1
66
66
  yield node
67
67
  if limit is not None and count >= limit:
68
- break
68
+ return
@@ -68,4 +68,4 @@ class SecretClient(ServiceClient):
68
68
  count += 1
69
69
  yield secret
70
70
  if limit is not None and count >= limit:
71
- break
71
+ return