konduktor-nightly 0.1.0.dev20250209104336__py3-none-any.whl → 0.1.0.dev20250313070642__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- konduktor/__init__.py +16 -6
- konduktor/adaptors/__init__.py +0 -0
- konduktor/adaptors/common.py +88 -0
- konduktor/adaptors/gcp.py +112 -0
- konduktor/backends/__init__.py +8 -0
- konduktor/backends/backend.py +86 -0
- konduktor/backends/jobset.py +218 -0
- konduktor/backends/jobset_utils.py +447 -0
- konduktor/check.py +192 -0
- konduktor/cli.py +790 -0
- konduktor/cloud_stores.py +158 -0
- konduktor/config.py +420 -0
- konduktor/constants.py +36 -0
- konduktor/controller/constants.py +6 -6
- konduktor/controller/launch.py +3 -3
- konduktor/controller/node.py +5 -5
- konduktor/controller/parse.py +23 -23
- konduktor/dashboard/backend/main.py +57 -57
- konduktor/dashboard/backend/sockets.py +19 -19
- konduktor/data/__init__.py +9 -0
- konduktor/data/constants.py +12 -0
- konduktor/data/data_utils.py +223 -0
- konduktor/data/gcp/__init__.py +19 -0
- konduktor/data/gcp/constants.py +42 -0
- konduktor/data/gcp/gcs.py +906 -0
- konduktor/data/gcp/utils.py +9 -0
- konduktor/data/storage.py +799 -0
- konduktor/data/storage_utils.py +500 -0
- konduktor/execution.py +444 -0
- konduktor/kube_client.py +153 -48
- konduktor/logging.py +49 -5
- konduktor/manifests/dmesg_daemonset.yaml +8 -0
- konduktor/manifests/pod_cleanup_controller.yaml +129 -0
- konduktor/resource.py +478 -0
- konduktor/task.py +867 -0
- konduktor/templates/jobset.yaml.j2 +31 -0
- konduktor/templates/pod.yaml.j2 +185 -0
- konduktor/usage/__init__.py +0 -0
- konduktor/usage/constants.py +21 -0
- konduktor/utils/__init__.py +0 -0
- konduktor/utils/accelerator_registry.py +21 -0
- konduktor/utils/annotations.py +62 -0
- konduktor/utils/base64_utils.py +93 -0
- konduktor/utils/common_utils.py +393 -0
- konduktor/utils/constants.py +5 -0
- konduktor/utils/env_options.py +55 -0
- konduktor/utils/exceptions.py +226 -0
- konduktor/utils/kubernetes_enums.py +8 -0
- konduktor/utils/kubernetes_utils.py +652 -0
- konduktor/utils/log_utils.py +251 -0
- konduktor/utils/loki_utils.py +85 -0
- konduktor/utils/rich_utils.py +123 -0
- konduktor/utils/schemas.py +581 -0
- konduktor/utils/subprocess_utils.py +273 -0
- konduktor/utils/ux_utils.py +216 -0
- konduktor/utils/validator.py +20 -0
- {konduktor_nightly-0.1.0.dev20250209104336.dist-info → konduktor_nightly-0.1.0.dev20250313070642.dist-info}/LICENSE +0 -1
- {konduktor_nightly-0.1.0.dev20250209104336.dist-info → konduktor_nightly-0.1.0.dev20250313070642.dist-info}/METADATA +13 -2
- konduktor_nightly-0.1.0.dev20250313070642.dist-info/RECORD +94 -0
- konduktor_nightly-0.1.0.dev20250209104336.dist-info/RECORD +0 -48
- {konduktor_nightly-0.1.0.dev20250209104336.dist-info → konduktor_nightly-0.1.0.dev20250313070642.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20250209104336.dist-info → konduktor_nightly-0.1.0.dev20250313070642.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,500 @@
|
|
1
|
+
# Proprietary Changes made for Trainy under the Trainy Software License
|
2
|
+
# Original source: skypilot: https://github.com/skypilot-org/skypilot
|
3
|
+
# which is Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the License for the specific language governing permissions and
|
11
|
+
# limitations under the License.
|
12
|
+
|
13
|
+
"""Utility functions for the storage module."""
|
14
|
+
|
15
|
+
import glob
|
16
|
+
import os
|
17
|
+
import shlex
|
18
|
+
import subprocess
|
19
|
+
import typing
|
20
|
+
from typing import List, Optional
|
21
|
+
|
22
|
+
import colorama
|
23
|
+
|
24
|
+
if typing.TYPE_CHECKING:
|
25
|
+
from konduktor.data.constants import SourceType, StorageHandle
|
26
|
+
|
27
|
+
|
28
|
+
from konduktor import constants, logging
|
29
|
+
from konduktor.utils import common_utils, exceptions
|
30
|
+
|
31
|
+
logger = logging.get_logger(__name__)
|
32
|
+
|
33
|
+
# Warning template used when .gitignore-excluded files will be uploaded
# anyway.  Only the colorama color code is interpolated eagerly (f-string);
# '{path!r}' and '{error_msg!r}' are deliberately plain-string placeholders
# filled in later via .format() at the call site.
_FILE_EXCLUSION_FROM_GITIGNORE_FAILURE_MSG = (
    f'{colorama.Fore.YELLOW}Warning: Files/dirs '
    'specified in .gitignore will be uploaded '
    # Fix: the original concatenation was missing the trailing space here,
    # rendering "...for 'path'due to the following error...".
    'to the cloud storage for {path!r} '
    'due to the following error: {error_msg!r}'
)
|
39
|
+
|
40
|
+
|
41
|
+
def get_excluded_files_from_konduktorignore(src_dir_path: str) -> List[str]:
    """List files and patterns ignored by the konduktor ignore file
    (``constants.KONDUKTOR_IGNORE_FILE``) in the given source directory.

    Args:
        src_dir_path: Directory containing the ignore file; '~' is expanded.

    Returns:
        Paths, relative to ``src_dir_path``, of files matching the ignore
        patterns.  Empty if the ignore file is absent or unreadable (a
        warning is logged in the unreadable case).
    """
    excluded_list: List[str] = []
    expand_src_dir_path = os.path.expanduser(src_dir_path)
    konduktorignore_path = os.path.join(
        expand_src_dir_path, constants.KONDUKTOR_IGNORE_FILE
    )

    try:
        with open(konduktorignore_path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                # Skip blank lines and comments.
                if not line or line.startswith('#'):
                    continue
                # Make parsing consistent with rsync.
                # Rsync uses '/' as current directory.
                if line.startswith('/'):
                    line = '.' + line
                else:
                    line = '**/' + line
                # Find all files matching the pattern.
                matching_files = glob.glob(
                    os.path.join(expand_src_dir_path, line), recursive=True
                )
                # Convert matches to paths relative to the source dir to
                # comply with the cloud rsync exclude format.
                excluded_list.extend(
                    os.path.relpath(match, expand_src_dir_path)
                    for match in matching_files
                )
    except IOError as e:
        logger.warning(
            f'Error reading {konduktorignore_path}: '
            f'{common_utils.format_exception(e, use_bracket=True)}'
        )

    return excluded_list
|
79
|
+
|
80
|
+
|
81
|
+
def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
    """Lists files and patterns ignored by git in the source directory.

    Runs `git status --ignored` which returns a list of excluded files and
    patterns read from .gitignore and .git/info/exclude using git.
    `git init` is run if SRC_DIR_PATH is not a git repository and removed
    after obtaining excluded list.

    Returns:
        List[str] containing files and patterns to be ignored.  Some of the
        patterns include, **/mydir/*.txt, !myfile.log, or file-*/.

    Raises:
        subprocess.CalledProcessError: If `git status` fails for any reason
            other than the recoverable "not a git repository" case.
    """
    expand_src_dir_path = os.path.expanduser(src_dir_path)

    git_exclude_path = os.path.join(expand_src_dir_path, '.git/info/exclude')
    gitignore_path = os.path.join(expand_src_dir_path, constants.GIT_IGNORE_FILE)

    git_exclude_exists = os.path.isfile(git_exclude_path)
    gitignore_exists = os.path.isfile(gitignore_path)

    excluded_list: List[str] = []
    if not (git_exclude_exists or gitignore_exists):
        return excluded_list

    # This command outputs a list to be excluded according to .gitignore
    # and .git/info/exclude.
    filter_cmd = (
        f'git -C {shlex.quote(expand_src_dir_path)} '
        'status --ignored --porcelain=v1'
    )

    try:
        output = subprocess.run(
            filter_cmd,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        # Only the "not a git repository" failure is recoverable (by
        # creating a temporary repo below).  Re-raise anything else: the
        # original code fell through here and crashed on an unbound
        # `output` when parsing below.
        if (
            e.returncode != exceptions.GIT_FATAL_EXIT_CODE
            or 'not a git repository' not in e.stderr
        ):
            raise
        # Check if the user has 'write' permission to SRC_DIR_PATH; `git
        # init` below needs it.
        if not os.access(expand_src_dir_path, os.W_OK):
            logger.warning(
                _FILE_EXCLUSION_FROM_GITIGNORE_FAILURE_MSG.format(
                    path=src_dir_path, error_msg='Write permission denial'
                )
            )
            return excluded_list
        # Quote the path (fix: the original interpolated it raw, breaking
        # on paths with spaces or shell metacharacters).
        init_cmd = f'git -C {shlex.quote(expand_src_dir_path)} init'
        try:
            subprocess.run(
                init_cmd,
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
                text=True,  # so init_e.stderr below is str, not bytes
            )
            output = subprocess.run(
                filter_cmd,
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
                text=True,
            )
        except subprocess.CalledProcessError as init_e:
            logger.warning(
                _FILE_EXCLUSION_FROM_GITIGNORE_FAILURE_MSG.format(
                    path=src_dir_path, error_msg=init_e.stderr
                )
            )
            return excluded_list
        if git_exclude_exists:
            # Removes all the files/dirs created with 'git init'
            # under .git/ except .git/info/exclude.
            git_dir = os.path.join(expand_src_dir_path, '.git')
            remove_files_cmd = (
                f'find {shlex.quote(git_dir)} '
                f'-path {shlex.quote(git_exclude_path)} '
                '-prune -o -type f -exec rm -f {} +'
            )
            remove_dirs_cmd = (
                f'find {shlex.quote(git_dir)} '
                f'-path {shlex.quote(git_exclude_path)} '
                '-o -type d -empty -delete'
            )
            subprocess.run(
                remove_files_cmd,
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
            )
            subprocess.run(
                remove_dirs_cmd,
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
            )

    for line in output.stdout.split('\n'):
        # FILTER_CMD outputs items preceded by '!!' to specify excluded
        # files/dirs, e.g., '!! mydir/' or '!! mydir/myfile.txt'.
        if line.startswith('!!'):
            to_be_excluded = line[3:]
            if line.endswith('/'):
                # aws s3 sync and gsutil rsync require * to exclude
                # files/dirs under the specified directory.
                to_be_excluded += '*'
            excluded_list.append(to_be_excluded)
    return excluded_list
|
199
|
+
|
200
|
+
|
201
|
+
def get_excluded_files(src_dir_path: str) -> List[str]:
    """List files and directories to be excluded."""
    # TODO: this could return a huge list of files,
    # should think of ways to optimize.
    ignore_file_path = os.path.join(
        os.path.expanduser(src_dir_path), constants.KONDUKTOR_IGNORE_FILE
    )
    # Prefer the konduktor-specific ignore file; fall back to .gitignore.
    use_konduktorignore = os.path.exists(ignore_file_path)
    basis = (
        constants.KONDUKTOR_IGNORE_FILE
        if use_konduktorignore
        else constants.GIT_IGNORE_FILE
    )
    logger.info(
        f'    {colorama.Style.DIM}'
        f'Excluded files to sync to cluster based on '
        f'{basis}.'
        f'{colorama.Style.RESET_ALL}'
    )
    if use_konduktorignore:
        return get_excluded_files_from_konduktorignore(src_dir_path)
    return get_excluded_files_from_gitignore(src_dir_path)
|
224
|
+
|
225
|
+
|
226
|
+
class AbstractStore:
    """AbstractStore abstracts away the different storage types exposed by
    different clouds.

    Storage objects are backed by AbstractStores, each representing a store
    present in a cloud.
    """

    class StoreMetadata:
        """A pickle-able representation of Store

        Allows store objects to be written to and reconstructed from
        global_user_state.
        """

        def __init__(
            self,
            *,
            name: str,
            source: Optional['SourceType'],
            region: Optional[str] = None,
            is_sky_managed: Optional[bool] = None,
            _bucket_sub_path: Optional[str] = None,
        ):
            self.name = name
            self.source = source
            self.region = region
            self.is_sky_managed = is_sky_managed
            self._bucket_sub_path = _bucket_sub_path

        def __repr__(self) -> str:
            # Fix: the original f-string never closed the opening
            # 'StoreMetadata(' parenthesis.
            return (
                f'StoreMetadata('
                f'\n\tname={self.name},'
                f'\n\tsource={self.source},'
                f'\n\tregion={self.region},'
                f'\n\tis_sky_managed={self.is_sky_managed},'
                f'\n\t_bucket_sub_path={self._bucket_sub_path})'
            )

    def __init__(
        self,
        name: str,
        source: Optional['SourceType'],
        region: Optional[str] = None,
        is_sky_managed: Optional[bool] = None,
        sync_on_reconstruction: Optional[bool] = True,
        _bucket_sub_path: Optional[str] = None,
    ):
        """Initialize AbstractStore

        Args:
            name: Store name
            source: Data source for the store
            region: Region to place the bucket in
            is_sky_managed: Whether the store is managed by Sky. If None, it
              must be populated by the implementing class during
              initialization.
            sync_on_reconstruction: Whether to sync data when the store is
              reconstructed (defaults to True).
            _bucket_sub_path: Optional sub-path inside the bucket.

        Raises:
            StorageBucketCreateError: If bucket creation fails
            StorageBucketGetError: If fetching existing bucket fails
            StorageInitError: If general initialization fails
        """
        self.name = name
        self.source = source
        self.region = region
        # Whether sky is responsible for the lifecycle of the Store.
        self.is_sky_managed = is_sky_managed
        self.sync_on_reconstruction = sync_on_reconstruction
        self._bucket_sub_path = _bucket_sub_path
        # Subclasses hook validation and bucket setup via these two calls;
        # both are no-ops on the base class.
        self._validate()
        self.initialize()

    @property
    def bucket_sub_path(self) -> Optional[str]:
        """Get the bucket_sub_path."""
        return self._bucket_sub_path

    @classmethod
    def from_metadata(cls, metadata: StoreMetadata, **override_args):
        """Create a Store from a StoreMetadata object.

        Used when reconstructing Storage and Store objects from
        global_user_state.

        NOTE(review): only name/source/region are restored here;
        is_sky_managed and _bucket_sub_path from the metadata are ignored —
        confirm this is intentional before relying on round-tripping.
        """
        return cls(
            name=override_args.get('name', metadata.name),
            source=override_args.get('source', metadata.source),
            region=override_args.get('region', metadata.region),
        )

    def get_metadata(self) -> StoreMetadata:
        """Return a pickle-able snapshot of this store's identity fields."""
        return self.StoreMetadata(
            name=self.name,
            source=self.source,
            region=self.region,
        )

    def initialize(self) -> None:
        """Initializes the Store object on the cloud.

        Initialization involves fetching bucket if exists, or creating it if
        it does not.

        Raises:
            StorageBucketCreateError: If bucket creation fails
            StorageBucketGetError: If fetching existing bucket fails
            StorageInitError: If general initialization fails.
        """
        pass

    def _validate(self) -> None:
        """Runs validation checks on class args"""
        pass

    def upload(self) -> None:
        """Uploads source to the store bucket

        Upload must be called by the Storage handler - it is not called on
        Store initialization.
        """
        raise NotImplementedError

    def delete(self) -> None:
        """Removes the Storage object from the cloud."""
        raise NotImplementedError

    def get_handle(self) -> 'StorageHandle':
        """Returns the storage handle for use by the execution backend to attach
        to VM/containers
        """
        raise NotImplementedError

    def download_remote_dir(self, local_path: str) -> None:
        """Downloads directory from remote bucket to the specified
        local_path

        Args:
            local_path: Local path on user's device
        """
        raise NotImplementedError

    def _download_file(self, remote_path: str, local_path: str) -> None:
        """Downloads file from remote to local on Store

        Args:
            remote_path: str; Remote file path on Store
            local_path: str; Local file path on user's device
        """
        raise NotImplementedError

    def mount_command(self, mount_path: str) -> str:
        """Returns the command to mount the Store to the specified mount_path.

        Includes the setup commands to install mounting tools.

        Args:
            mount_path: str; Mount path on remote server
        """
        raise NotImplementedError

    def __deepcopy__(self, memo):
        # S3 Client and GCS Client cannot be deep copied, hence the
        # original Store object is returned
        return self

    def _validate_existing_bucket(self) -> None:
        """Validates the storage fields for existing buckets."""
        # Check if 'source' is None, this is only allowed when Storage is in
        # either MOUNT mode or COPY mode with sky-managed storage.
        # Note: In COPY mode, a 'source' being None with non-sky-managed
        # storage is already handled as an error in _validate_storage_spec.
        if self.source is None:
            # Retrieve a handle associated with the storage name.
            # This handle links to sky managed storage if it exists.
            raise NotImplementedError("We don't handle empty sources for now")

    @classmethod
    def check_credentials(cls):
        """
        Check if the credentials stored on client are valid for the store.
        This function always runs after check_credentials_from_secret. If
        the credentials work, we create/update the secret on the cluster.
        """
        raise NotImplementedError

    @classmethod
    def set_secret_credentials(cls):
        """
        Set the k8s secret storing the credentials for the store.
        """
        raise NotImplementedError

    # TODO(zhwu): Make the return type immutable.
    @classmethod
    def get_user_identities(cls) -> Optional[List[List[str]]]:
        """(Advanced) Returns all available user identities of this cloud.

        The user "identity" is associated with each SkyPilot cluster they
        create. This is used in protecting cluster operations, such as
        provision, teardown and status refreshing, in a multi-identity
        scenario, where the same user/device can switch between different
        cloud identities. We check that the user identity matches before:
            - Provisioning/starting a cluster
            - Stopping/tearing down a cluster
            - Refreshing the status of a cluster

        Design choices:
          1. We allow the operations that can correctly work with a different
             user identity, as a user should have full control over all their
             clusters (no matter which identity it belongs to), e.g.,
             submitting jobs, viewing logs, auto-stopping, etc.
          2. A cloud implementation can optionally switch between different
             identities if required for cluster operations. In this case,
             the cloud implementation should return multiple identities
             as a list. E.g., our Kubernetes implementation can use multiple
             kubeconfig contexts to switch between different identities.

        The choice of what constitutes an identity is up to each cloud's
        implementation. In general, to suffice for the above purposes,
        ensure that different identities should imply different sets of
        resources are used when the user invoked each cloud's default
        CLI/API.

        An identity is a list of strings. The list is in the order of
        strictness, i.e., the first element is the most strict identity, and
        the last element is the least strict identity.
        When performing an identity check between the current active identity
        and the owner identity associated with a cluster, we compare the two
        lists in order: if a position does not match, we go to the next. To
        see an example, see the docstring of the AWS.get_user_identities.

        Example identities (see cloud implementations):
            - AWS: [UserId, AccountId]
            - GCP: [email address + project ID]
            - Azure: [email address + subscription ID]
            - Kubernetes: [context name]

        Example return values:
            - AWS: [[UserId, AccountId]]
            - GCP: [[email address + project ID]]
            - Azure: [[email address + subscription ID]]
            - Kubernetes: [[current active context], [context 2], ...]

        Returns:
            None if the cloud does not have a concept of user identity
            (access protection will be disabled for these clusters);
            otherwise a list of available identities with the current active
            identity being the first element. Most clouds have only one
            identity available, so the returned list will only have one
            element: the current active identity.

        Raises:
            exceptions.CloudUserIdentityError: If the user identity cannot be
                retrieved.
        """
        return None

    @classmethod
    def get_active_user_identity(cls) -> Optional[List[str]]:
        """Returns currently active user identity of this cloud

        See get_user_identities for definition of user identity.

        Returns:
            None if the cloud does not have a concept of user identity;
            otherwise the current active identity.
        """
        identities = cls.get_user_identities()
        return identities[0] if identities is not None else None

    @classmethod
    def get_k8s_credential_name(cls) -> str:
        """Returns the name of the k8s secret storing the credentials for the store."""
        raise NotImplementedError
|