skypilot-nightly 1.0.0.dev20250401__py3-none-any.whl → 1.0.0.dev20250402__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +7 -0
- sky/backends/cloud_vm_ray_backend.py +58 -13
- sky/check.py +2 -2
- sky/cli.py +2 -1
- sky/client/cli.py +2 -1
- sky/cloud_stores.py +8 -10
- sky/data/data_utils.py +178 -90
- sky/data/mounting_utils.py +79 -22
- sky/data/storage.py +95 -30
- sky/global_user_state.py +5 -0
- sky/models.py +4 -1
- sky/server/requests/payloads.py +4 -4
- sky/server/server.py +8 -1
- sky/skylet/constants.py +8 -2
- sky/task.py +2 -2
- sky/utils/controller_utils.py +2 -2
- sky/utils/kubernetes/gpu_labeler.py +35 -42
- {skypilot_nightly-1.0.0.dev20250401.dist-info → skypilot_nightly-1.0.0.dev20250402.dist-info}/METADATA +1 -5
- {skypilot_nightly-1.0.0.dev20250401.dist-info → skypilot_nightly-1.0.0.dev20250402.dist-info}/RECORD +24 -24
- {skypilot_nightly-1.0.0.dev20250401.dist-info → skypilot_nightly-1.0.0.dev20250402.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250401.dist-info → skypilot_nightly-1.0.0.dev20250402.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250401.dist-info → skypilot_nightly-1.0.0.dev20250402.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250401.dist-info → skypilot_nightly-1.0.0.dev20250402.dist-info}/top_level.txt +0 -0
sky/data/mounting_utils.py
CHANGED
@@ -1,10 +1,13 @@
|
|
1
1
|
"""Helper functions for object store mounting in Sky Storage"""
|
2
|
+
import hashlib
|
3
|
+
import os
|
2
4
|
import random
|
3
5
|
import shlex
|
4
6
|
import textwrap
|
5
7
|
from typing import Optional
|
6
8
|
|
7
9
|
from sky import exceptions
|
10
|
+
from sky.skylet import constants
|
8
11
|
from sky.utils import command_runner
|
9
12
|
|
10
13
|
# Values used to construct mounting commands
|
@@ -14,11 +17,17 @@ _TYPE_CACHE_TTL = '5s'
|
|
14
17
|
_RENAME_DIR_LIMIT = 10000
|
15
18
|
# https://github.com/GoogleCloudPlatform/gcsfuse/releases
|
16
19
|
GCSFUSE_VERSION = '2.2.0'
|
20
|
+
# Creates a fusermount3 soft link on older (<22) Ubuntu systems to utilize
|
21
|
+
# Rclone's mounting utility.
|
22
|
+
FUSERMOUNT3_SOFT_LINK_CMD = ('[ ! -f /bin/fusermount3 ] && '
|
23
|
+
'sudo ln -s /bin/fusermount /bin/fusermount3 || '
|
24
|
+
'true')
|
17
25
|
# https://github.com/Azure/azure-storage-fuse/releases
|
18
26
|
BLOBFUSE2_VERSION = '2.2.0'
|
19
27
|
_BLOBFUSE_CACHE_ROOT_DIR = '~/.sky/blobfuse2_cache'
|
20
28
|
_BLOBFUSE_CACHE_DIR = ('~/.sky/blobfuse2_cache/'
|
21
29
|
'{storage_account_name}_{container_name}')
|
30
|
+
# https://github.com/rclone/rclone/releases
|
22
31
|
RCLONE_VERSION = 'v1.68.2'
|
23
32
|
|
24
33
|
|
@@ -112,7 +121,12 @@ def get_az_mount_install_cmd() -> str:
|
|
112
121
|
'sudo apt-get update; '
|
113
122
|
'sudo apt-get install -y '
|
114
123
|
'-o Dpkg::Options::="--force-confdef" '
|
115
|
-
'fuse3 libfuse3-dev
|
124
|
+
'fuse3 libfuse3-dev || { '
|
125
|
+
' echo "fuse3 not available, falling back to fuse"; '
|
126
|
+
' sudo apt-get install -y '
|
127
|
+
' -o Dpkg::Options::="--force-confdef" '
|
128
|
+
' fuse libfuse-dev; '
|
129
|
+
'} && '
|
116
130
|
'ARCH=$(uname -m) && '
|
117
131
|
'if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then '
|
118
132
|
' echo "blobfuse2 is not supported on $ARCH" && '
|
@@ -203,31 +217,17 @@ def get_r2_mount_cmd(r2_credentials_path: str,
|
|
203
217
|
return mount_cmd
|
204
218
|
|
205
219
|
|
206
|
-
def
|
207
|
-
|
208
|
-
install_cmd = ('rclone version >/dev/null 2>&1 || '
|
209
|
-
'(curl https://rclone.org/install.sh | '
|
210
|
-
'sudo bash)')
|
211
|
-
return install_cmd
|
212
|
-
|
213
|
-
|
214
|
-
def get_cos_mount_cmd(rclone_config_data: str,
|
215
|
-
rclone_config_path: str,
|
216
|
-
bucket_rclone_profile: str,
|
220
|
+
def get_cos_mount_cmd(rclone_config: str,
|
221
|
+
rclone_profile_name: str,
|
217
222
|
bucket_name: str,
|
218
223
|
mount_path: str,
|
219
224
|
_bucket_sub_path: Optional[str] = None) -> str:
|
220
225
|
"""Returns a command to mount an IBM COS bucket using rclone."""
|
221
|
-
# creates a fusermount soft link on older (<22) Ubuntu systems for
|
222
|
-
# rclone's mount utility.
|
223
|
-
set_fuser3_soft_link = ('[ ! -f /bin/fusermount3 ] && '
|
224
|
-
'sudo ln -s /bin/fusermount /bin/fusermount3 || '
|
225
|
-
'true')
|
226
226
|
# stores bucket profile in rclone config file at the cluster's nodes.
|
227
|
-
configure_rclone_profile = (f'{
|
228
|
-
'mkdir -p
|
229
|
-
f'echo "{
|
230
|
-
f'{
|
227
|
+
configure_rclone_profile = (f'{FUSERMOUNT3_SOFT_LINK_CMD}; '
|
228
|
+
f'mkdir -p {constants.RCLONE_CONFIG_DIR} && '
|
229
|
+
f'echo "{rclone_config}" >> '
|
230
|
+
f'{constants.RCLONE_CONFIG_PATH}')
|
231
231
|
if _bucket_sub_path is None:
|
232
232
|
sub_path_arg = f'{bucket_name}/{_bucket_sub_path}'
|
233
233
|
else:
|
@@ -235,11 +235,68 @@ def get_cos_mount_cmd(rclone_config_data: str,
|
|
235
235
|
# --daemon will keep the mounting process running in the background.
|
236
236
|
mount_cmd = (f'{configure_rclone_profile} && '
|
237
237
|
'rclone mount '
|
238
|
-
f'{
|
238
|
+
f'{rclone_profile_name}:{sub_path_arg} {mount_path} '
|
239
239
|
'--daemon')
|
240
240
|
return mount_cmd
|
241
241
|
|
242
242
|
|
243
|
+
def get_mount_cached_cmd(rclone_config: str, rclone_profile_name: str,
                         bucket_name: str, mount_path: str) -> str:
    """Builds the shell command that mounts a bucket with rclone's VFS cache.

    The returned command, in order: creates the rclone log directory and the
    per-mount log file, appends ``rclone_config`` to the rclone config file,
    and runs ``rclone mount`` as a daemon with ``--vfs-cache-mode full``.

    Args:
        rclone_config: rclone profile text; appended (shell-quoted) to
            ``constants.RCLONE_CONFIG_PATH``.
        rclone_profile_name: Name of the rclone remote to mount from.
        bucket_name: Name of the bucket to mount.
        mount_path: Path to mount the bucket at. It is interpolated into the
            command as-is (not shell-quoted).

    Returns:
        A single shell command string.
    """
    # The mount path is assumed to be unique per mount. Its MD5 hex digest is
    # used to name the per-mount log file and cache directory, because the
    # full path may be longer than the filename length limit.
    mount_id = hashlib.md5(mount_path.encode()).hexdigest()
    log_file_path = os.path.join(constants.RCLONE_LOG_DIR, f'{mount_id}.log')
    prepare_log = (f'mkdir -p {constants.RCLONE_LOG_DIR} && '
                   f'touch {log_file_path}')

    # Store the bucket profile in the rclone config file.
    write_profile = (f'{FUSERMOUNT3_SOFT_LINK_CMD}; '
                     f'mkdir -p {constants.RCLONE_CONFIG_DIR} && '
                     f'echo {shlex.quote(rclone_config)} >> '
                     f'{constants.RCLONE_CONFIG_PATH}')

    rclone_args = [
        f'{rclone_profile_name}:{bucket_name}',
        mount_path,
        # Keep the mounting process running in the background, and fail if
        # the mount cannot complete within 10 seconds.
        '--daemon',
        '--daemon-wait 10',
        f'--log-file {log_file_path}',
        '--log-level INFO',
        '--allow-other',
        '--vfs-cache-mode full',
        # How long directory listings are cached before rclone checks the
        # remote again. Shorter means faster detection of remote changes at
        # the cost of more frequent metadata lookups.
        '--dir-cache-time 10s',
        # One transfer at a time, so files written at the mount point are
        # uploaded in the order they were created.
        '--transfers 1',
        # How often rclone checks the cache for stale objects.
        '--vfs-cache-poll-interval 10s',
        # How long after the last use of a file it is written to the remote.
        '--vfs-write-back 1s',
        # Evict files once the cache exceeds 10G so it cannot use up all the
        # disk space. Files held open by a process are not evicted.
        '--vfs-cache-max-size 10G',
        # Each mount gets its own cache directory, keyed by the digest.
        f'--cache-dir {constants.RCLONE_CACHE_DIR}/{mount_id}',
        # The command spawns child processes that must be detached from the
        # current terminal; it produces no output, so no logs are dropped.
        '> /dev/null 2>&1',
    ]
    return (f'{prepare_log} && {write_profile} && rclone mount ' +
            ' '.join(rclone_args))
|
298
|
+
|
299
|
+
|
243
300
|
def get_rclone_install_cmd() -> str:
|
244
301
|
""" RClone installation for both apt-get and rpm.
|
245
302
|
This would be common command.
|
sky/data/storage.py
CHANGED
@@ -30,7 +30,6 @@ from sky.data import data_transfer
|
|
30
30
|
from sky.data import data_utils
|
31
31
|
from sky.data import mounting_utils
|
32
32
|
from sky.data import storage_utils
|
33
|
-
from sky.data.data_utils import Rclone
|
34
33
|
from sky.skylet import constants
|
35
34
|
from sky.utils import common_utils
|
36
35
|
from sky.utils import rich_utils
|
@@ -266,6 +265,13 @@ class StoreType(enum.Enum):
|
|
266
265
|
class StorageMode(enum.Enum):
    """Modes in which a storage object can be attached to a task."""
    MOUNT = 'MOUNT'
    COPY = 'COPY'
    MOUNT_CACHED = 'MOUNT_CACHED'


# The storage modes that are treated as mounting modes.
MOUNTABLE_STORAGE_MODES = [StorageMode.MOUNT, StorageMode.MOUNT_CACHED]
|
269
275
|
|
270
276
|
|
271
277
|
class AbstractStore:
|
@@ -451,13 +457,27 @@ class AbstractStore:
|
|
451
457
|
def mount_command(self, mount_path: str) -> str:
|
452
458
|
"""Returns the command to mount the Store to the specified mount_path.
|
453
459
|
|
454
|
-
Includes the setup commands to
|
460
|
+
This command is used for MOUNT mode. Includes the setup commands to
|
461
|
+
install mounting tools.
|
455
462
|
|
456
463
|
Args:
|
457
464
|
mount_path: str; Mount path on remote server
|
458
465
|
"""
|
459
466
|
raise NotImplementedError
|
460
467
|
|
468
|
+
def mount_cached_command(self, mount_path: str) -> str:
    """Returns the command to mount the Store at mount_path (MOUNT_CACHED).

    Stores that support MOUNT_CACHED mode override this method; the returned
    command includes the setup commands to install mounting tools.

    Args:
        mount_path: str; Mount path on remote server

    Raises:
        exceptions.NotSupportedError: always, in this base implementation.
    """
    mode_name = StorageMode.MOUNT_CACHED.value
    raise exceptions.NotSupportedError(
        f'{mode_name} is not supported for {self.name}.')
|
480
|
+
|
461
481
|
def __deepcopy__(self, memo):
|
462
482
|
# S3 Client and GCS Client cannot be deep copied, hence the
|
463
483
|
# original Store object is returned
|
@@ -1725,6 +1745,17 @@ class S3Store(AbstractStore):
|
|
1725
1745
|
return mounting_utils.get_mounting_command(mount_path, install_cmd,
|
1726
1746
|
mount_cmd)
|
1727
1747
|
|
1748
|
+
def mount_cached_command(self, mount_path: str) -> str:
    """Returns the command to mount this S3 bucket in MOUNT_CACHED mode.

    Combines the rclone install command with the rclone cached-mount
    command built for this store's S3 rclone profile.

    Args:
        mount_path: str; Mount path on remote server
    """
    rclone_install = mounting_utils.get_rclone_install_cmd()
    s3_rclone = data_utils.Rclone.RcloneStores.S3
    profile = s3_rclone.get_profile_name(self.name)
    profile_config = s3_rclone.get_config(rclone_profile_name=profile)
    cached_mount = mounting_utils.get_mount_cached_cmd(
        profile_config, profile, self.bucket.name, mount_path)
    return mounting_utils.get_mounting_command(mount_path, rclone_install,
                                               cached_mount)
|
1758
|
+
|
1728
1759
|
def _create_s3_bucket(self,
|
1729
1760
|
bucket_name: str,
|
1730
1761
|
region=_DEFAULT_REGION) -> StorageHandle:
|
@@ -2252,6 +2283,17 @@ class GcsStore(AbstractStore):
|
|
2252
2283
|
return mounting_utils.get_mounting_command(mount_path, install_cmd,
|
2253
2284
|
mount_cmd, version_check_cmd)
|
2254
2285
|
|
2286
|
+
def mount_cached_command(self, mount_path: str) -> str:
    """Returns the command to mount this GCS bucket in MOUNT_CACHED mode.

    Combines the rclone install command with the rclone cached-mount
    command built for this store's GCS rclone profile.

    Args:
        mount_path: str; Mount path on remote server
    """
    rclone_install = mounting_utils.get_rclone_install_cmd()
    gcs_rclone = data_utils.Rclone.RcloneStores.GCS
    profile = gcs_rclone.get_profile_name(self.name)
    profile_config = gcs_rclone.get_config(rclone_profile_name=profile)
    cached_mount = mounting_utils.get_mount_cached_cmd(
        profile_config, profile, self.bucket.name, mount_path)
    return mounting_utils.get_mounting_command(mount_path, rclone_install,
                                               cached_mount)
|
2296
|
+
|
2255
2297
|
def _download_file(self, remote_path: str, local_path: str) -> None:
|
2256
2298
|
"""Downloads file from remote to local on GS bucket
|
2257
2299
|
|
@@ -3126,6 +3168,19 @@ class AzureBlobStore(AbstractStore):
|
|
3126
3168
|
return mounting_utils.get_mounting_command(mount_path, install_cmd,
|
3127
3169
|
mount_cmd)
|
3128
3170
|
|
3171
|
+
def mount_cached_command(self, mount_path: str) -> str:
    """Returns the command to mount this Azure container in MOUNT_CACHED mode.

    Combines the rclone install command with the rclone cached-mount
    command built for this store's Azure rclone profile. The profile is
    generated from the store's storage account name and key.

    Args:
        mount_path: str; Mount path on remote server
    """
    rclone_install = mounting_utils.get_rclone_install_cmd()
    azure_rclone = data_utils.Rclone.RcloneStores.AZURE
    profile = azure_rclone.get_profile_name(self.name)
    profile_config = azure_rclone.get_config(
        rclone_profile_name=profile,
        storage_account_name=self.storage_account_name,
        storage_account_key=self.storage_account_key)
    cached_mount = mounting_utils.get_mount_cached_cmd(
        profile_config, profile, self.container_name, mount_path)
    return mounting_utils.get_mounting_command(mount_path, rclone_install,
                                               cached_mount)
|
3183
|
+
|
3129
3184
|
def _create_az_bucket(self, container_name: str) -> StorageHandle:
|
3130
3185
|
"""Creates AZ Container.
|
3131
3186
|
|
@@ -3562,6 +3617,17 @@ class R2Store(AbstractStore):
|
|
3562
3617
|
return mounting_utils.get_mounting_command(mount_path, install_cmd,
|
3563
3618
|
mount_cmd)
|
3564
3619
|
|
3620
|
+
def mount_cached_command(self, mount_path: str) -> str:
    """Returns the command to mount this R2 bucket in MOUNT_CACHED mode.

    Combines the rclone install command with the rclone cached-mount
    command built for this store's R2 rclone profile.

    Args:
        mount_path: str; Mount path on remote server
    """
    rclone_install = mounting_utils.get_rclone_install_cmd()
    r2_rclone = data_utils.Rclone.RcloneStores.R2
    profile = r2_rclone.get_profile_name(self.name)
    profile_config = r2_rclone.get_config(rclone_profile_name=profile)
    cached_mount = mounting_utils.get_mount_cached_cmd(
        profile_config, profile, self.bucket.name, mount_path)
    return mounting_utils.get_mounting_command(mount_path, rclone_install,
                                               cached_mount)
|
3630
|
+
|
3565
3631
|
def _create_r2_bucket(self,
|
3566
3632
|
bucket_name: str,
|
3567
3633
|
region='auto') -> StorageHandle:
|
@@ -3681,11 +3747,10 @@ class IBMCosStore(AbstractStore):
|
|
3681
3747
|
_bucket_sub_path: Optional[str] = None):
|
3682
3748
|
self.client: 'storage.Client'
|
3683
3749
|
self.bucket: 'StorageHandle'
|
3750
|
+
self.rclone_profile_name = (
|
3751
|
+
data_utils.Rclone.RcloneStores.IBM.get_profile_name(self.name))
|
3684
3752
|
super().__init__(name, source, region, is_sky_managed,
|
3685
3753
|
sync_on_reconstruction, _bucket_sub_path)
|
3686
|
-
self.bucket_rclone_profile = \
|
3687
|
-
Rclone.generate_rclone_bucket_profile_name(
|
3688
|
-
self.name, Rclone.RcloneClouds.IBM)
|
3689
3754
|
|
3690
3755
|
def _validate(self):
|
3691
3756
|
if self.source is not None and isinstance(self.source, str):
|
@@ -3897,11 +3962,10 @@ class IBMCosStore(AbstractStore):
|
|
3897
3962
|
# .git directory is excluded from the sync
|
3898
3963
|
# wrapping src_dir_path with "" to support path with spaces
|
3899
3964
|
src_dir_path = shlex.quote(src_dir_path)
|
3900
|
-
sync_command = (
|
3901
|
-
|
3902
|
-
|
3903
|
-
|
3904
|
-
f'/{dest_dir_name}')
|
3965
|
+
sync_command = ('rclone copy --exclude ".git/*" '
|
3966
|
+
f'{src_dir_path} '
|
3967
|
+
f'{self.rclone_profile_name}:{self.name}{sub_path}'
|
3968
|
+
f'/{dest_dir_name}')
|
3905
3969
|
return sync_command
|
3906
3970
|
|
3907
3971
|
def get_file_sync_command(base_dir_path, file_names) -> str:
|
@@ -3927,10 +3991,9 @@ class IBMCosStore(AbstractStore):
|
|
3927
3991
|
for file_name in file_names
|
3928
3992
|
])
|
3929
3993
|
base_dir_path = shlex.quote(base_dir_path)
|
3930
|
-
sync_command = (
|
3931
|
-
|
3932
|
-
|
3933
|
-
f'{self.bucket_rclone_profile}:{self.name}{sub_path}')
|
3994
|
+
sync_command = ('rclone copy '
|
3995
|
+
f'{includes} {base_dir_path} '
|
3996
|
+
f'{self.rclone_profile_name}:{self.name}{sub_path}')
|
3934
3997
|
return sync_command
|
3935
3998
|
|
3936
3999
|
# Generate message for upload
|
@@ -3976,7 +4039,8 @@ class IBMCosStore(AbstractStore):
|
|
3976
4039
|
'sky storage delete' or 'sky start'
|
3977
4040
|
"""
|
3978
4041
|
|
3979
|
-
bucket_profile_name = Rclone.
|
4042
|
+
bucket_profile_name = (data_utils.Rclone.RcloneStores.IBM.value +
|
4043
|
+
self.name)
|
3980
4044
|
try:
|
3981
4045
|
bucket_region = data_utils.get_ibm_cos_bucket_region(self.name)
|
3982
4046
|
except exceptions.StorageBucketGetError as e:
|
@@ -4011,9 +4075,9 @@ class IBMCosStore(AbstractStore):
|
|
4011
4075
|
'`rclone lsd <remote>` on relevant remotes returned '
|
4012
4076
|
'via `rclone listremotes` to debug.')
|
4013
4077
|
|
4014
|
-
Rclone.store_rclone_config(
|
4078
|
+
data_utils.Rclone.store_rclone_config(
|
4015
4079
|
self.name,
|
4016
|
-
Rclone.
|
4080
|
+
data_utils.Rclone.RcloneStores.IBM,
|
4017
4081
|
self.region, # type: ignore
|
4018
4082
|
)
|
4019
4083
|
|
@@ -4053,18 +4117,18 @@ class IBMCosStore(AbstractStore):
|
|
4053
4117
|
mount_path: str; Path to mount the bucket to.
|
4054
4118
|
"""
|
4055
4119
|
# install rclone if not installed.
|
4056
|
-
install_cmd = mounting_utils.
|
4057
|
-
|
4058
|
-
self.
|
4059
|
-
|
4060
|
-
|
4061
|
-
|
4062
|
-
|
4063
|
-
|
4064
|
-
|
4065
|
-
|
4066
|
-
|
4067
|
-
|
4120
|
+
install_cmd = mounting_utils.get_rclone_install_cmd()
|
4121
|
+
rclone_config = data_utils.Rclone.RcloneStores.IBM.get_config(
|
4122
|
+
rclone_profile_name=self.rclone_profile_name,
|
4123
|
+
region=self.region) # type: ignore
|
4124
|
+
mount_cmd = (
|
4125
|
+
mounting_utils.get_cos_mount_cmd(
|
4126
|
+
rclone_config,
|
4127
|
+
self.rclone_profile_name,
|
4128
|
+
self.bucket.name,
|
4129
|
+
mount_path,
|
4130
|
+
self._bucket_sub_path, # type: ignore
|
4131
|
+
))
|
4068
4132
|
return mounting_utils.get_mounting_command(mount_path, install_cmd,
|
4069
4133
|
mount_cmd)
|
4070
4134
|
|
@@ -4128,7 +4192,8 @@ class IBMCosStore(AbstractStore):
|
|
4128
4192
|
except ibm.ibm_botocore.exceptions.ClientError as e:
|
4129
4193
|
if e.__class__.__name__ == 'NoSuchBucket':
|
4130
4194
|
logger.debug('bucket already removed')
|
4131
|
-
Rclone.delete_rclone_bucket_profile(
|
4195
|
+
data_utils.Rclone.delete_rclone_bucket_profile(
|
4196
|
+
self.name, data_utils.Rclone.RcloneStores.IBM)
|
4132
4197
|
|
4133
4198
|
|
4134
4199
|
class OciStore(AbstractStore):
|
sky/global_user_state.py
CHANGED
@@ -186,6 +186,11 @@ def get_user(user_id: str) -> models.User:
|
|
186
186
|
return models.User(id=row[0], name=row[1])
|
187
187
|
|
188
188
|
|
189
|
+
def get_all_users() -> List[models.User]:
    """Returns every row of the ``users`` table as a ``models.User``."""
    cursor = _DB.cursor.execute('SELECT id, name FROM users')
    return [
        models.User(id=user_id, name=user_name)
        for user_id, user_name in cursor.fetchall()
    ]
|
192
|
+
|
193
|
+
|
189
194
|
def add_or_update_cluster(cluster_name: str,
|
190
195
|
cluster_handle: 'backends.ResourceHandle',
|
191
196
|
requested_resources: Optional[Set[Any]],
|
sky/models.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
import collections
|
4
4
|
import dataclasses
|
5
|
-
from typing import Dict, Optional
|
5
|
+
from typing import Any, Dict, Optional
|
6
6
|
|
7
7
|
|
8
8
|
@dataclasses.dataclass
|
@@ -12,6 +12,9 @@ class User:
|
|
12
12
|
# Display name of the user
|
13
13
|
name: Optional[str] = None
|
14
14
|
|
15
|
+
def to_dict(self) -> Dict[str, Any]:
    """Returns this user's ``id`` and ``name`` as a plain dictionary."""
    return dict(id=self.id, name=self.name)
|
17
|
+
|
15
18
|
|
16
19
|
RealtimeGpuAvailability = collections.namedtuple(
|
17
20
|
'RealtimeGpuAvailability', ['gpu', 'counts', 'capacity', 'available'])
|
sky/server/requests/payloads.py
CHANGED
@@ -115,8 +115,8 @@ class RequestBody(pydantic.BaseModel):
|
|
115
115
|
|
116
116
|
class CheckBody(RequestBody):
|
117
117
|
"""The request body for the check endpoint."""
|
118
|
-
clouds: Optional[Tuple[str, ...]]
|
119
|
-
verbose: bool
|
118
|
+
clouds: Optional[Tuple[str, ...]] = None
|
119
|
+
verbose: bool = False
|
120
120
|
|
121
121
|
|
122
122
|
class DagRequestBody(RequestBody):
|
@@ -340,8 +340,8 @@ class JobsQueueBody(RequestBody):
|
|
340
340
|
|
341
341
|
class JobsCancelBody(RequestBody):
|
342
342
|
"""The request body for the jobs cancel endpoint."""
|
343
|
-
name: Optional[str]
|
344
|
-
job_ids: Optional[List[int]]
|
343
|
+
name: Optional[str] = None
|
344
|
+
job_ids: Optional[List[int]] = None
|
345
345
|
all: bool = False
|
346
346
|
all_users: bool = False
|
347
347
|
|
sky/server/server.py
CHANGED
@@ -12,7 +12,7 @@ import pathlib
|
|
12
12
|
import re
|
13
13
|
import shutil
|
14
14
|
import sys
|
15
|
-
from typing import Dict, List, Literal, Optional, Set, Tuple
|
15
|
+
from typing import Any, Dict, List, Literal, Optional, Set, Tuple
|
16
16
|
import uuid
|
17
17
|
import zipfile
|
18
18
|
|
@@ -675,6 +675,13 @@ async def logs(
|
|
675
675
|
)
|
676
676
|
|
677
677
|
|
678
|
+
@app.get('/users')
async def users() -> List[Dict[str, Any]]:
    """Gets all users, each serialized with ``to_dict()``."""
    return [user.to_dict() for user in global_user_state.get_all_users()]
|
683
|
+
|
684
|
+
|
678
685
|
@app.post('/download_logs')
|
679
686
|
async def download_logs(
|
680
687
|
request: fastapi.Request,
|
sky/skylet/constants.py
CHANGED
@@ -51,7 +51,7 @@ SKY_RAY_CMD = (f'{SKY_PYTHON_CMD} $([ -s {SKY_RAY_PATH_FILE} ] && '
|
|
51
51
|
f'cat {SKY_RAY_PATH_FILE} 2> /dev/null || which ray)')
|
52
52
|
# Separate env for SkyPilot runtime dependencies.
|
53
53
|
SKY_REMOTE_PYTHON_ENV_NAME = 'skypilot-runtime'
|
54
|
-
SKY_REMOTE_PYTHON_ENV = f'~/{SKY_REMOTE_PYTHON_ENV_NAME}'
|
54
|
+
SKY_REMOTE_PYTHON_ENV: str = f'~/{SKY_REMOTE_PYTHON_ENV_NAME}'
|
55
55
|
ACTIVATE_SKY_REMOTE_PYTHON_ENV = f'source {SKY_REMOTE_PYTHON_ENV}/bin/activate'
|
56
56
|
# uv is used for venv and pip, much faster than python implementations.
|
57
57
|
SKY_UV_INSTALL_DIR = '"$HOME/.local/bin"'
|
@@ -60,7 +60,7 @@ SKY_UV_CMD = f'UV_SYSTEM_PYTHON=false {SKY_UV_INSTALL_DIR}/uv'
|
|
60
60
|
SKY_UV_INSTALL_CMD = (f'{SKY_UV_CMD} -V >/dev/null 2>&1 || '
|
61
61
|
'curl -LsSf https://astral.sh/uv/install.sh '
|
62
62
|
f'| UV_INSTALL_DIR={SKY_UV_INSTALL_DIR} sh')
|
63
|
-
SKY_UV_PIP_CMD = f'VIRTUAL_ENV={SKY_REMOTE_PYTHON_ENV} {SKY_UV_CMD} pip'
|
63
|
+
SKY_UV_PIP_CMD: str = (f'VIRTUAL_ENV={SKY_REMOTE_PYTHON_ENV} {SKY_UV_CMD} pip')
|
64
64
|
# Deleting the SKY_REMOTE_PYTHON_ENV_NAME from the PATH to deactivate the
|
65
65
|
# environment. `deactivate` command does not work when conda is used.
|
66
66
|
DEACTIVATE_SKY_REMOTE_PYTHON_ENV = (
|
@@ -331,6 +331,12 @@ SKYPILOT_NODE_RANK = f'{SKYPILOT_ENV_VAR_PREFIX}NODE_RANK'
|
|
331
331
|
# known after provisioning.
|
332
332
|
SKY_SSH_USER_PLACEHOLDER = 'skypilot:ssh_user'
|
333
333
|
|
334
|
+
RCLONE_CONFIG_DIR = '~/.config/rclone'
|
335
|
+
RCLONE_CONFIG_PATH = f'{RCLONE_CONFIG_DIR}/rclone.conf'
|
336
|
+
RCLONE_LOG_DIR = '~/.sky/rclone_log'
|
337
|
+
RCLONE_CACHE_DIR = '~/.cache/rclone'
|
338
|
+
RCLONE_CACHE_REFRESH_INTERVAL = 10
|
339
|
+
|
334
340
|
# The keys that can be overridden in the `~/.sky/config.yaml` file. The
|
335
341
|
# overrides are specified in task YAMLs.
|
336
342
|
OVERRIDEABLE_CONFIG_KEYS_IN_TASK: List[Tuple[str, ...]] = [
|
sky/task.py
CHANGED
@@ -1128,7 +1128,7 @@ class Task:
|
|
1128
1128
|
assert storage.name is not None, storage
|
1129
1129
|
# extract region from rclone.conf
|
1130
1130
|
cos_region = data_utils.Rclone.get_region_from_rclone(
|
1131
|
-
storage.name, data_utils.Rclone.
|
1131
|
+
storage.name, data_utils.Rclone.RcloneStores.IBM)
|
1132
1132
|
blob_path = f'cos://{cos_region}/{storage.name}'
|
1133
1133
|
blob_path = storage.get_bucket_sub_path_prefix(blob_path)
|
1134
1134
|
self.update_file_mounts({mnt_path: blob_path})
|
@@ -1268,7 +1268,7 @@ class Task:
|
|
1268
1268
|
|
1269
1269
|
# Storage mounting
|
1270
1270
|
for _, storage_mount in self.storage_mounts.items():
|
1271
|
-
if storage_mount.mode
|
1271
|
+
if storage_mount.mode in storage_lib.MOUNTABLE_STORAGE_MODES:
|
1272
1272
|
required_features.add(
|
1273
1273
|
clouds.CloudImplementationFeatures.STORAGE_MOUNTING)
|
1274
1274
|
break
|
sky/utils/controller_utils.py
CHANGED
@@ -1025,7 +1025,7 @@ def maybe_translate_local_file_mounts_and_sync_up(task: 'task_lib.Task',
|
|
1025
1025
|
# it was handled in step 6.
|
1026
1026
|
updated_mount_storages = {}
|
1027
1027
|
for storage_path, storage_obj in task.storage_mounts.items():
|
1028
|
-
if (storage_obj.mode
|
1028
|
+
if (storage_obj.mode in storage_lib.MOUNTABLE_STORAGE_MODES and
|
1029
1029
|
not storage_obj.source):
|
1030
1030
|
# Construct source URL with first store type and storage name
|
1031
1031
|
# E.g., s3://my-storage-name
|
@@ -1043,7 +1043,7 @@ def maybe_translate_local_file_mounts_and_sync_up(task: 'task_lib.Task',
|
|
1043
1043
|
new_storage = storage_lib.Storage.from_yaml_config({
|
1044
1044
|
'source': source,
|
1045
1045
|
'persistent': storage_obj.persistent,
|
1046
|
-
'mode':
|
1046
|
+
'mode': storage_obj.mode.value,
|
1047
1047
|
# We enable force delete to allow the controller to delete
|
1048
1048
|
# the object store in case persistent is set to False.
|
1049
1049
|
'_force_delete': True
|