skypilot-nightly 1.0.0.dev20250408__py3-none-any.whl → 1.0.0.dev20250410__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/azure.py +1 -1
- sky/adaptors/nebius.py +5 -27
- sky/backends/backend.py +9 -7
- sky/backends/cloud_vm_ray_backend.py +7 -7
- sky/backends/local_docker_backend.py +3 -3
- sky/cloud_stores.py +0 -4
- sky/clouds/do.py +4 -5
- sky/clouds/gcp.py +5 -3
- sky/clouds/nebius.py +22 -12
- sky/clouds/service_catalog/data_fetchers/fetch_ibm.py +1 -2
- sky/clouds/service_catalog/gcp_catalog.py +37 -10
- sky/core.py +6 -6
- sky/data/data_utils.py +5 -9
- sky/data/mounting_utils.py +1 -1
- sky/data/storage.py +25 -31
- sky/data/storage_utils.py +27 -18
- sky/execution.py +11 -4
- sky/jobs/server/server.py +5 -1
- sky/provision/do/utils.py +19 -16
- sky/provision/gcp/config.py +30 -20
- sky/setup_files/dependencies.py +1 -1
- sky/skylet/log_lib.py +4 -0
- sky/task.py +27 -7
- sky/utils/schemas.py +25 -7
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250410.dist-info}/METADATA +2 -2
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250410.dist-info}/RECORD +31 -31
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250410.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250410.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250410.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250410.dist-info}/top_level.txt +0 -0
sky/data/storage.py
CHANGED
@@ -1616,9 +1616,25 @@ class S3Store(AbstractStore):
         # we exclude .git directory from the sync
         excluded_list = storage_utils.get_excluded_files(src_dir_path)
         excluded_list.append('.git/*')
+
+        # Process exclusion patterns to make them work correctly with aws
+        # s3 sync
+        processed_excludes = []
+        for excluded_path in excluded_list:
+            # Check if the path is a directory exclusion pattern
+            # For AWS S3 sync, directory patterns need to end with "/**" to
+            # exclude all contents
+            if (excluded_path.endswith('/') or os.path.isdir(
+                    os.path.join(src_dir_path, excluded_path.rstrip('/')))):
+                # Remove any trailing slash and add '/*' to exclude all
+                # contents
+                processed_excludes.append(f'{excluded_path.rstrip("/")}/*')
+            else:
+                processed_excludes.append(excluded_path)
+
         excludes = ' '.join([
             f'--exclude {shlex.quote(file_name)}'
-            for file_name in excluded_list
+            for file_name in processed_excludes
         ])
         src_dir_path = shlex.quote(src_dir_path)
         sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
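
The hunk above rewrites directory entries in the exclude list so that `aws s3 sync` skips their contents, not just the directory name itself. A minimal standalone sketch of the same normalization (the function name and sample inputs are illustrative, not from the package):

    import os

    def process_excludes(excluded_list, src_dir_path):
        # Directory patterns must end in '/*' for `aws s3 sync --exclude`
        # to skip everything underneath them.
        processed = []
        for path in excluded_list:
            is_dir = path.endswith('/') or os.path.isdir(
                os.path.join(src_dir_path, path.rstrip('/')))
            processed.append(f"{path.rstrip('/')}/*" if is_dir else path)
        return processed

    # e.g. process_excludes(['logs/', 'data.csv'], '/tmp/src')
    # -> ['logs/*', 'data.csv']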
@@ -4676,7 +4692,6 @@ class NebiusStore(AbstractStore):
                  _bucket_sub_path: Optional[str] = None):
         self.client: 'boto3.client.Client'
         self.bucket: 'StorageHandle'
-        self.region = region if region is not None else nebius.DEFAULT_REGION
         super().__init__(name, source, region, is_sky_managed,
                          sync_on_reconstruction, _bucket_sub_path)

@@ -4749,7 +4764,7 @@ class NebiusStore(AbstractStore):
             StorageBucketGetError: If fetching existing bucket fails
             StorageInitError: If general initialization fails.
         """
-        self.client = data_utils.create_nebius_client(self.region)
+        self.client = data_utils.create_nebius_client()
         self.bucket, is_new_bucket = self._get_bucket()
         if self.is_sky_managed is None:
             # If is_sky_managed is not specified, then this is a new storage
@@ -4846,12 +4861,10 @@ class NebiusStore(AbstractStore):
             f'--include {shlex.quote(file_name)}'
             for file_name in file_names
         ])
-        endpoint_url = nebius.create_endpoint(self.region)
         base_dir_path = shlex.quote(base_dir_path)
         sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
                         f'{includes} {base_dir_path} '
                         f's3://{self.name}{sub_path} '
-                        f'--endpoint={endpoint_url} '
                         f'--profile={nebius.NEBIUS_PROFILE_NAME}')
         return sync_command

@@ -4863,12 +4876,10 @@ class NebiusStore(AbstractStore):
             f'--exclude {shlex.quote(file_name)}'
             for file_name in excluded_list
         ])
-        endpoint_url = nebius.create_endpoint(self.region)
         src_dir_path = shlex.quote(src_dir_path)
         sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
                         f'{src_dir_path} '
                         f's3://{self.name}{sub_path}/{dest_dir_name} '
-                        f'--endpoint={endpoint_url} '
                         f'--profile={nebius.NEBIUS_PROFILE_NAME}')
         return sync_command

@@ -4927,7 +4938,6 @@ class NebiusStore(AbstractStore):
         """
         nebius_s = nebius.resource('s3')
         bucket = nebius_s.Bucket(self.name)
-        endpoint_url = nebius.create_endpoint(self.region)
         try:
             # Try Public bucket case.
             # This line does not error out if the bucket is an external public
@@ -4942,7 +4952,6 @@ class NebiusStore(AbstractStore):
             # user.
             if error_code == '403':
                 command = (f'aws s3 ls s3://{self.name} '
-                           f'--endpoint={endpoint_url} '
                            f'--profile={nebius.NEBIUS_PROFILE_NAME}')
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageBucketGetError(
@@ -4954,7 +4963,7 @@ class NebiusStore(AbstractStore):
                 raise exceptions.StorageBucketGetError(
                     'Attempted to use a non-existent bucket as a source: '
                     f'{self.source}. Consider using `aws s3 ls '
-                    f's3://{self.name}
+                    f's3://{self.name} '
                     f'--profile={nebius.NEBIUS_PROFILE_NAME}` to debug.')

@@ -4962,7 +4971,7 @@ class NebiusStore(AbstractStore):
         # Store object is being reconstructed for deletion or re-mount with
         # sky start, and error is raised instead.
         if self.sync_on_reconstruction:
-            bucket = self._create_nebius_bucket(self.name, self.region)
+            bucket = self._create_nebius_bucket(self.name)
             return bucket, True
         else:
             # Raised when Storage object is reconstructed for sky storage
@@ -4991,38 +5000,27 @@ class NebiusStore(AbstractStore):
             mount_path: str; Path to mount the bucket to.
         """
         install_cmd = mounting_utils.get_s3_mount_install_cmd()
-        endpoint_url = nebius.create_endpoint(self.region)
         nebius_profile_name = nebius.NEBIUS_PROFILE_NAME
+        endpoint_url = self.client.meta.endpoint_url
         mount_cmd = mounting_utils.get_nebius_mount_cmd(nebius_profile_name,
-                                                        endpoint_url,
                                                         self.bucket.name,
+                                                        endpoint_url,
                                                         mount_path,
                                                         self._bucket_sub_path)
         return mounting_utils.get_mounting_command(mount_path, install_cmd,
                                                    mount_cmd)

-    def _create_nebius_bucket(self,
-                              bucket_name: str,
-                              region='auto') -> StorageHandle:
-        """Creates S3 bucket with specific name in specific region
+    def _create_nebius_bucket(self, bucket_name: str) -> StorageHandle:
+        """Creates S3 bucket with specific name

         Args:
           bucket_name: str; Name of bucket
-          region: str; Region name, e.g. us-west-1, us-east-2
         Raises:
           StorageBucketCreateError: If bucket creation fails.
         """
         nebius_client = self.client
         try:
-            if region is None:
-                nebius_client.create_bucket(Bucket=bucket_name)
-            else:
-                location = {'LocationConstraint': region}
-                nebius_client.create_bucket(Bucket=bucket_name,
-                                            CreateBucketConfiguration=location)
-            logger.info(f'  {colorama.Style.DIM}Created Nebius bucket '
-                        f'{bucket_name!r} in {region}'
-                        f'{colorama.Style.RESET_ALL}')
+            nebius_client.create_bucket(Bucket=bucket_name)
         except aws.botocore_exceptions().ClientError as e:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketCreateError(

@@ -5070,9 +5068,7 @@ class NebiusStore(AbstractStore):
         # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
         # The fastest way to delete is to run `aws s3 rb --force`,
         # which removes the bucket by force.
-        endpoint_url = nebius.create_endpoint(self.region)
         remove_command = (f'aws s3 rb s3://{bucket_name} --force '
-                          f'--endpoint {endpoint_url} '
                           f'--profile={nebius.NEBIUS_PROFILE_NAME}')

         success = self._execute_nebius_remove_command(

@@ -5094,10 +5090,8 @@ class NebiusStore(AbstractStore):
     def _delete_nebius_bucket_sub_path(self, bucket_name: str,
                                        sub_path: str) -> bool:
         """Deletes the sub path from the bucket."""
-        endpoint_url = nebius.create_endpoint(self.region)
         remove_command = (
             f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
-            f'--endpoint {endpoint_url} '
             f'--profile={nebius.NEBIUS_PROFILE_NAME}')
         return self._execute_nebius_remove_command(
             remove_command, bucket_name, f'Removing objects from '
sky/data/storage_utils.py
CHANGED
@@ -4,7 +4,7 @@ import os
 import pathlib
 import shlex
 import subprocess
-from typing import Any, Dict, List, Optional, TextIO, Union
+from typing import Any, Dict, List, Optional, Set, TextIO, Union
 import warnings
 import zipfile

@@ -71,7 +71,7 @@ def get_excluded_files_from_skyignore(src_dir_path: str) -> List[str]:
     """List files and patterns ignored by the .skyignore file
     in the given source directory.
     """
-    excluded_list: List[str] = []
+    excluded_list: Set[str] = set()
     expand_src_dir_path = os.path.expanduser(src_dir_path)
     skyignore_path = os.path.join(expand_src_dir_path,
                                   constants.SKY_IGNORE_FILE)

@@ -95,12 +95,12 @@ def get_excluded_files_from_skyignore(src_dir_path: str) -> List[str]:
             for i in range(len(matching_files)):
                 matching_files[i] = os.path.relpath(
                     matching_files[i], expand_src_dir_path)
-            excluded_list.extend(matching_files)
+            excluded_list.update(matching_files)
     except IOError as e:
         logger.warning(f'Error reading {skyignore_path}: '
                        f'{common_utils.format_exception(e, use_bracket=True)}')

-    return excluded_list
+    return list(excluded_list)


 def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:

@@ -111,8 +111,8 @@ def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
     This will also be run for all submodules under the src_dir_path.

     Returns:
-        List[str] containing files and
-        patterns
+        List[str] containing files and folders to be ignored. There won't be any
+        patterns.
     """
     expand_src_dir_path = os.path.expanduser(src_dir_path)

@@ -210,10 +210,6 @@ def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
                 return []

             to_be_excluded = os.path.join(repo, item)
-            if item.endswith('/'):
-                # aws s3 sync and gsutil rsync require * to exclude
-                # files/dirs under the specified directory.
-                to_be_excluded += '*'

             excluded_list.append(to_be_excluded)

@@ -223,11 +219,21 @@ def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
 def get_excluded_files(src_dir_path: str) -> List[str]:
     # TODO: this could return a huge list of files,
     # should think of ways to optimize.
-    """List files and directories to be excluded."""
+    """List files and directories to be excluded.
+
+    Args:
+        src_dir_path (str): The path to the source directory.
+
+    Returns:
+        A list of relative paths to files and directories to be excluded from
+        the source directory.
+    """
     expand_src_dir_path = os.path.expanduser(src_dir_path)
     skyignore_path = os.path.join(expand_src_dir_path,
                                   constants.SKY_IGNORE_FILE)
     # Fail fast if the source is a file.
+    if not os.path.exists(expand_src_dir_path):
+        raise ValueError(f'{src_dir_path} does not exist.')
     if os.path.isfile(expand_src_dir_path):
         raise ValueError(f'{src_dir_path} is a file, not a directory.')
     if os.path.exists(skyignore_path):

@@ -235,12 +241,14 @@ def get_excluded_files(src_dir_path: str) -> List[str]:
                      f'Excluded files to sync to cluster based on '
                      f'{constants.SKY_IGNORE_FILE}.'
                      f'{colorama.Style.RESET_ALL}')
-        return get_excluded_files_from_skyignore(src_dir_path)
-    logger.debug(f'  {colorama.Style.DIM}'
-                 f'Excluded files to sync to cluster based on '
-                 f'{constants.GIT_IGNORE_FILE}.'
-                 f'{colorama.Style.RESET_ALL}')
-    return get_excluded_files_from_gitignore(src_dir_path)
+        excluded_paths = get_excluded_files_from_skyignore(src_dir_path)
+    else:
+        logger.debug(f'  {colorama.Style.DIM}'
+                     f'Excluded files to sync to cluster based on '
+                     f'{constants.GIT_IGNORE_FILE}.'
+                     f'{colorama.Style.RESET_ALL}')
+        excluded_paths = get_excluded_files_from_gitignore(src_dir_path)
+    return excluded_paths


 def zip_files_and_folders(items: List[str],

@@ -277,7 +285,8 @@ def zip_files_and_folders(items: List[str],
                 zipf.write(item)
             elif os.path.isdir(item):
                 excluded_files = set([
-                    os.path.join(item, f) for f in get_excluded_files(item)
+                    os.path.join(item, f.rstrip('/'))
+                    for f in get_excluded_files(item)
                 ])
                 for root, dirs, files in os.walk(item, followlinks=False):
                     # Modify dirs in-place to control os.walk()'s traversal
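
With the trailing `*` no longer appended inside `get_excluded_files_from_gitignore`, `get_excluded_files` returns plain relative paths and each sync backend adds its own tool-specific wildcard (as the S3 hunk above does). A hedged sketch of that caller-side step (helper name and inputs are made up):

    import shlex

    def to_aws_excludes(excluded):
        # Directories (trailing '/') get the '/*' suffix; files pass through.
        flags = []
        for p in excluded:
            pattern = f"{p.rstrip('/')}/*" if p.endswith('/') else p
            flags.append(f'--exclude {shlex.quote(pattern)}')
        return ' '.join(flags)

    print(to_aws_excludes(['logs/', '.git/*', 'notes.txt']))
    # --exclude 'logs/*' --exclude '.git/*' --exclude notes.txt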
sky/execution.py
CHANGED
@@ -159,9 +159,9 @@ def _execute(
         no_setup: bool; whether to skip setup commands or not when (re-)launching.
         clone_disk_from: Optional[str]; if set, clone the disk from the specified
             cluster.
-        skip_unecessary_provisioning: bool; if True, compare the calculated
+        skip_unnecessary_provisioning: bool; if True, compare the calculated
             cluster config to the current cluster's config. If they match, shortcut
-            provisioning even if we have Stage.PROVISION.
+            provisioning and setup, even if we have Stage.PROVISION and Stage.SETUP.

     Returns:
         job_id: Optional[int]; the job ID of the submitted job. None if the

@@ -303,12 +303,13 @@ def _execute(
     task.sync_storage_mounts()

     try:
+        provisioning_skipped = False
         if Stage.PROVISION in stages:
             assert handle is None or skip_unnecessary_provisioning, (
                 'Provisioning requested, but handle is already set. PROVISION '
                 'should be excluded from stages or '
                 'skip_unecessary_provisioning should be set. ')
-            handle = backend.provision(
+            (handle, provisioning_skipped) = backend.provision(
                 task,
                 task.best_resources,
                 dryrun=dryrun,

@@ -341,7 +342,11 @@ def _execute(
         if no_setup:
             logger.info('Setup commands skipped.')
         elif Stage.SETUP in stages and not dryrun:
-            backend.setup(handle, task, detach_setup=detach_setup)
+            if skip_unnecessary_provisioning and provisioning_skipped:
+                logger.debug('Unnecessary provisioning was skipped, so '
+                             'skipping setup as well.')
+            else:
+                backend.setup(handle, task, detach_setup=detach_setup)

         if Stage.PRE_EXEC in stages and not dryrun:
             if idle_minutes_to_autostop is not None:

@@ -523,6 +528,8 @@ def launch(
             Stage.PROVISION,
             Stage.SYNC_WORKDIR,
             Stage.SYNC_FILE_MOUNTS,
+            # Setup will be skipped if provisioning was skipped.
+            Stage.SETUP,
             Stage.PRE_EXEC,
             Stage.EXEC,
             Stage.DOWN,
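
`backend.provision` now reports whether it short-circuited alongside the handle, and `_execute` uses that flag to skip `Stage.SETUP` too (this pairs with the `sky/backends/backend.py` +9 -7 change listed above). A stand-in sketch of the control flow, with hypothetical names:

    from typing import Tuple

    def provision(config_matches_existing: bool) -> Tuple[str, bool]:
        # Stand-in for backend.provision(): handle plus a 'skipped' flag.
        if config_matches_existing:
            return 'existing-handle', True
        return 'new-handle', False

    handle, provisioning_skipped = provision(config_matches_existing=True)
    if provisioning_skipped:
        print('Provisioning skipped; skipping setup as well.')
    else:
        print('Running setup on', handle)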
sky/jobs/server/server.py
CHANGED
@@ -161,7 +161,11 @@ async def dashboard(request: fastapi.Request,
                 response = await client.request('GET',
                                                 dashboard_url,
                                                 timeout=5)
-                break  # Connection successful, proceed with the request
+                if response.is_success:
+                    break  # Connection successful, proceed with the request
+                # Raise an HTTPException here which will be caught by the
+                # following except block to retry with new connection
+                response.raise_for_status()
             except Exception as e:  # pylint: disable=broad-except
                 # We catch all exceptions to gracefully handle unknown
                 # errors and retry or raise an HTTPException to the client.
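
The fix stops treating any completed proxy request as a success: a non-2xx reply now raises via `raise_for_status()`, so the enclosing retry loop reconnects. Both `response.is_success` and `raise_for_status()` are standard httpx APIs; a self-contained sketch of the pattern (URL and retry count are arbitrary):

    from typing import Optional

    import httpx

    async def fetch_with_retry(url: str, attempts: int = 3) -> httpx.Response:
        last_exc: Optional[Exception] = None
        for _ in range(attempts):
            try:
                async with httpx.AsyncClient() as client:
                    response = await client.request('GET', url, timeout=5)
                    if response.is_success:
                        return response  # 2xx: done
                    # Non-2xx: raise so the except block retries with a
                    # brand-new connection.
                    response.raise_for_status()
            except Exception as e:  # pylint: disable=broad-except
                last_exc = e
        raise RuntimeError(f'All {attempts} attempts failed') from last_exc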
sky/provision/do/utils.py
CHANGED
@@ -15,6 +15,7 @@ from sky.adaptors import do
 from sky.provision import common
 from sky.provision import constants as provision_constants
 from sky.provision.do import constants
+from sky.utils import annotations
 from sky.utils import common_utils

 logger = sky_logging.init_logger(__name__)

@@ -31,7 +32,6 @@ MAX_BACKOFF_FACTOR = 10
 MAX_ATTEMPTS = 6
 SSH_KEY_NAME_ON_DO = f'sky-key-{common_utils.get_user_hash()}'

-CREDENTIALS_PATH = '~/.config/doctl/config.yaml'
 _client = None
 _ssh_key_id = None

@@ -40,31 +40,34 @@ class DigitalOceanError(Exception):
     pass


-def _init_client():
-    global _client, CREDENTIALS_PATH
-    assert _client is None
-    CREDENTIALS_PATH = None
+@annotations.lru_cache(scope='request')
+def get_credentials_path():
+    credentials_path = None
     credentials_found = 0
     for path in POSSIBLE_CREDENTIALS_PATHS:
         if os.path.exists(path):
-            CREDENTIALS_PATH = path
-            credentials_found += 1
             logger.debug(f'Digital Ocean credential path found at {path}')
-    if credentials_found > 1:
-        logger.debug('More than 1 credential file found')
-    if CREDENTIALS_PATH is None:
-        raise DigitalOceanError(
-            'no credentials file found from '
-            f'the following paths {POSSIBLE_CREDENTIALS_PATHS}')
+            credentials_path = path
+            credentials_found += 1
+    if credentials_found > 1:
+        logger.debug('More than 1 credential file found')
+    return credentials_path

+
+def _init_client():
+    global _client
+    assert _client is None
     # attempt default context
-    credentials = common_utils.read_yaml(CREDENTIALS_PATH)
+    if get_credentials_path() is None:
+        raise DigitalOceanError(
+            'No credentials found, please run `doctl auth init`')
+    credentials = common_utils.read_yaml(get_credentials_path())
     default_token = credentials.get('access-token', None)
     if default_token is not None:
         try:
             test_client = do.pydo.Client(token=default_token)
             test_client.droplets.list()
-            logger.debug('
+            logger.debug('Trying `default` context')
             _client = test_client
             return _client
         except do.exceptions().HttpResponseError:

@@ -76,7 +79,7 @@ def _init_client():
         try:
             test_client = do.pydo.Client(token=api_token)
             test_client.droplets.list()
-            logger.debug(f'
+            logger.debug(f'Using "{context}" context')
             _client = test_client
             break
         except do.exceptions().HttpResponseError:
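
The mutable `CREDENTIALS_PATH` global becomes a cached lookup; `annotations.lru_cache(scope='request')` is SkyPilot's wrapper that memoizes per API-server request rather than for the whole process. A rough stdlib-only equivalent (the cache-scope semantics are simplified, and the candidate paths are illustrative):

    import functools
    import os

    POSSIBLE_CREDENTIALS_PATHS = [
        os.path.expanduser('~/.config/doctl/config.yaml'),
    ]

    # Process-lifetime cache; SkyPilot's wrapper can instead expire the
    # cached value at the end of each API request.
    @functools.lru_cache(maxsize=1)
    def get_credentials_path():
        for path in POSSIBLE_CREDENTIALS_PATHS:
            if os.path.exists(path):
                return path
        return None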
sky/provision/gcp/config.py
CHANGED
@@ -571,35 +571,45 @@ def get_usable_vpc_and_subnet(

     specific_vpc_to_use = config.provider_config.get('vpc_name', None)
     if specific_vpc_to_use is not None:
+        if '/' in specific_vpc_to_use:
+            # VPC can also be specified in the format PROJECT_ID/VPC_NAME.
+            # This enables use of shared VPCs.
+            split_vpc_value = specific_vpc_to_use.split('/')
+            if len(split_vpc_value) != 2:
+                raise ValueError(f'Invalid VPC name: {specific_vpc_to_use}. '
+                                 'Please specify the VPC name in the format '
+                                 'PROJECT_ID/VPC_NAME.')
+            project_id = split_vpc_value[0]
+            specific_vpc_to_use = split_vpc_value[1]
+
         vpcnets_all = _list_vpcnets(project_id,
                                     compute,
                                     filter=f'name={specific_vpc_to_use}')
-
-        assert (len(vpcnets_all) <=
-                1), (f'{len(vpcnets_all)} VPCs found with the same name '
-                     f'{specific_vpc_to_use}')
-        if len(vpcnets_all) == 1:
-            # Skip checking any firewall rules if the user has specified a VPC.
-            logger.info(f'Using user-specified VPC {specific_vpc_to_use!r}.')
-            subnets = _list_subnets(project_id,
-                                    region,
-                                    compute,
-                                    network=specific_vpc_to_use)
-            if not subnets:
-                _skypilot_log_error_and_exit_for_failover(
-                    'SUBNET_NOT_FOUND_FOR_VPC',
-                    f'No subnet for region {region} found for specified VPC '
-                    f'{specific_vpc_to_use!r}. '
-                    f'Check the subnets of VPC {specific_vpc_to_use!r} at '
-                    'https://console.cloud.google.com/networking/networks')
-            return specific_vpc_to_use, subnets[0]
-        else:
+        if not vpcnets_all:
             # VPC with this name not found. Error out and let SkyPilot failover.
             _skypilot_log_error_and_exit_for_failover(
                 'VPC_NOT_FOUND',
                 f'No VPC with name {specific_vpc_to_use!r} is found. '
                 'To fix: specify a correct VPC name.')
             # Should not reach here.
+            assert False
+
+        # On GCP, VPC names are unique within a project.
+        assert len(vpcnets_all) == 1, (vpcnets_all, specific_vpc_to_use)
+        # Skip checking any firewall rules if the user has specified a VPC.
+        logger.info(f'Using user-specified VPC {specific_vpc_to_use!r}.')
+        subnets = _list_subnets(project_id,
+                                region,
+                                compute,
+                                network=specific_vpc_to_use)
+        if not subnets:
+            _skypilot_log_error_and_exit_for_failover(
+                'SUBNET_NOT_FOUND_FOR_VPC',
+                f'No subnet for region {region} found for specified VPC '
+                f'{specific_vpc_to_use!r}. '
+                f'Check the subnets of VPC {specific_vpc_to_use!r} at '
+                'https://console.cloud.google.com/networking/networks')
+        return specific_vpc_to_use, subnets[0]

     subnets_all = _list_subnets(project_id, region, compute)
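
The new branch lets `vpc_name` carry an optional project prefix so a shared VPC owned by a host project can be used. The parsing is simple enough to show standalone (a hedged sketch; the names are made up):

    from typing import Tuple

    def parse_vpc_name(vpc_name: str, default_project_id: str) -> Tuple[str, str]:
        # 'PROJECT_ID/VPC_NAME' selects a VPC in another (host) project;
        # a bare 'VPC_NAME' stays in the caller's own project.
        if '/' not in vpc_name:
            return default_project_id, vpc_name
        parts = vpc_name.split('/')
        if len(parts) != 2:
            raise ValueError(f'Invalid VPC name: {vpc_name}. '
                             'Use PROJECT_ID/VPC_NAME.')
        return parts[0], parts[1]

    assert parse_vpc_name('my-vpc', 'proj-a') == ('proj-a', 'my-vpc')
    assert parse_vpc_name('host-proj/shared-vpc', 'proj-a') == (
        'host-proj', 'shared-vpc')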
sky/setup_files/dependencies.py
CHANGED
sky/skylet/log_lib.py
CHANGED
@@ -149,6 +149,7 @@ def run_with_log(
     process_stream: bool = True,
     line_processor: Optional[log_utils.LineProcessor] = None,
     streaming_prefix: Optional[str] = None,
+    log_cmd: bool = False,
     **kwargs,
 ) -> Union[int, Tuple[int, str, str]]:
     """Runs a command and logs its output to a file.

@@ -182,6 +183,9 @@ def run_with_log(
     # the terminal output when typing in the terminal that starts the API
     # server.
     stdin = kwargs.pop('stdin', subprocess.DEVNULL)
+    if log_cmd:
+        with open(log_path, 'a', encoding='utf-8') as f:
+            print(f'Running command: {cmd}', file=f)
     with subprocess.Popen(cmd,
                           stdout=stdout_arg,
                           stderr=stderr_arg,
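
The new `log_cmd` flag writes the command line into the log before the process starts, so a log file is self-describing when read later. The pattern in isolation (file path and command are arbitrary):

    import shlex
    import subprocess
    from typing import List

    def run_logged(cmd: List[str], log_path: str, log_cmd: bool = False) -> int:
        if log_cmd:
            # Record the command itself at the top of the log.
            with open(log_path, 'a', encoding='utf-8') as f:
                print(f'Running command: {shlex.join(cmd)}', file=f)
        with open(log_path, 'a', encoding='utf-8') as f:
            return subprocess.Popen(cmd, stdout=f,
                                    stderr=subprocess.STDOUT).wait()

    # run_logged(['echo', 'hello'], '/tmp/run.log', log_cmd=True)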
sky/task.py
CHANGED
@@ -552,15 +552,35 @@ class Task:
             estimated_size_gigabytes=estimated_size_gigabytes)

         # Experimental configs.
-        experimental_configs = config.pop('experimental', None)
-        cluster_config_override = None
-        if experimental_configs is not None:
-            cluster_config_override = experimental_configs.pop(
+        experimental_configs = config.pop('experimental', None)
+
+        # Handle the top-level config field
+        config_override = config.pop('config', None)
+
+        # Handle backward compatibility with experimental.config_overrides
+        # TODO: Remove experimental.config_overrides in 0.11.0.
+        if experimental_configs is not None:
+            exp_config_override = experimental_configs.pop(
                 'config_overrides', None)
+            if exp_config_override is not None:
+                logger.warning(
+                    f'{colorama.Fore.YELLOW}`experimental.config_overrides` '
+                    'field is deprecated in the task YAML. Use the `config` '
+                    f'field to set config overrides.{colorama.Style.RESET_ALL}')
+                if config_override is not None:
+                    logger.warning(
+                        f'{colorama.Fore.YELLOW}Both top-level `config` and '
+                        f'`experimental.config_overrides` are specified. '
+                        f'Using top-level `config`.{colorama.Style.RESET_ALL}')
+                else:
+                    config_override = exp_config_override
             logger.debug('Overriding skypilot config with task-level config: '
-                         f'{cluster_config_override}')
-        assert not experimental_configs, ('Invalid task args: '
-                                          f'{experimental_configs.keys()}')
+                         f'{config_override}')
+            assert not experimental_configs, ('Invalid task args: '
+                                              f'{experimental_configs.keys()}')
+
+        # Store the final config override for use in resource setup
+        cluster_config_override = config_override

         # Parse resources field.
         resources_config = config.pop('resources', {})
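
The precedence implemented above — top-level `config` wins, `experimental.config_overrides` still works with a deprecation warning — condenses to a few lines. A hedged sketch (the override payload is an arbitrary example, not a confirmed overrideable key set):

    import warnings

    def resolve_config_override(task_config: dict):
        experimental = task_config.pop('experimental', None) or {}
        config_override = task_config.pop('config', None)
        exp_override = experimental.pop('config_overrides', None)
        if exp_override is not None:
            warnings.warn('experimental.config_overrides is deprecated; '
                          'use the top-level config field.')
            if config_override is None:
                config_override = exp_override
        return config_override

    # Old style still resolves; top-level `config` wins if both are set.
    assert resolve_config_override(
        {'experimental': {'config_overrides': {'docker': {}}}}) == {'docker': {}}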
sky/utils/schemas.py
CHANGED
@@ -473,6 +473,8 @@ def _filter_schema(schema: dict, keys_to_keep: List[Tuple[str, ...]]) -> dict:


 def _experimental_task_schema() -> dict:
+    # TODO: experimental.config_overrides has been deprecated in favor of the
+    # top-level `config` field. Remove in v0.11.0.
     config_override_schema = _filter_schema(
         get_config_schema(), constants.OVERRIDEABLE_CONFIG_KEYS_IN_TASK)
     return {

@@ -555,6 +557,9 @@ def get_task_schema():
             'file_mounts_mapping': {
                 'type': 'object',
             },
+            'config': _filter_schema(
+                get_config_schema(),
+                constants.OVERRIDEABLE_CONFIG_KEYS_IN_TASK),
             **_experimental_task_schema(),
         }
     }

@@ -604,13 +609,6 @@ def get_cluster_schema():


 _NETWORK_CONFIG_SCHEMA = {
-    'vpc_name': {
-        'oneOf': [{
-            'type': 'string',
-        }, {
-            'type': 'null',
-        }],
-    },
     'use_internal_ips': {
         'type': 'boolean',
     },

@@ -767,6 +765,13 @@ def get_config_schema():
                 },
                 'security_group_name':
                     (_PRORPERTY_NAME_OR_CLUSTER_NAME_TO_PROPERTY),
+                'vpc_name': {
+                    'oneOf': [{
+                        'type': 'string',
+                    }, {
+                        'type': 'null',
+                    }],
+                },
                 **_LABELS_SCHEMA,
                 **_NETWORK_CONFIG_SCHEMA,
             },

@@ -805,6 +810,19 @@ def get_config_schema():
                 'enable_gvnic': {
                     'type': 'boolean'
                 },
+                'vpc_name': {
+                    'oneOf': [
+                        {
+                            'type': 'string',
+                            # vpc-name or project-id/vpc-name
+                            # VPC name and Project ID have -, a-z, and 0-9.
+                            'pattern': '^(?:[-a-z0-9]+/)?[-a-z0-9]+$'
+                        },
+                        {
+                            'type': 'null',
+                        }
+                    ],
+                },
                 **_LABELS_SCHEMA,
                 **_NETWORK_CONFIG_SCHEMA,
             },
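
The GCP `vpc_name` schema entry now validates the `project-id/vpc-name` form via a regex. What the pattern accepts can be checked directly with the `jsonschema` package:

    import jsonschema

    vpc_name_schema = {
        'oneOf': [
            {'type': 'string', 'pattern': '^(?:[-a-z0-9]+/)?[-a-z0-9]+$'},
            {'type': 'null'},
        ]
    }

    for value in ('my-vpc', 'host-project/shared-vpc', None):
        jsonschema.validate(value, vpc_name_schema)  # all accepted

    try:
        jsonschema.validate('Bad_Name', vpc_name_schema)
    except jsonschema.ValidationError:
        print('uppercase and underscores are rejected')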
{skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250410.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: skypilot-nightly
-Version: 1.0.0.dev20250408
+Version: 1.0.0.dev20250410
 Summary: SkyPilot: An intercloud broker for the clouds
 Author: SkyPilot Team
 License: Apache 2.0

@@ -19,7 +19,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: System :: Distributed Computing
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: wheel
+Requires-Dist: wheel<0.46.0
 Requires-Dist: cachetools
 Requires-Dist: click>=7.0
 Requires-Dist: colorama