skypilot-nightly 1.0.0.dev20250408__py3-none-any.whl → 1.0.0.dev20250410__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/azure.py +1 -1
  3. sky/adaptors/nebius.py +5 -27
  4. sky/backends/backend.py +9 -7
  5. sky/backends/cloud_vm_ray_backend.py +7 -7
  6. sky/backends/local_docker_backend.py +3 -3
  7. sky/cloud_stores.py +0 -4
  8. sky/clouds/do.py +4 -5
  9. sky/clouds/gcp.py +5 -3
  10. sky/clouds/nebius.py +22 -12
  11. sky/clouds/service_catalog/data_fetchers/fetch_ibm.py +1 -2
  12. sky/clouds/service_catalog/gcp_catalog.py +37 -10
  13. sky/core.py +6 -6
  14. sky/data/data_utils.py +5 -9
  15. sky/data/mounting_utils.py +1 -1
  16. sky/data/storage.py +25 -31
  17. sky/data/storage_utils.py +27 -18
  18. sky/execution.py +11 -4
  19. sky/jobs/server/server.py +5 -1
  20. sky/provision/do/utils.py +19 -16
  21. sky/provision/gcp/config.py +30 -20
  22. sky/setup_files/dependencies.py +1 -1
  23. sky/skylet/log_lib.py +4 -0
  24. sky/task.py +27 -7
  25. sky/utils/schemas.py +25 -7
  26. {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250410.dist-info}/METADATA +2 -2
  27. {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250410.dist-info}/RECORD +31 -31
  28. {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250410.dist-info}/WHEEL +0 -0
  29. {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250410.dist-info}/entry_points.txt +0 -0
  30. {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250410.dist-info}/licenses/LICENSE +0 -0
  31. {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250410.dist-info}/top_level.txt +0 -0
sky/data/storage.py CHANGED
@@ -1616,9 +1616,25 @@ class S3Store(AbstractStore):
         # we exclude .git directory from the sync
         excluded_list = storage_utils.get_excluded_files(src_dir_path)
         excluded_list.append('.git/*')
+
+        # Process exclusion patterns to make them work correctly with aws
+        # s3 sync
+        processed_excludes = []
+        for excluded_path in excluded_list:
+            # Check if the path is a directory exclusion pattern
+            # For AWS S3 sync, directory patterns need to end with "/**" to
+            # exclude all contents
+            if (excluded_path.endswith('/') or os.path.isdir(
+                    os.path.join(src_dir_path, excluded_path.rstrip('/')))):
+                # Remove any trailing slash and add '/*' to exclude all
+                # contents
+                processed_excludes.append(f'{excluded_path.rstrip("/")}/*')
+            else:
+                processed_excludes.append(excluded_path)
+
         excludes = ' '.join([
             f'--exclude {shlex.quote(file_name)}'
-            for file_name in excluded_list
+            for file_name in processed_excludes
         ])
         src_dir_path = shlex.quote(src_dir_path)
         sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
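Note: the new pattern processing can be exercised in isolation. A minimal sketch of the same logic (the directory layout and pattern list below are hypothetical, not from the release):

    import os

    def process_excludes(src_dir_path: str, excluded_list: list) -> list:
        """Mirrors the hunk above: directory entries become '<dir>/*'."""
        processed = []
        for path in excluded_list:
            is_dir = path.endswith('/') or os.path.isdir(
                os.path.join(src_dir_path, path.rstrip('/')))
            # A bare '--exclude logs' would not match files under logs/,
            # so directory patterns get a '/*' suffix.
            processed.append(f"{path.rstrip('/')}/*" if is_dir else path)
        return processed

    # Hypothetical inputs: 'logs/' is a directory, 'notes.txt' a file.
    print(process_excludes('/tmp/proj', ['logs/', 'notes.txt', '.git/*']))
    # -> ['logs/*', 'notes.txt', '.git/*']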
@@ -4676,7 +4692,6 @@ class NebiusStore(AbstractStore):
                  _bucket_sub_path: Optional[str] = None):
         self.client: 'boto3.client.Client'
         self.bucket: 'StorageHandle'
-        self.region = region if region is not None else nebius.DEFAULT_REGION
         super().__init__(name, source, region, is_sky_managed,
                          sync_on_reconstruction, _bucket_sub_path)
 
@@ -4749,7 +4764,7 @@ class NebiusStore(AbstractStore):
             StorageBucketGetError: If fetching existing bucket fails
             StorageInitError: If general initialization fails.
         """
-        self.client = data_utils.create_nebius_client(self.region)
+        self.client = data_utils.create_nebius_client()
         self.bucket, is_new_bucket = self._get_bucket()
         if self.is_sky_managed is None:
             # If is_sky_managed is not specified, then this is a new storage
@@ -4846,12 +4861,10 @@ class NebiusStore(AbstractStore):
             f'--include {shlex.quote(file_name)}'
             for file_name in file_names
         ])
-        endpoint_url = nebius.create_endpoint(self.region)
         base_dir_path = shlex.quote(base_dir_path)
         sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
                         f'{includes} {base_dir_path} '
                         f's3://{self.name}{sub_path} '
-                        f'--endpoint={endpoint_url} '
                         f'--profile={nebius.NEBIUS_PROFILE_NAME}')
         return sync_command
 
@@ -4863,12 +4876,10 @@ class NebiusStore(AbstractStore):
             f'--exclude {shlex.quote(file_name)}'
             for file_name in excluded_list
         ])
-        endpoint_url = nebius.create_endpoint(self.region)
         src_dir_path = shlex.quote(src_dir_path)
         sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
                         f'{src_dir_path} '
                         f's3://{self.name}{sub_path}/{dest_dir_name} '
-                        f'--endpoint={endpoint_url} '
                         f'--profile={nebius.NEBIUS_PROFILE_NAME}')
         return sync_command
 
@@ -4927,7 +4938,6 @@ class NebiusStore(AbstractStore):
         """
         nebius_s = nebius.resource('s3')
         bucket = nebius_s.Bucket(self.name)
-        endpoint_url = nebius.create_endpoint(self.region)
         try:
             # Try Public bucket case.
             # This line does not error out if the bucket is an external public
@@ -4942,7 +4952,6 @@ class NebiusStore(AbstractStore):
             # user.
             if error_code == '403':
                 command = (f'aws s3 ls s3://{self.name} '
-                           f'--endpoint={endpoint_url} '
                            f'--profile={nebius.NEBIUS_PROFILE_NAME}')
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageBucketGetError(
@@ -4954,7 +4963,7 @@ class NebiusStore(AbstractStore):
                 raise exceptions.StorageBucketGetError(
                     'Attempted to use a non-existent bucket as a source: '
                     f'{self.source}. Consider using `aws s3 ls '
-                    f's3://{self.name} --endpoint={endpoint_url}'
+                    f's3://{self.name} '
                     f'--profile={nebius.NEBIUS_PROFILE_NAME}` to debug.')
 
         # If bucket cannot be found in both private and public settings,
@@ -4962,7 +4971,7 @@ class NebiusStore(AbstractStore):
         # Store object is being reconstructed for deletion or re-mount with
         # sky start, and error is raised instead.
         if self.sync_on_reconstruction:
-            bucket = self._create_nebius_bucket(self.name, self.region)
+            bucket = self._create_nebius_bucket(self.name)
            return bucket, True
         else:
             # Raised when Storage object is reconstructed for sky storage
@@ -4991,38 +5000,27 @@ class NebiusStore(AbstractStore):
             mount_path: str; Path to mount the bucket to.
         """
         install_cmd = mounting_utils.get_s3_mount_install_cmd()
-        endpoint_url = nebius.create_endpoint(self.region)
         nebius_profile_name = nebius.NEBIUS_PROFILE_NAME
+        endpoint_url = self.client.meta.endpoint_url
         mount_cmd = mounting_utils.get_nebius_mount_cmd(nebius_profile_name,
-                                                        endpoint_url,
                                                         self.bucket.name,
+                                                        endpoint_url,
                                                         mount_path,
                                                         self._bucket_sub_path)
         return mounting_utils.get_mounting_command(mount_path, install_cmd,
                                                    mount_cmd)
 
-    def _create_nebius_bucket(self,
-                              bucket_name: str,
-                              region='auto') -> StorageHandle:
-        """Creates S3 bucket with specific name in specific region
+    def _create_nebius_bucket(self, bucket_name: str) -> StorageHandle:
+        """Creates S3 bucket with specific name
 
         Args:
             bucket_name: str; Name of bucket
-            region: str; Region name, e.g. us-west-1, us-east-2
         Raises:
             StorageBucketCreateError: If bucket creation fails.
         """
         nebius_client = self.client
         try:
-            if region is None:
-                nebius_client.create_bucket(Bucket=bucket_name)
-            else:
-                location = {'LocationConstraint': region}
-                nebius_client.create_bucket(Bucket=bucket_name,
-                                            CreateBucketConfiguration=location)
-                logger.info(f' {colorama.Style.DIM}Created Nebius bucket '
-                            f'{bucket_name!r} in {region}'
-                            f'{colorama.Style.RESET_ALL}')
+            nebius_client.create_bucket(Bucket=bucket_name)
         except aws.botocore_exceptions().ClientError as e:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketCreateError(
@@ -5070,9 +5068,7 @@ class NebiusStore(AbstractStore):
         # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
         # The fastest way to delete is to run `aws s3 rb --force`,
         # which removes the bucket by force.
-        endpoint_url = nebius.create_endpoint(self.region)
         remove_command = (f'aws s3 rb s3://{bucket_name} --force '
-                          f'--endpoint {endpoint_url} '
                           f'--profile={nebius.NEBIUS_PROFILE_NAME}')
 
         success = self._execute_nebius_remove_command(
@@ -5094,10 +5090,8 @@ class NebiusStore(AbstractStore):
     def _delete_nebius_bucket_sub_path(self, bucket_name: str,
                                        sub_path: str) -> bool:
         """Deletes the sub path from the bucket."""
-        endpoint_url = nebius.create_endpoint(self.region)
         remove_command = (
             f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
-            f'--endpoint {endpoint_url} '
             f'--profile={nebius.NEBIUS_PROFILE_NAME}')
         return self._execute_nebius_remove_command(
             remove_command, bucket_name, f'Removing objects from '
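Note: the common thread in the NebiusStore hunks above is that the explicit region/endpoint plumbing is removed; where an endpoint is still needed, it now comes from the already-configured boto3 client via client.meta.endpoint_url. A minimal sketch of that pattern with plain boto3 (the profile name and endpoint URL are hypothetical):

    import boto3

    # A client built from a named profile already knows its endpoint,
    # so callers no longer need to thread a region through every helper.
    session = boto3.Session(profile_name='nebius')  # hypothetical profile
    client = session.client('s3', endpoint_url='https://storage.example.com')
    print(client.meta.endpoint_url)  # -> 'https://storage.example.com'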
sky/data/storage_utils.py CHANGED
@@ -4,7 +4,7 @@ import os
 import pathlib
 import shlex
 import subprocess
-from typing import Any, Dict, List, Optional, TextIO, Union
+from typing import Any, Dict, List, Optional, Set, TextIO, Union
 import warnings
 import zipfile
 
@@ -71,7 +71,7 @@ def get_excluded_files_from_skyignore(src_dir_path: str) -> List[str]:
     """List files and patterns ignored by the .skyignore file
     in the given source directory.
     """
-    excluded_list: List[str] = []
+    excluded_list: Set[str] = set()
     expand_src_dir_path = os.path.expanduser(src_dir_path)
     skyignore_path = os.path.join(expand_src_dir_path,
                                   constants.SKY_IGNORE_FILE)
@@ -95,12 +95,12 @@ def get_excluded_files_from_skyignore(src_dir_path: str) -> List[str]:
                 for i in range(len(matching_files)):
                     matching_files[i] = os.path.relpath(
                         matching_files[i], expand_src_dir_path)
-                excluded_list.extend(matching_files)
+                excluded_list.update(matching_files)
     except IOError as e:
         logger.warning(f'Error reading {skyignore_path}: '
                        f'{common_utils.format_exception(e, use_bracket=True)}')
 
-    return excluded_list
+    return list(excluded_list)
 
 
 def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
@@ -111,8 +111,8 @@ def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
     This will also be run for all submodules under the src_dir_path.
 
     Returns:
-        List[str] containing files and patterns to be ignored. Some of the
-        patterns include, **/mydir/*.txt, !myfile.log, or file-*/.
+        List[str] containing files and folders to be ignored. There won't be any
+        patterns.
     """
     expand_src_dir_path = os.path.expanduser(src_dir_path)
 
@@ -210,10 +210,6 @@ def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
             return []
 
             to_be_excluded = os.path.join(repo, item)
-            if item.endswith('/'):
-                # aws s3 sync and gsutil rsync require * to exclude
-                # files/dirs under the specified directory.
-                to_be_excluded += '*'
 
             excluded_list.append(to_be_excluded)
 
@@ -223,11 +219,21 @@ def get_excluded_files(src_dir_path: str) -> List[str]:
 def get_excluded_files(src_dir_path: str) -> List[str]:
     # TODO: this could return a huge list of files,
     # should think of ways to optimize.
-    """List files and directories to be excluded."""
+    """List files and directories to be excluded.
+
+    Args:
+        src_dir_path (str): The path to the source directory.
+
+    Returns:
+        A list of relative paths to files and directories to be excluded from
+        the source directory.
+    """
     expand_src_dir_path = os.path.expanduser(src_dir_path)
     skyignore_path = os.path.join(expand_src_dir_path,
                                   constants.SKY_IGNORE_FILE)
     # Fail fast if the source is a file.
+    if not os.path.exists(expand_src_dir_path):
+        raise ValueError(f'{src_dir_path} does not exist.')
     if os.path.isfile(expand_src_dir_path):
         raise ValueError(f'{src_dir_path} is a file, not a directory.')
     if os.path.exists(skyignore_path):
@@ -235,12 +241,14 @@ def get_excluded_files(src_dir_path: str) -> List[str]:
                      f'Excluded files to sync to cluster based on '
                      f'{constants.SKY_IGNORE_FILE}.'
                      f'{colorama.Style.RESET_ALL}')
-        return get_excluded_files_from_skyignore(src_dir_path)
-    logger.debug(f' {colorama.Style.DIM}'
-                 f'Excluded files to sync to cluster based on '
-                 f'{constants.GIT_IGNORE_FILE}.'
-                 f'{colorama.Style.RESET_ALL}')
-    return get_excluded_files_from_gitignore(src_dir_path)
+        excluded_paths = get_excluded_files_from_skyignore(src_dir_path)
+    else:
+        logger.debug(f' {colorama.Style.DIM}'
+                     f'Excluded files to sync to cluster based on '
+                     f'{constants.GIT_IGNORE_FILE}.'
+                     f'{colorama.Style.RESET_ALL}')
+        excluded_paths = get_excluded_files_from_gitignore(src_dir_path)
+    return excluded_paths
 
 
 def zip_files_and_folders(items: List[str],
@@ -277,7 +285,8 @@ def zip_files_and_folders(items: List[str],
                 zipf.write(item)
             elif os.path.isdir(item):
                 excluded_files = set([
-                    os.path.join(item, f) for f in get_excluded_files(item)
+                    os.path.join(item, f.rstrip('/'))
+                    for f in get_excluded_files(item)
                 ])
                 for root, dirs, files in os.walk(item, followlinks=False):
                     # Modify dirs in-place to control os.walk()'s traversal
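Note: taken together, these hunks make get_excluded_files() fail fast on a missing source and return de-duplicated relative paths without trailing wildcards; callers such as zip_files_and_folders() now strip any trailing '/' themselves. A hedged usage sketch (the directory layout is hypothetical, and the calls are shown commented out since they depend on the sky package and its .skyignore matching rules):

    import os
    import tempfile

    # Hypothetical layout: a source dir with a .skyignore listing 'data'.
    src = tempfile.mkdtemp()
    os.mkdir(os.path.join(src, 'data'))
    with open(os.path.join(src, '.skyignore'), 'w', encoding='utf-8') as f:
        f.write('data\n')

    # from sky.data import storage_utils
    # storage_utils.get_excluded_files(src)        # e.g. ['data']
    # storage_utils.get_excluded_files('/no/dir')  # ValueError: does not exist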
sky/execution.py CHANGED
@@ -159,9 +159,9 @@ def _execute(
       no_setup: bool; whether to skip setup commands or not when (re-)launching.
       clone_disk_from: Optional[str]; if set, clone the disk from the specified
         cluster.
-      skip_unecessary_provisioning: bool; if True, compare the calculated
+      skip_unnecessary_provisioning: bool; if True, compare the calculated
         cluster config to the current cluster's config. If they match, shortcut
-        provisioning even if we have Stage.PROVISION.
+        provisioning and setup, even if we have Stage.PROVISION and Stage.SETUP.
 
     Returns:
       job_id: Optional[int]; the job ID of the submitted job. None if the
@@ -303,12 +303,13 @@ def _execute(
         task.sync_storage_mounts()
 
     try:
+        provisioning_skipped = False
         if Stage.PROVISION in stages:
             assert handle is None or skip_unnecessary_provisioning, (
                 'Provisioning requested, but handle is already set. PROVISION '
                 'should be excluded from stages or '
                 'skip_unecessary_provisioning should be set. ')
-            handle = backend.provision(
+            (handle, provisioning_skipped) = backend.provision(
                 task,
                 task.best_resources,
                 dryrun=dryrun,
@@ -341,7 +342,11 @@ def _execute(
         if no_setup:
             logger.info('Setup commands skipped.')
         elif Stage.SETUP in stages and not dryrun:
-            backend.setup(handle, task, detach_setup=detach_setup)
+            if skip_unnecessary_provisioning and provisioning_skipped:
+                logger.debug('Unnecessary provisioning was skipped, so '
+                             'skipping setup as well.')
+            else:
+                backend.setup(handle, task, detach_setup=detach_setup)
 
         if Stage.PRE_EXEC in stages and not dryrun:
             if idle_minutes_to_autostop is not None:
@@ -523,6 +528,8 @@ def launch(
             Stage.PROVISION,
             Stage.SYNC_WORKDIR,
             Stage.SYNC_FILE_MOUNTS,
+            # Setup will be skipped if provisioning was skipped.
+            Stage.SETUP,
             Stage.PRE_EXEC,
             Stage.EXEC,
             Stage.DOWN,
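Note: this change assumes backend.provision() now returns a (handle, provisioning_skipped) pair instead of a bare handle, consistent with the sky/backends/backend.py signature change listed above. The new control flow, reduced to a sketch with simplified names:

    def run(backend, task, stages, skip_unnecessary_provisioning=False):
        provisioning_skipped = False
        handle = None
        if 'PROVISION' in stages:
            # provision() reports whether it short-circuited because the
            # requested config already matched the live cluster.
            handle, provisioning_skipped = backend.provision(task)
        if 'SETUP' in stages:
            if skip_unnecessary_provisioning and provisioning_skipped:
                pass  # cluster unchanged; setup already ran on it
            else:
                backend.setup(handle, task)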
sky/jobs/server/server.py CHANGED
@@ -161,7 +161,11 @@ async def dashboard(request: fastapi.Request,
             response = await client.request('GET',
                                             dashboard_url,
                                             timeout=5)
-            break  # Connection successful, proceed with the request
+            if response.is_success:
+                break  # Connection successful, proceed with the request
+            # Raise an HTTPException here which will be caught by the
+            # following except block to retry with new connection
+            response.raise_for_status()
         except Exception as e:  # pylint: disable=broad-except
             # We catch all exceptions to gracefully handle unknown
             # errors and retry or raise an HTTPException to the client.
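Note: the proxy previously treated any completed request as success; now a non-2xx response also takes the retry path. The same pattern as a standalone httpx sketch (the URL and attempt count are hypothetical):

    import httpx

    async def fetch_with_retry(url: str, attempts: int = 3) -> httpx.Response:
        async with httpx.AsyncClient() as client:
            for _ in range(attempts):
                try:
                    response = await client.request('GET', url, timeout=5)
                    if response.is_success:
                        return response
                    # Non-2xx: raise so the except block retries with a
                    # fresh connection, as in the handler above.
                    response.raise_for_status()
                except Exception:  # pylint: disable=broad-except
                    continue
        raise RuntimeError(f'Failed to reach {url}')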
sky/provision/do/utils.py CHANGED
@@ -15,6 +15,7 @@ from sky.adaptors import do
 from sky.provision import common
 from sky.provision import constants as provision_constants
 from sky.provision.do import constants
+from sky.utils import annotations
 from sky.utils import common_utils
 
 logger = sky_logging.init_logger(__name__)
@@ -31,7 +32,6 @@ MAX_BACKOFF_FACTOR = 10
 MAX_ATTEMPTS = 6
 SSH_KEY_NAME_ON_DO = f'sky-key-{common_utils.get_user_hash()}'
 
-CREDENTIALS_PATH = '~/.config/doctl/config.yaml'
 _client = None
 _ssh_key_id = None
 
@@ -40,31 +40,34 @@ class DigitalOceanError(Exception):
     pass
 
 
-def _init_client():
-    global _client, CREDENTIALS_PATH
-    assert _client is None
-    CREDENTIALS_PATH = None
+@annotations.lru_cache(scope='request')
+def get_credentials_path():
+    credentials_path = None
     credentials_found = 0
     for path in POSSIBLE_CREDENTIALS_PATHS:
         if os.path.exists(path):
-            CREDENTIALS_PATH = path
-            credentials_found += 1
             logger.debug(f'Digital Ocean credential path found at {path}')
-    if not credentials_found > 1:
-        logger.debug('more than 1 credential file found')
-    if CREDENTIALS_PATH is None:
-        raise DigitalOceanError(
-            'no credentials file found from '
-            f'the following paths {POSSIBLE_CREDENTIALS_PATHS}')
+            credentials_path = path
+            credentials_found += 1
+    if credentials_found > 1:
+        logger.debug('More than 1 credential file found')
+    return credentials_path
 
+
+def _init_client():
+    global _client
+    assert _client is None
     # attempt default context
-    credentials = common_utils.read_yaml(CREDENTIALS_PATH)
+    if get_credentials_path() is None:
+        raise DigitalOceanError(
+            'No credentials found, please run `doctl auth init`')
+    credentials = common_utils.read_yaml(get_credentials_path())
     default_token = credentials.get('access-token', None)
     if default_token is not None:
         try:
             test_client = do.pydo.Client(token=default_token)
             test_client.droplets.list()
-            logger.debug('trying `default` context')
+            logger.debug('Trying `default` context')
             _client = test_client
             return _client
         except do.exceptions().HttpResponseError:
@@ -76,7 +79,7 @@ def _init_client():
         try:
             test_client = do.pydo.Client(token=api_token)
             test_client.droplets.list()
-            logger.debug(f'using {context} context')
+            logger.debug(f'Using "{context}" context')
             _client = test_client
             break
         except do.exceptions().HttpResponseError:
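Note: credential discovery moves from module-level mutable state to a cached lookup (annotations.lru_cache(scope='request') is SkyPilot's own wrapper). The scan itself reduces to a last-match search, sketched here with functools (the candidate path below is hypothetical):

    import functools
    import os

    POSSIBLE_CREDENTIALS_PATHS = [
        os.path.expanduser('~/.config/doctl/config.yaml'),  # hypothetical
    ]

    @functools.lru_cache(maxsize=1)
    def get_credentials_path():
        found = None
        for path in POSSIBLE_CREDENTIALS_PATHS:
            if os.path.exists(path):
                found = path  # last existing candidate wins
        return found  # None -> caller raises, pointing at `doctl auth init`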
sky/provision/gcp/config.py CHANGED
@@ -571,35 +571,45 @@ def get_usable_vpc_and_subnet(
 
     specific_vpc_to_use = config.provider_config.get('vpc_name', None)
     if specific_vpc_to_use is not None:
+        if '/' in specific_vpc_to_use:
+            # VPC can also be specified in the format PROJECT_ID/VPC_NAME.
+            # This enables use of shared VPCs.
+            split_vpc_value = specific_vpc_to_use.split('/')
+            if len(split_vpc_value) != 2:
+                raise ValueError(f'Invalid VPC name: {specific_vpc_to_use}. '
+                                 'Please specify the VPC name in the format '
+                                 'PROJECT_ID/VPC_NAME.')
+            project_id = split_vpc_value[0]
+            specific_vpc_to_use = split_vpc_value[1]
+
         vpcnets_all = _list_vpcnets(project_id,
                                     compute,
                                     filter=f'name={specific_vpc_to_use}')
-        # On GCP, VPC names are unique, so it'd be 0 or 1 VPC found.
-        assert (len(vpcnets_all) <=
-                1), (f'{len(vpcnets_all)} VPCs found with the same name '
-                     f'{specific_vpc_to_use}')
-        if len(vpcnets_all) == 1:
-            # Skip checking any firewall rules if the user has specified a VPC.
-            logger.info(f'Using user-specified VPC {specific_vpc_to_use!r}.')
-            subnets = _list_subnets(project_id,
-                                    region,
-                                    compute,
-                                    network=specific_vpc_to_use)
-            if not subnets:
-                _skypilot_log_error_and_exit_for_failover(
-                    'SUBNET_NOT_FOUND_FOR_VPC',
-                    f'No subnet for region {region} found for specified VPC '
-                    f'{specific_vpc_to_use!r}. '
-                    f'Check the subnets of VPC {specific_vpc_to_use!r} at '
-                    'https://console.cloud.google.com/networking/networks')
-            return specific_vpc_to_use, subnets[0]
-        else:
+        if not vpcnets_all:
             # VPC with this name not found. Error out and let SkyPilot failover.
             _skypilot_log_error_and_exit_for_failover(
                 'VPC_NOT_FOUND',
                 f'No VPC with name {specific_vpc_to_use!r} is found. '
                 'To fix: specify a correct VPC name.')
             # Should not reach here.
+            assert False
+
+        # On GCP, VPC names are unique within a project.
+        assert len(vpcnets_all) == 1, (vpcnets_all, specific_vpc_to_use)
+        # Skip checking any firewall rules if the user has specified a VPC.
+        logger.info(f'Using user-specified VPC {specific_vpc_to_use!r}.')
+        subnets = _list_subnets(project_id,
+                                region,
+                                compute,
+                                network=specific_vpc_to_use)
+        if not subnets:
+            _skypilot_log_error_and_exit_for_failover(
+                'SUBNET_NOT_FOUND_FOR_VPC',
+                f'No subnet for region {region} found for specified VPC '
+                f'{specific_vpc_to_use!r}. '
+                f'Check the subnets of VPC {specific_vpc_to_use!r} at '
+                'https://console.cloud.google.com/networking/networks')
+        return specific_vpc_to_use, subnets[0]
 
     subnets_all = _list_subnets(project_id, region, compute)
 
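Note: the shared-VPC support hinges on splitting a PROJECT_ID/VPC_NAME value into a host project and a VPC name. The parsing rule in isolation (a sketch; the names are invented):

    def parse_vpc_name(project_id: str, vpc_name: str):
        """'proj/vpc' selects a VPC shared from another project."""
        if '/' in vpc_name:
            parts = vpc_name.split('/')
            if len(parts) != 2:
                raise ValueError(f'Invalid VPC name: {vpc_name}')
            return parts[0], parts[1]
        return project_id, vpc_name

    assert parse_vpc_name('my-proj', 'my-vpc') == ('my-proj', 'my-vpc')
    assert parse_vpc_name('my-proj', 'host-proj/shared-vpc') == (
        'host-proj', 'shared-vpc')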
sky/setup_files/dependencies.py CHANGED
@@ -9,7 +9,7 @@ import sys
 from typing import Dict, List
 
 install_requires = [
-    'wheel',
+    'wheel<0.46.0',  # https://github.com/skypilot-org/skypilot/issues/5153
     'cachetools',
     # NOTE: ray requires click>=7.0.
     'click >= 7.0',
sky/skylet/log_lib.py CHANGED
@@ -149,6 +149,7 @@ def run_with_log(
149
149
  process_stream: bool = True,
150
150
  line_processor: Optional[log_utils.LineProcessor] = None,
151
151
  streaming_prefix: Optional[str] = None,
152
+ log_cmd: bool = False,
152
153
  **kwargs,
153
154
  ) -> Union[int, Tuple[int, str, str]]:
154
155
  """Runs a command and logs its output to a file.
@@ -182,6 +183,9 @@ def run_with_log(
182
183
  # the terminal output when typing in the terminal that starts the API
183
184
  # server.
184
185
  stdin = kwargs.pop('stdin', subprocess.DEVNULL)
186
+ if log_cmd:
187
+ with open(log_path, 'a', encoding='utf-8') as f:
188
+ print(f'Running command: {cmd}', file=f)
185
189
  with subprocess.Popen(cmd,
186
190
  stdout=stdout_arg,
187
191
  stderr=stderr_arg,
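Note: with the new flag, callers can have the command line itself recorded at the head of the log file before the process output. A hypothetical invocation (the path and arguments are invented for illustration, and other required parameters of run_with_log are elided):

    # from sky.skylet import log_lib
    # log_lib.run_with_log(
    #     ['echo', 'hello'],
    #     log_path='/tmp/task.log',  # hypothetical path
    #     log_cmd=True)  # writes 'Running command: ...' before the output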
sky/task.py CHANGED
@@ -552,15 +552,35 @@ class Task:
             estimated_size_gigabytes=estimated_size_gigabytes)
 
         # Experimental configs.
-        experimnetal_configs = config.pop('experimental', None)
-        cluster_config_override = None
-        if experimnetal_configs is not None:
-            cluster_config_override = experimnetal_configs.pop(
+        experimental_configs = config.pop('experimental', None)
+
+        # Handle the top-level config field
+        config_override = config.pop('config', None)
+
+        # Handle backward compatibility with experimental.config_overrides
+        # TODO: Remove experimental.config_overrides in 0.11.0.
+        if experimental_configs is not None:
+            exp_config_override = experimental_configs.pop(
                 'config_overrides', None)
+            if exp_config_override is not None:
+                logger.warning(
+                    f'{colorama.Fore.YELLOW}`experimental.config_overrides` '
+                    'field is deprecated in the task YAML. Use the `config` '
+                    f'field to set config overrides.{colorama.Style.RESET_ALL}')
+                if config_override is not None:
+                    logger.warning(
+                        f'{colorama.Fore.YELLOW}Both top-level `config` and '
+                        f'`experimental.config_overrides` are specified. '
+                        f'Using top-level `config`.{colorama.Style.RESET_ALL}')
+                else:
+                    config_override = exp_config_override
             logger.debug('Overriding skypilot config with task-level config: '
-                         f'{cluster_config_override}')
-        assert not experimnetal_configs, ('Invalid task args: '
-                                          f'{experimnetal_configs.keys()}')
+                         f'{config_override}')
+        assert not experimental_configs, ('Invalid task args: '
+                                          f'{experimental_configs.keys()}')
+
+        # Store the final config override for use in resource setup
+        cluster_config_override = config_override
 
         # Parse resources field.
         resources_config = config.pop('resources', {})
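Note: the precedence rule introduced here is that a top-level `config` wins over the deprecated experimental.config_overrides, which is still honored with a warning. Reduced to a sketch:

    def resolve_config_override(task_config: dict):
        config_override = task_config.pop('config', None)
        experimental = task_config.pop('experimental', None) or {}
        exp_override = experimental.pop('config_overrides', None)
        if exp_override is not None and config_override is None:
            config_override = exp_override  # deprecated spelling still works
        return config_override

    # Both set: the top-level `config` wins.
    assert resolve_config_override({
        'config': {'docker': 1},
        'experimental': {'config_overrides': {'docker': 2}},
    }) == {'docker': 1}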
sky/utils/schemas.py CHANGED
@@ -473,6 +473,8 @@ def _filter_schema(schema: dict, keys_to_keep: List[Tuple[str, ...]]) -> dict:
 
 
 def _experimental_task_schema() -> dict:
+    # TODO: experimental.config_overrides has been deprecated in favor of the
+    # top-level `config` field. Remove in v0.11.0.
     config_override_schema = _filter_schema(
         get_config_schema(), constants.OVERRIDEABLE_CONFIG_KEYS_IN_TASK)
     return {
@@ -555,6 +557,9 @@ def get_task_schema():
             'file_mounts_mapping': {
                 'type': 'object',
             },
+            'config': _filter_schema(
+                get_config_schema(),
+                constants.OVERRIDEABLE_CONFIG_KEYS_IN_TASK),
             **_experimental_task_schema(),
         }
     }
@@ -604,13 +609,6 @@ def get_cluster_schema():
 
 
 _NETWORK_CONFIG_SCHEMA = {
-    'vpc_name': {
-        'oneOf': [{
-            'type': 'string',
-        }, {
-            'type': 'null',
-        }],
-    },
     'use_internal_ips': {
         'type': 'boolean',
     },
@@ -767,6 +765,13 @@ def get_config_schema():
             },
             'security_group_name':
                 (_PRORPERTY_NAME_OR_CLUSTER_NAME_TO_PROPERTY),
+            'vpc_name': {
+                'oneOf': [{
+                    'type': 'string',
+                }, {
+                    'type': 'null',
+                }],
+            },
             **_LABELS_SCHEMA,
             **_NETWORK_CONFIG_SCHEMA,
         },
@@ -805,6 +810,19 @@ def get_config_schema():
             'enable_gvnic': {
                 'type': 'boolean'
             },
+            'vpc_name': {
+                'oneOf': [
+                    {
+                        'type': 'string',
+                        # vpc-name or project-id/vpc-name
+                        # VPC name and Project ID have -, a-z, and 0-9.
+                        'pattern': '^(?:[-a-z0-9]+/)?[-a-z0-9]+$'
+                    },
+                    {
+                        'type': 'null',
+                    }
+                ],
+            },
             **_LABELS_SCHEMA,
             **_NETWORK_CONFIG_SCHEMA,
         },
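Note: the new GCP vpc_name entry constrains values to `vpc-name` or `project-id/vpc-name` via the regex above. Checking the pattern directly:

    import re

    VPC_NAME_PATTERN = re.compile(r'^(?:[-a-z0-9]+/)?[-a-z0-9]+$')

    for value in ('my-vpc', 'host-project/shared-vpc'):
        assert VPC_NAME_PATTERN.match(value)
    for value in ('a/b/c', 'My_VPC'):
        assert VPC_NAME_PATTERN.match(value) is None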
{skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250410.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: skypilot-nightly
-Version: 1.0.0.dev20250408
+Version: 1.0.0.dev20250410
 Summary: SkyPilot: An intercloud broker for the clouds
 Author: SkyPilot Team
 License: Apache 2.0
@@ -19,7 +19,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: System :: Distributed Computing
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: wheel
+Requires-Dist: wheel<0.46.0
 Requires-Dist: cachetools
 Requires-Dist: click>=7.0
 Requires-Dist: colorama