skypilot-nightly 1.0.0.dev20240930__py3-none-any.whl → 1.0.0.dev20241001__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '8dd003176336dd00f90f0f599eb6622edd5ac1f6'
8
+ _SKYPILOT_COMMIT_SHA = 'b1f22c4d5fe0a3cc25d1df0a8a05d4230a28b702'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20240930'
38
+ __version__ = '1.0.0.dev20241001'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/clouds/aws.py CHANGED
@@ -225,6 +225,9 @@ class AWS(clouds.Cloud):
225
225
  if acc_name == 'K80':
226
226
  image_id = service_catalog.get_image_id_from_tag(
227
227
  'skypilot:k80-ubuntu-2004', region_name, clouds='aws')
228
+ if acc_name in ['Trainium', 'Inferentia']:
229
+ image_id = service_catalog.get_image_id_from_tag(
230
+ 'skypilot:neuron-ubuntu-2204', region_name, clouds='aws')
228
231
  if image_id is not None:
229
232
  return image_id
230
233
  # Raise ResourcesUnavailableError to make sure the failover in
@@ -379,26 +379,33 @@ def get_all_regions_instance_types_df(regions: Set[str]) -> 'pd.DataFrame':
379
379
  #
380
380
  # Deep Learning AMI GPU PyTorch 1.10.0 (Ubuntu 18.04) 20211208
381
381
  # Nvidia driver: 470.57.02, CUDA Version: 11.4
382
- _GPU_UBUNTU_DATE_PYTORCH = [
383
- ('gpu', '20.04', '20231103', '2.1.0'),
384
- ('gpu', '18.04', '20221114', '1.10.0'),
385
- ('k80', '20.04', '20211208', '1.10.0'),
386
- ('k80', '18.04', '20211208', '1.10.0'),
382
+ #
383
+ # Neuron (Inferentia / Trainium):
384
+ # https://aws.amazon.com/releasenotes/aws-deep-learning-ami-base-neuron-ubuntu-20-04/ # pylint: disable=line-too-long
385
+ # Deep Learning Base Neuron AMI (Ubuntu 22.04) 20240923
386
+ # TODO(tian): find out the driver version.
387
+ # Neuron driver:
388
+ _GPU_DESC_UBUNTU_DATE = [
389
+ ('gpu', 'AMI GPU PyTorch 2.1.0', '20.04', '20231103'),
390
+ ('gpu', 'AMI GPU PyTorch 1.10.0', '18.04', '20221114'),
391
+ ('k80', 'AMI GPU PyTorch 1.10.0', '20.04', '20211208'),
392
+ ('k80', 'AMI GPU PyTorch 1.10.0', '18.04', '20211208'),
393
+ ('neuron', 'Base Neuron AMI', '22.04', '20240923'),
387
394
  ]
388
395
 
389
396
 
390
- def _fetch_image_id(region: str, ubuntu_version: str, creation_date: str,
391
- pytorch_version: str) -> Optional[str]:
397
+ def _fetch_image_id(region: str, description: str, ubuntu_version: str,
398
+ creation_date: str) -> Optional[str]:
392
399
  try:
393
400
  image = subprocess.check_output(f"""\
394
401
  aws ec2 describe-images --region {region} --owners amazon \\
395
- --filters 'Name=name,Values="Deep Learning AMI GPU PyTorch {pytorch_version} (Ubuntu {ubuntu_version}) {creation_date}"' \\
402
+ --filters 'Name=name,Values="Deep Learning {description} (Ubuntu {ubuntu_version}) {creation_date}"' \\
396
403
  'Name=state,Values=available' --query 'Images[:1].ImageId' --output text
397
404
  """,
398
405
  shell=True)
399
406
  except subprocess.CalledProcessError as e:
400
- print(f'Failed {region}, {ubuntu_version}, {creation_date}. '
401
- 'Trying next date.')
407
+ print(f'Failed {region}, {description}, {ubuntu_version}, '
408
+ f'{creation_date}. Trying next date.')
402
409
  print(f'{type(e)}: {e}')
403
410
  image_id = None
404
411
  else:
@@ -407,21 +414,21 @@ def _fetch_image_id(region: str, ubuntu_version: str, creation_date: str,
407
414
  return image_id
408
415
 
409
416
 
410
- def _get_image_row(
411
- region: str, gpu: str, ubuntu_version: str, date: str,
412
- pytorch_version) -> Tuple[str, str, str, str, Optional[str], str]:
413
- print(f'Getting image for {region}, {ubuntu_version}, {gpu}')
414
- image_id = _fetch_image_id(region, ubuntu_version, date, pytorch_version)
417
+ def _get_image_row(region: str, gpu: str, description: str, ubuntu_version: str,
418
+ date: str) -> Tuple[str, str, str, str, Optional[str], str]:
419
+ print(f'Getting image for {region}, {description}, {ubuntu_version}, {gpu}')
420
+ image_id = _fetch_image_id(region, description, ubuntu_version, date)
415
421
  if image_id is None:
416
422
  # not found
417
- print(f'Failed to find image for {region}, {ubuntu_version}, {gpu}')
423
+ print(f'Failed to find image for {region}, {description}, '
424
+ f'{ubuntu_version}, {gpu}')
418
425
  tag = f'skypilot:{gpu}-ubuntu-{ubuntu_version.replace(".", "")}'
419
426
  return tag, region, 'ubuntu', ubuntu_version, image_id, date
420
427
 
421
428
 
422
429
  def get_all_regions_images_df(regions: Set[str]) -> 'pd.DataFrame':
423
430
  image_metas = [
424
- (r, *i) for r, i in itertools.product(regions, _GPU_UBUNTU_DATE_PYTORCH)
431
+ (r, *i) for r, i in itertools.product(regions, _GPU_DESC_UBUNTU_DATE)
425
432
  ]
426
433
  with mp_pool.Pool() as pool:
427
434
  results = pool.starmap(_get_image_row, image_metas)
sky/resources.py CHANGED
@@ -966,20 +966,22 @@ class Resources:
966
966
  """
967
967
  if not self._labels:
968
968
  return
969
-
970
- if self.cloud is None:
971
- # Because each cloud has its own label format, we cannot validate
972
- # the labels without knowing the cloud.
973
- with ux_utils.print_exception_no_traceback():
974
- raise ValueError(
975
- 'Cloud must be specified when labels are provided.')
976
-
977
- # Check if the label key value pairs are valid.
969
+ if self.cloud is not None:
970
+ validated_clouds = [self.cloud]
971
+ else:
972
+ # If no specific cloud is set, validate label against ALL clouds.
973
+ # The label is rejected if it is invalid for any one of those clouds.
974
+ validated_clouds = sky_check.get_cached_enabled_clouds_or_refresh()
978
975
  invalid_table = log_utils.create_table(['Label', 'Reason'])
979
976
  for key, value in self._labels.items():
980
- valid, err_msg = self.cloud.is_label_valid(key, value)
981
- if not valid:
982
- invalid_table.add_row([f'{key}: {value}', err_msg])
977
+ for cloud in validated_clouds:
978
+ valid, err_msg = cloud.is_label_valid(key, value)
979
+ if not valid:
980
+ invalid_table.add_row([
981
+ f'{key}: {value}',
982
+ f'Label rejected due to {cloud}: {err_msg}'
983
+ ])
984
+ break
983
985
  if len(invalid_table.rows) > 0:
984
986
  with ux_utils.print_exception_no_traceback():
985
987
  raise ValueError(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20240930
3
+ Version: 1.0.0.dev20241001
4
4
  Summary: SkyPilot: An intercloud broker for the clouds
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -1,4 +1,4 @@
1
- sky/__init__.py,sha256=cH9RVL_szwM_I98WA4U94dbFCJVrxUPxcinlLdUU8Ps,5854
1
+ sky/__init__.py,sha256=KBftRzx6giOgDIRTtKtHuEcmtof_64nk7UMMQzMsLRQ,5854
2
2
  sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
3
3
  sky/authentication.py,sha256=TfKkVnmRIetATSEVQFp-rOOIRGqVig2i8faSQQt_ixA,20974
4
4
  sky/check.py,sha256=jLMIIJrseaZj1_o5WkbaD9XdyXIlCaT6pyAaIFdhdmA,9079
@@ -10,7 +10,7 @@ sky/exceptions.py,sha256=s7j0iCa1Ec0rU1ABb9EAhqn2qFm22bmKQV_ckgRlMGk,8720
10
10
  sky/execution.py,sha256=97yhNh5BKBh2ZJW8GefGID_4KCYU-arnvemSJB9rf6U,25552
11
11
  sky/global_user_state.py,sha256=PywEmUutF97XBgRMClR6IS5_KM8JJC0oA1LsPUZebp0,28681
12
12
  sky/optimizer.py,sha256=YGBhJPlcvylYON7MLrYEMtBOqJLt4LdlguQclVvvl4E,58677
13
- sky/resources.py,sha256=_959wcQnoiAYesslN9BPXWABFaQfc_TFXPO_o7SPlxI,67325
13
+ sky/resources.py,sha256=b9yaZvZkL-QZdElQLHsEZ2jhKgId2ixG8M2Z8DLBBKU,67450
14
14
  sky/sky_logging.py,sha256=I59__M9taBjDim15ie0m25Vtn6itLtR9Ao8W9FS36Xs,4253
15
15
  sky/skypilot_config.py,sha256=E3g65cX3P3dT9b5N0GgFBG6yB0FXwIGpisKoozmJmWU,9094
16
16
  sky/status_lib.py,sha256=J7Jb4_Dz0v2T64ttOdyUgpokvl4S0sBJrMfH7Fvo51A,1457
@@ -40,7 +40,7 @@ sky/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
40
  sky/benchmark/benchmark_state.py,sha256=X8CXmuU9KgsDRhKedhFgjeRMUFWtQsjFs1qECvPG2yg,8723
41
41
  sky/benchmark/benchmark_utils.py,sha256=oJOzJ4fs2sruxYh4Tl1NZ5fi2-3oWfXtoeCIAq2hgjw,26136
42
42
  sky/clouds/__init__.py,sha256=WuNIJEnZmBO72tU5awgaaL3rdvFRSkgaYNNeuY68dXo,1356
43
- sky/clouds/aws.py,sha256=6nNwj3MHFzYTonLwx-QmXNvPlNZttS3dr8ULAoN79D0,48615
43
+ sky/clouds/aws.py,sha256=0cpFokzfzEUNnRKMaY198GjrUTXIO0adu6uVArRrlBA,48814
44
44
  sky/clouds/azure.py,sha256=Yp_a1Lzvq4s47eRMeyVheDv9pC0hSPogCiTMYf-a5ZE,28687
45
45
  sky/clouds/cloud.py,sha256=PPk-Cbf1YbJT8bswcQLtPBtko02OWrRGJKkLzDpytTI,34858
46
46
  sky/clouds/cloud_registry.py,sha256=4yQMv-iBSgyN5aNL4Qxbn0JVE-dkVoEUIgj7S1z9S_Q,955
@@ -74,7 +74,7 @@ sky/clouds/service_catalog/scp_catalog.py,sha256=4XnaZE5Q4XrrNnDnVhsHkH6jxmWXBeQ
74
74
  sky/clouds/service_catalog/vsphere_catalog.py,sha256=yJLWu9SQep-PRn1YdeQ7ZoNqQHTAxJtxf7y6FBrfSW0,4391
75
75
  sky/clouds/service_catalog/data_fetchers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
76
  sky/clouds/service_catalog/data_fetchers/analyze.py,sha256=VdksJQs3asFE8H5T3ZV1FJas2xD9WEX6c-V5p7y-wp4,2084
77
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py,sha256=Fcm9_IkmTBXM9gp5VohufOvVVB-ZeRf_sSCHRyDWrdY,22424
77
+ sky/clouds/service_catalog/data_fetchers/fetch_aws.py,sha256=6gpRtQaQtvT1cMpiVBacNXXQAjBC5MWpAUI-1ELYg0U,22850
78
78
  sky/clouds/service_catalog/data_fetchers/fetch_azure.py,sha256=jsSVqbSbBIw_IYmO-y2u4co20AJ-JF713KFjUKdO_VA,12272
79
79
  sky/clouds/service_catalog/data_fetchers/fetch_cudo.py,sha256=52P48lvWN0s1ArjeLPeLemPRpxjSRcHincRle0nqdm4,3440
80
80
  sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py,sha256=35nO_VaDOgp5W13kt_lIANSk_CNf7gBiZGJ5fGyZu6o,6808
@@ -273,9 +273,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=KPqp23B-zQ2SZK03jdHeF9fLTog
273
273
  sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
274
274
  sky/utils/kubernetes/rsync_helper.sh,sha256=Ma-N9a271fTfdgP5-8XIQL7KPf8IPUo-uY004PCdUFo,747
275
275
  sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=RFLJ3k7MR5UN4SKHykQ0lV9SgXumoULpKYIAt1vh-HU,6560
276
- skypilot_nightly-1.0.0.dev20240930.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
277
- skypilot_nightly-1.0.0.dev20240930.dist-info/METADATA,sha256=4jdmVmU3ivnsimCvn6_1InwQ87p84MnVLUE_UP_L458,18948
278
- skypilot_nightly-1.0.0.dev20240930.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
279
- skypilot_nightly-1.0.0.dev20240930.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
280
- skypilot_nightly-1.0.0.dev20240930.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
281
- skypilot_nightly-1.0.0.dev20240930.dist-info/RECORD,,
276
+ skypilot_nightly-1.0.0.dev20241001.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
277
+ skypilot_nightly-1.0.0.dev20241001.dist-info/METADATA,sha256=x7BEUsYqWAeufLCtSK-NBoRQxL9jyZ58xR00gGBqC8c,18948
278
+ skypilot_nightly-1.0.0.dev20241001.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
279
+ skypilot_nightly-1.0.0.dev20241001.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
280
+ skypilot_nightly-1.0.0.dev20241001.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
281
+ skypilot_nightly-1.0.0.dev20241001.dist-info/RECORD,,