skypilot-nightly 1.0.0.dev20250403__py3-none-any.whl → 1.0.0.dev20250404__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '7149e721d257cfc890526c9738290724882ce82e'
8
+ _SKYPILOT_COMMIT_SHA = 'dfd12c30b89ae96f5596418fed58547c159ff40a'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20250403'
38
+ __version__ = '1.0.0.dev20250404'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/check.py CHANGED
@@ -289,6 +289,12 @@ def _print_checked_cloud(
289
289
  cloud_tuple: The cloud to print the capabilities for.
290
290
  cloud_capabilities: The capabilities for the cloud.
291
291
  """
292
+
293
+ def _yellow_color(str_to_format: str) -> str:
294
+ return (f'{colorama.Fore.LIGHTYELLOW_EX}'
295
+ f'{str_to_format}'
296
+ f'{colorama.Style.RESET_ALL}')
297
+
292
298
  cloud_repr, cloud = cloud_tuple
293
299
  # Print the capabilities for the cloud.
294
300
  # consider cloud enabled if any capability is enabled.
@@ -319,7 +325,7 @@ def _print_checked_cloud(
319
325
  if activated_account is not None:
320
326
  echo(f' Activated account: {activated_account}')
321
327
  for reason, caps in hints_to_capabilities.items():
322
- echo(f' Hint [{", ".join(caps)}]: {reason}')
328
+ echo(f' Hint [{", ".join(caps)}]: {_yellow_color(reason)}')
323
329
  for reason, caps in reasons_to_capabilities.items():
324
330
  echo(f' Reason [{", ".join(caps)}]: {reason}')
325
331
 
sky/clouds/kubernetes.py CHANGED
@@ -679,7 +679,8 @@ class Kubernetes(clouds.Cloud):
679
679
  success = False
680
680
  for context in existing_allowed_contexts:
681
681
  try:
682
- check_result = kubernetes_utils.check_credentials(context)
682
+ check_result = kubernetes_utils.check_credentials(
683
+ context, run_optional_checks=True)
683
684
  if check_result[0]:
684
685
  success = True
685
686
  if check_result[1] is not None:
@@ -689,15 +690,6 @@ class Kubernetes(clouds.Cloud):
689
690
  except Exception as e: # pylint: disable=broad-except
690
691
  return (False, f'Credential check failed for {context}: '
691
692
  f'{common_utils.format_exception(e)}')
692
- unlabeled_nodes = kubernetes_utils.get_unlabeled_accelerator_nodes(
693
- context)
694
- if len(unlabeled_nodes) > 0:
695
- hints.append(f'Context {context} has {len(unlabeled_nodes)} '
696
- f'unlabeled nodes with accelerators. '
697
- f'To label these nodes, run '
698
- f'`python -m sky.utils.kubernetes.gpu_labeler '
699
- f'--context {context}` from the project root '
700
- f'directory.')
701
693
  if success:
702
694
  return (True, cls._format_credential_check_results(hints, reasons))
703
695
  return (False, 'Failed to find available context with working '
@@ -328,9 +328,9 @@ def get_common_gpus() -> List[str]:
328
328
  'A100',
329
329
  'A100-80GB',
330
330
  'H100',
331
+ 'H200',
331
332
  'L4',
332
333
  'L40S',
333
- 'P100',
334
334
  'T4',
335
335
  'V100',
336
336
  'V100-32GB',
@@ -272,7 +272,8 @@ def get_gke_accelerator_name(accelerator: str) -> str:
272
272
  if accelerator == 'H100':
273
273
  # H100 is named as H100-80GB in GKE.
274
274
  accelerator = 'H100-80GB'
275
- if accelerator in ('A100-80GB', 'L4', 'H100-80GB', 'H100-MEGA-80GB'):
275
+ if accelerator in ('A100-80GB', 'L4', 'H100-80GB', 'H100-MEGA-80GB',
276
+ 'B200'):
276
277
  # A100-80GB, L4, H100-80GB and H100-MEGA-80GB
277
278
  # have a different name pattern.
278
279
  return 'nvidia-{}'.format(accelerator.lower())
@@ -1096,7 +1097,9 @@ def get_accelerator_label_key_value(
1096
1097
  ResourcesUnavailableError: Can be raised from the following conditions:
1097
1098
  - The cluster does not have GPU/TPU resources
1098
1099
  (nvidia.com/gpu, google.com/tpu)
1099
- - The cluster does not have GPU/TPU labels setup correctly
1100
+ - The cluster has GPU/TPU resources, but no node in the cluster has
1101
+ an accelerator label.
1102
+ - The cluster has a node with an invalid accelerator label value.
1100
1103
  - The cluster doesn't have any nodes with acc_type GPU/TPU
1101
1104
  """
1102
1105
  # Check if the cluster has GPU resources
@@ -1291,7 +1294,8 @@ def get_external_ip(network_mode: Optional[
1291
1294
 
1292
1295
 
1293
1296
  def check_credentials(context: Optional[str],
1294
- timeout: int = kubernetes.API_TIMEOUT) -> \
1297
+ timeout: int = kubernetes.API_TIMEOUT,
1298
+ run_optional_checks: bool = False) -> \
1295
1299
  Tuple[bool, Optional[str]]:
1296
1300
  """Check if the credentials in kubeconfig file are valid
1297
1301
 
@@ -1333,6 +1337,9 @@ def check_credentials(context: Optional[str],
1333
1337
  f'{common_utils.format_exception(e, use_bracket=True)}')
1334
1338
 
1335
1339
  # If we reach here, the credentials are valid and Kubernetes cluster is up.
1340
+ if not run_optional_checks:
1341
+ return True, None
1342
+
1336
1343
  # We now do softer checks to check if exec based auth is used and to
1337
1344
  # see if the cluster is GPU-enabled.
1338
1345
 
@@ -1344,16 +1351,36 @@ def check_credentials(context: Optional[str],
1344
1351
  # `sky launch --gpus <gpu>` and the optimizer does not list Kubernetes as a
1345
1352
  # provider if their cluster GPUs are not setup correctly.
1346
1353
  gpu_msg = ''
1347
- try:
1348
- get_accelerator_label_key_value(context,
1349
- acc_type='',
1350
- acc_count=0,
1351
- check_mode=True)
1352
- except exceptions.ResourcesUnavailableError as e:
1353
- # If GPUs are not available, we return cluster as enabled (since it can
1354
- # be a CPU-only cluster) but we also return the exception message which
1355
- # serves as a hint for how to enable GPU access.
1356
- gpu_msg = str(e)
1354
+ unlabeled_nodes = get_unlabeled_accelerator_nodes(context)
1355
+ if unlabeled_nodes:
1356
+ gpu_msg = (f'Cluster has {len(unlabeled_nodes)} nodes with '
1357
+ f'accelerators that are not labeled. '
1358
+ f'To label the nodes, run '
1359
+ f'`python -m sky.utils.kubernetes.gpu_labeler '
1360
+ f'--context {context}`')
1361
+ else:
1362
+ try:
1363
+ # This function raises a ResourcesUnavailableError in three cases:
1364
+ # 1. If no node in cluster has GPU/TPU resource in its capacity.
1365
+ # (e.g. google.com/tpu, nvidia.com/gpu)
1366
+ # 2. If at least one node in cluster has GPU/TPU resource in its
1367
+ # capacity, but no node in the cluster has an accelerator label.
1368
+ # 3. If an accelerator label on a node is invalid.
1369
+ # Exception 2 is a special case of a cluster having at least one
1370
+ # unlabelled node, which is caught in
1371
+ # `get_unlabeled_accelerator_nodes`.
1372
+ # Therefore, if `get_unlabeled_accelerator_nodes` detects unlabelled
1373
+ # nodes, we skip this check.
1374
+ get_accelerator_label_key_value(context,
1375
+ acc_type='',
1376
+ acc_count=0,
1377
+ check_mode=True)
1378
+ except exceptions.ResourcesUnavailableError as e:
1379
+ # If GPUs are not available, we return cluster as enabled
1380
+ # (since it can be a CPU-only cluster) but we also return the
1381
+ # exception message which serves as a hint for how to enable
1382
+ # GPU access.
1383
+ gpu_msg = str(e)
1357
1384
  if exec_msg and gpu_msg:
1358
1385
  return True, f'{gpu_msg}\n Additionally, {exec_msg}'
1359
1386
  elif gpu_msg:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250403
3
+ Version: 1.0.0.dev20250404
4
4
  Summary: SkyPilot: An intercloud broker for the clouds
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -1,7 +1,7 @@
1
- sky/__init__.py,sha256=jAriaLe3W4tnwwVPeiJ2hAxmMnsB0NRcf-MQ2yLMyQk,6428
1
+ sky/__init__.py,sha256=DAFp3ES0ncxRm0D0gyRcS1gr1FFsn4gkUD6kWcFNHlg,6428
2
2
  sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
3
3
  sky/authentication.py,sha256=ND011K_-Ud1dVZF37A9KrwYir_ihJXcHc7iDWmuBc8Q,22872
4
- sky/check.py,sha256=oktScSPsHIyO7ZrVHy3QaybB6-s_D6eMEjmICAiUtDo,15902
4
+ sky/check.py,sha256=PPNQnaaZBA9_aogJpN4gnG4XWnTqkd74c-rBYDkDRDY,16101
5
5
  sky/cli.py,sha256=Zcio2ak6zX_5_N_lshDUqCvoV6NEOmGS6Tp6AgS9VAk,222446
6
6
  sky/cloud_stores.py,sha256=cmKdSoB4bmwrd-Z1NCZBFb6IIJt0jKVxkGPoX86280s,26606
7
7
  sky/core.py,sha256=G3n6z0dyvoU4FJVGnnTu3kFdu_EtQC1l57er5voRAX0,47926
@@ -55,7 +55,7 @@ sky/clouds/do.py,sha256=P38l4otp2AuDReUH9Ii621ht9s-NIyb7-R37jbtjHk8,11580
55
55
  sky/clouds/fluidstack.py,sha256=jIqW1MLe55MVME1PATZm8e6_FsiTnJawW7OdytPW0aM,12666
56
56
  sky/clouds/gcp.py,sha256=sUJ9LXUnMxYm6OYZ5P-z1dJHxgVILuC3OW3eFSTNCv8,56919
57
57
  sky/clouds/ibm.py,sha256=XtuPN8QgrwJdb1qb_b-7KwAE2tf_N9wh9eEfi2tcg-s,22013
58
- sky/clouds/kubernetes.py,sha256=VuPHstRysRZH0UTYZIExY2Gtd2ItQn9I5EboqxyuvV0,36717
58
+ sky/clouds/kubernetes.py,sha256=SNRilcl_JYvaMDoYw0jnMtfaSi7FKQycCiJVmX6-4f4,36216
59
59
  sky/clouds/lambda_cloud.py,sha256=rR2YrZ6flEbKKpQAm60eKNjiMDYvH2hqzaCo3Hx4Ffw,12916
60
60
  sky/clouds/nebius.py,sha256=4D7C2NQYI-BNhXWNOyAXNAZj7-5nN33VQW1sxfSGt9w,14662
61
61
  sky/clouds/oci.py,sha256=YO4kjSsHBmAVH4z1TuVP72zfmC0BXte4E0xIyZir9N4,27622
@@ -64,7 +64,7 @@ sky/clouds/runpod.py,sha256=P486CMN-Mt3R-7jMVd3XIGjLWYy0X5B74dK_IgAp2Cg,12149
64
64
  sky/clouds/scp.py,sha256=6OucFxDIOZFA1Z_QZwGqblW4zdBljhUjhamssvchu_0,15971
65
65
  sky/clouds/vast.py,sha256=1AZaM71dI837oeuMZEXN4muHsH3CKRLjBAPcPy1QqV4,11296
66
66
  sky/clouds/vsphere.py,sha256=yRLQESpuSOdfs4KaVTMSQJqf_3mr4VysGqSo1zs1ZtU,12453
67
- sky/clouds/service_catalog/__init__.py,sha256=Fbm8kKpVTCoIumnY-FWL0X4xMpKu_qwYAxyuVXu4IBQ,14997
67
+ sky/clouds/service_catalog/__init__.py,sha256=WCYMAhek36Ee1HjJL9LP287a70hv2NMA_8plcZDI4Z4,14997
68
68
  sky/clouds/service_catalog/aws_catalog.py,sha256=PbYD37rU_8m-Y_5xTglW21ppxI0GecM1sdO1yXuPwHE,13518
69
69
  sky/clouds/service_catalog/azure_catalog.py,sha256=5Q51x_WEKvQ2YSgJvZHRH3URlbwIstYuwpjaWW_wJlw,8149
70
70
  sky/clouds/service_catalog/common.py,sha256=6TCUE1kg2FvUZXpV2ktrnsRchUYNTGx8rq16m5Nztek,27766
@@ -166,7 +166,7 @@ sky/provision/kubernetes/constants.py,sha256=dZCUV8FOO9Gct80sdqeubKnxeW3CGl-u5mx
166
166
  sky/provision/kubernetes/instance.py,sha256=oag17OtuiqU-1RjkgW9NvEpxSGUFIYdI7M61S-YmPu8,50503
167
167
  sky/provision/kubernetes/network.py,sha256=AtcOM8wPs_-UlQJhGEQGP6Lh4HIgdx63Y0iWEhP5jyc,12673
168
168
  sky/provision/kubernetes/network_utils.py,sha256=6uck1aBkgtm-gGBitU3_hEUp8j14ZuG_4Xo70ReZYXs,11654
169
- sky/provision/kubernetes/utils.py,sha256=IR2AKbZA-ZuKtLRuqHZsiA1N2HmQTJKz5IqOQIVEk4U,125753
169
+ sky/provision/kubernetes/utils.py,sha256=D7xfk6RjqLAsERpJMxQ8ozu-D3pRTz_bRszKQeMsVno,127188
170
170
  sky/provision/kubernetes/manifests/smarter-device-manager-configmap.yaml,sha256=AMzYzlY0JIlfBWj5eX054Rc1XDW2thUcLSOGMJVhIdA,229
171
171
  sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml,sha256=RtTq4F1QUmR2Uunb6zuuRaPhV7hpesz4saHjn3Ncsb4,2010
172
172
  sky/provision/lambda_cloud/__init__.py,sha256=6EEvSgtUeEiup9ivIFevHmgv0GqleroO2X0K7TRa2nE,612
@@ -350,9 +350,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488
350
350
  sky/utils/kubernetes/kubernetes_deploy_utils.py,sha256=HPVgNt-wbCVPd9dpDFiA7t2mzQLpjXHJ61eiwRbEr-c,10378
351
351
  sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
352
352
  sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=Kq1MDygF2IxFmu9FXpCxqucXLmeUrvs6OtRij6XTQbo,6554
353
- skypilot_nightly-1.0.0.dev20250403.dist-info/licenses/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
354
- skypilot_nightly-1.0.0.dev20250403.dist-info/METADATA,sha256=YkgmWuWgAbuQRFP3Zfk3QpnqS_0KN7x50GuSoVI9JwU,18552
355
- skypilot_nightly-1.0.0.dev20250403.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
356
- skypilot_nightly-1.0.0.dev20250403.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
357
- skypilot_nightly-1.0.0.dev20250403.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
358
- skypilot_nightly-1.0.0.dev20250403.dist-info/RECORD,,
353
+ skypilot_nightly-1.0.0.dev20250404.dist-info/licenses/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
354
+ skypilot_nightly-1.0.0.dev20250404.dist-info/METADATA,sha256=HjDx6IDyOIoJ7lFpgxXPO2n8tIyTUh87PO4FAp-ZWcc,18552
355
+ skypilot_nightly-1.0.0.dev20250404.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
356
+ skypilot_nightly-1.0.0.dev20250404.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
357
+ skypilot_nightly-1.0.0.dev20250404.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
358
+ skypilot_nightly-1.0.0.dev20250404.dist-info/RECORD,,