skypilot-nightly 1.0.0.dev20241212__py3-none-any.whl → 1.0.0.dev20241214__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '1ed40e3174646ba835423b9308d8d6489f83b6bc'
8
+ _SKYPILOT_COMMIT_SHA = '346646949beead6d93f793851e959da4b56246b0'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20241212'
38
+ __version__ = '1.0.0.dev20241214'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
@@ -1,6 +1,6 @@
1
1
  """Constants used for service catalog."""
2
2
  HOSTED_CATALOG_DIR_URL = 'https://raw.githubusercontent.com/skypilot-org/skypilot-catalog/master/catalogs' # pylint: disable=line-too-long
3
- CATALOG_SCHEMA_VERSION = 'v5'
3
+ CATALOG_SCHEMA_VERSION = 'v6'
4
4
  CATALOG_DIR = '~/.sky/catalogs'
5
5
  ALL_CLOUDS = ('aws', 'azure', 'gcp', 'ibm', 'lambda', 'scp', 'oci',
6
6
  'kubernetes', 'runpod', 'vsphere', 'cudo', 'fluidstack',
@@ -476,9 +476,6 @@ def _get_gpus_for_zone(zone: str) -> 'pd.DataFrame':
476
476
  gpu_name = gpu_name.upper()
477
477
  if 'H100-80GB' in gpu_name:
478
478
  gpu_name = 'H100'
479
- if count != 8:
480
- # H100 only has 8 cards.
481
- continue
482
479
  if 'H100-MEGA-80GB' in gpu_name:
483
480
  gpu_name = 'H100-MEGA'
484
481
  if count != 8:
@@ -97,6 +97,9 @@ _ACC_INSTANCE_TYPE_DICTS = {
97
97
  8: ['g2-standard-96'],
98
98
  },
99
99
  'H100': {
100
+ 1: ['a3-highgpu-1g'],
101
+ 2: ['a3-highgpu-2g'],
102
+ 4: ['a3-highgpu-4g'],
100
103
  8: ['a3-highgpu-8g'],
101
104
  },
102
105
  'H100-MEGA': {
@@ -289,7 +292,9 @@ def get_instance_type_for_accelerator(
289
292
 
290
293
  if acc_name in _ACC_INSTANCE_TYPE_DICTS:
291
294
  df = _df[_df['InstanceType'].notna()]
292
- instance_types = _ACC_INSTANCE_TYPE_DICTS[acc_name][acc_count]
295
+ instance_types = _ACC_INSTANCE_TYPE_DICTS[acc_name].get(acc_count, None)
296
+ if instance_types is None:
297
+ return None, []
293
298
  df = df[df['InstanceType'].isin(instance_types)]
294
299
 
295
300
  # Check the cpus and memory specified by the user.
@@ -879,27 +879,62 @@ def _terminate_node(namespace: str, context: Optional[str],
879
879
  pod_name: str) -> None:
880
880
  """Terminate a pod."""
881
881
  logger.debug('terminate_instances: calling delete_namespaced_pod')
882
- try:
883
- kubernetes.core_api(context).delete_namespaced_service(
884
- pod_name, namespace, _request_timeout=config_lib.DELETION_TIMEOUT)
885
- kubernetes.core_api(context).delete_namespaced_service(
886
- f'{pod_name}-ssh',
887
- namespace,
888
- _request_timeout=config_lib.DELETION_TIMEOUT)
889
- except kubernetes.api_exception():
890
- pass
882
+
883
+ def _delete_k8s_resource_with_retry(delete_func: Callable,
884
+ resource_type: str,
885
+ resource_name: str) -> None:
886
+ """Helper to delete Kubernetes resources with 404 handling and retries.
887
+
888
+ Args:
889
+ delete_func: Function to call to delete the resource
890
+ resource_type: Type of resource being deleted (e.g. 'service'),
891
+ used in logging
892
+ resource_name: Name of the resource being deleted, used in logging
893
+ """
894
+ max_retries = 3
895
+ retry_delay = 5 # seconds
896
+
897
+ for attempt in range(max_retries):
898
+ try:
899
+ delete_func()
900
+ return
901
+ except kubernetes.api_exception() as e:
902
+ if e.status == 404:
903
+ logger.warning(
904
+ f'terminate_instances: Tried to delete {resource_type} '
905
+ f'{resource_name}, but the {resource_type} was not '
906
+ 'found (404).')
907
+ return
908
+ elif attempt < max_retries - 1:
909
+ logger.warning(f'terminate_instances: Failed to delete '
910
+ f'{resource_type} {resource_name} (attempt '
911
+ f'{attempt + 1}/{max_retries}). Error: {e}. '
912
+ f'Retrying in {retry_delay} seconds...')
913
+ time.sleep(retry_delay)
914
+ else:
915
+ raise
916
+
917
+ # Delete services for the pod
918
+ for service_name in [pod_name, f'{pod_name}-ssh']:
919
+ _delete_k8s_resource_with_retry(
920
+ delete_func=lambda name=service_name: kubernetes.core_api(
921
+ context).delete_namespaced_service(name=name,
922
+ namespace=namespace,
923
+ _request_timeout=config_lib.
924
+ DELETION_TIMEOUT),
925
+ resource_type='service',
926
+ resource_name=service_name)
927
+
891
928
  # Note - delete pod after all other resources are deleted.
892
929
  # This is to ensure there are no leftover resources if this down is run
893
930
  # from within the pod, e.g., for autodown.
894
- try:
895
- kubernetes.core_api(context).delete_namespaced_pod(
896
- pod_name, namespace, _request_timeout=config_lib.DELETION_TIMEOUT)
897
- except kubernetes.api_exception() as e:
898
- if e.status == 404:
899
- logger.warning('terminate_instances: Tried to delete pod '
900
- f'{pod_name}, but the pod was not found (404).')
901
- else:
902
- raise
931
+ _delete_k8s_resource_with_retry(
932
+ delete_func=lambda: kubernetes.core_api(context).delete_namespaced_pod(
933
+ name=pod_name,
934
+ namespace=namespace,
935
+ _request_timeout=config_lib.DELETION_TIMEOUT),
936
+ resource_type='pod',
937
+ resource_name=pod_name)
903
938
 
904
939
 
905
940
  def terminate_instances(
sky/sky_logging.py CHANGED
@@ -15,6 +15,7 @@ _show_logging_prefix = (env_options.Options.SHOW_DEBUG_INFO.get() or
15
15
  not env_options.Options.MINIMIZE_LOGGING.get())
16
16
  _FORMAT = '%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
17
17
  _DATE_FORMAT = '%m-%d %H:%M:%S'
18
+ _SENSITIVE_LOGGER = ['sky.provisioner', 'sky.optimizer']
18
19
 
19
20
 
20
21
  class NewLineFormatter(logging.Formatter):
@@ -75,6 +76,23 @@ def _setup_logger():
75
76
  # Setting this will avoid the message
76
77
  # being propagated to the parent logger.
77
78
  _root_logger.propagate = False
79
+ if env_options.Options.SUPPRESS_SENSITIVE_LOG.get():
80
+ # If the sensitive log is enabled, we reinitialize a new handler
81
+ # and force set the level to INFO to suppress the debug logs
82
+ # for certain loggers.
83
+ for logger_name in _SENSITIVE_LOGGER:
84
+ logger = logging.getLogger(logger_name)
85
+ handler_to_logger = RichSafeStreamHandler(sys.stdout)
86
+ handler_to_logger.flush = sys.stdout.flush # type: ignore
87
+ logger.addHandler(handler_to_logger)
88
+ logger.setLevel(logging.INFO)
89
+ if _show_logging_prefix:
90
+ handler_to_logger.setFormatter(FORMATTER)
91
+ else:
92
+ handler_to_logger.setFormatter(NO_PREFIX_FORMATTER)
93
+ # Do not propagate to the parent logger to avoid parent
94
+ # logger printing the logs.
95
+ logger.propagate = False
78
96
 
79
97
 
80
98
  def reload_logger():
sky/utils/env_options.py CHANGED
@@ -11,6 +11,7 @@ class Options(enum.Enum):
11
11
  SHOW_DEBUG_INFO = ('SKYPILOT_DEBUG', False)
12
12
  DISABLE_LOGGING = ('SKYPILOT_DISABLE_USAGE_COLLECTION', False)
13
13
  MINIMIZE_LOGGING = ('SKYPILOT_MINIMIZE_LOGGING', True)
14
+ SUPPRESS_SENSITIVE_LOG = ('SKYPILOT_SUPPRESS_SENSITIVE_LOG', False)
14
15
  # Internal: this is used to skip the cloud user identity check, which is
15
16
  # used to protect cluster operations in a multi-identity scenario.
16
17
  # Currently, this is only used in the job and serve controller, as there
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20241212
3
+ Version: 1.0.0.dev20241214
4
4
  Summary: SkyPilot: An intercloud broker for the clouds
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -1,4 +1,4 @@
1
- sky/__init__.py,sha256=BSUiTgI24KPNL66tZ50gHcfp58wm0j6iLb_aP5dUT2g,5944
1
+ sky/__init__.py,sha256=952AxZEJvtF5tit7QrsMn94iTibGwYuSM1gkl_VxM64,5944
2
2
  sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
3
3
  sky/authentication.py,sha256=kACHmiZgWgRpYd1wx1ofbXRMErfMcFmWrkw4a9NxYrY,20988
4
4
  sky/check.py,sha256=D3Y3saIFAYVvPxuBHnVgJEO0fUVDxgjwuMBaO-D778k,9472
@@ -11,7 +11,7 @@ sky/execution.py,sha256=dpbk1kGRkGHT0FCJKGvjqeV3qIGEN2K20NDZbVrcAvI,28483
11
11
  sky/global_user_state.py,sha256=m2LJsXkh8eAvvz0ADnSP6idfYWZTA_Xi3uxwR3DrJxo,30241
12
12
  sky/optimizer.py,sha256=GjvKQIBtY3NlULzau_9tfa7V2KYVJRrmNrjKVIWCPIQ,59753
13
13
  sky/resources.py,sha256=4T-zQK0OSLC1z5-sLFluEQJ_Y8CAJX3jb4ZSPedwy1s,70352
14
- sky/sky_logging.py,sha256=oLmTmwkuucIto3LHXLJfMcyRpYSkmZAZa5XzQPA5IHk,4434
14
+ sky/sky_logging.py,sha256=SSHibLotMOQKYX_XKj4Va2PTt6dKGZNCWvGm3FuM-Sc,5373
15
15
  sky/skypilot_config.py,sha256=E3g65cX3P3dT9b5N0GgFBG6yB0FXwIGpisKoozmJmWU,9094
16
16
  sky/status_lib.py,sha256=J7Jb4_Dz0v2T64ttOdyUgpokvl4S0sBJrMfH7Fvo51A,1457
17
17
  sky/task.py,sha256=KDsTBIxYpkCOPHv3_ei5H3LDMiGHvDeS9_2HeL6yyLA,49766
@@ -60,10 +60,10 @@ sky/clouds/service_catalog/aws_catalog.py,sha256=j33lNC5GXWK6CiGWZORCnumGlRODmCA
60
60
  sky/clouds/service_catalog/azure_catalog.py,sha256=5Q51x_WEKvQ2YSgJvZHRH3URlbwIstYuwpjaWW_wJlw,8149
61
61
  sky/clouds/service_catalog/common.py,sha256=fCGabkqCMmzNaeT4_--V4K5GsGdf8v_pzYHGYNRDot4,27709
62
62
  sky/clouds/service_catalog/config.py,sha256=ylzqewdEBjDg4awvFek6ldYmFrnvD2bVGLZuLPvEVYA,1793
63
- sky/clouds/service_catalog/constants.py,sha256=ai2yOlsVqBnEpbxaEHXt61COsHBLwOfw6GZXntEPj7k,411
63
+ sky/clouds/service_catalog/constants.py,sha256=u_SjIr3gJI6P-xpRgNAzZG2Z4M5UfE1PL7z4atfQncE,411
64
64
  sky/clouds/service_catalog/cudo_catalog.py,sha256=V_takvL6dWTGQaTLCEvjKIotCDPnMujiNUZ87kZKGVI,4673
65
65
  sky/clouds/service_catalog/fluidstack_catalog.py,sha256=21-cvrYEYTIi7n3ZNF2e7_0QX-PF4BkhlVJUWQOvKrY,5059
66
- sky/clouds/service_catalog/gcp_catalog.py,sha256=v_5fsB3dB9oD8U7lBKnCe5ii6AUWEOiQjNarMnU_qLA,24379
66
+ sky/clouds/service_catalog/gcp_catalog.py,sha256=jJEfWjZ4ItsE657LjIf9mruJVZERFegCD5Qtu20AFNc,24542
67
67
  sky/clouds/service_catalog/ibm_catalog.py,sha256=1iK0KvbI82U7sySb7chr-qm_16x3tTnZ6nIo7o76ouc,4493
68
68
  sky/clouds/service_catalog/kubernetes_catalog.py,sha256=D0DvhVlK6Z6HJcZHPOWqRNAbXgFaQOKUnS_xkmqzukA,12550
69
69
  sky/clouds/service_catalog/lambda_catalog.py,sha256=2R-ccu63BbdvO6X80MtxiniA-jLewXb6I0Ye1rYD9fY,5302
@@ -78,7 +78,7 @@ sky/clouds/service_catalog/data_fetchers/fetch_aws.py,sha256=ro2zazdkDF6z9bE7QFy
78
78
  sky/clouds/service_catalog/data_fetchers/fetch_azure.py,sha256=K5jyfCAR5d-Hg78tDhmqpz0DQl79ndCW1ZRhBDLcBdM,12796
79
79
  sky/clouds/service_catalog/data_fetchers/fetch_cudo.py,sha256=52P48lvWN0s1ArjeLPeLemPRpxjSRcHincRle0nqdm4,3440
80
80
  sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py,sha256=yKuAFbjBRNz_e2RNNDT_aHHAuKQ86Ac7GKgIie5O6Pg,7273
81
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py,sha256=VrTTkMF5AjiplfDmvPBW-otR3oXGU3-oFouVMfIua4Q,33447
81
+ sky/clouds/service_catalog/data_fetchers/fetch_gcp.py,sha256=4ltAlVWFDcLMRoUQHT8NhkYy6ujwYOueYCeUVp37Rz0,33342
82
82
  sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py,sha256=MN54h0CAGPHQAeF2eTmuESq3b0-d1kDARRUM6OkivCk,4962
83
83
  sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py,sha256=SF_gTU74qg6L-DSWneCAbqP0lwZXaaDi5otiMIJbrw0,21462
84
84
  sky/clouds/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -137,7 +137,7 @@ sky/provision/gcp/instance_utils.py,sha256=veRBr6Oziv0KaUdC4acuWeaOremNV0gMYCCHa
137
137
  sky/provision/gcp/mig_utils.py,sha256=oFpcFZoapHMILSE4iIm8V5bxP1RhbMHRF7cciqq8qAk,7883
138
138
  sky/provision/kubernetes/__init__.py,sha256=y6yVfii81WYG3ROxv4hiIj-ydinS5-xGxLvXnARVQoI,719
139
139
  sky/provision/kubernetes/config.py,sha256=WEKcFXXhe89bLGAvoMiBvTDxdxkpTIA6ezrj2vmzldc,29072
140
- sky/provision/kubernetes/instance.py,sha256=2zd_Z09amOsi0vPZjQYMJCkCWbN2YecMLL9HkmUuPrM,48414
140
+ sky/provision/kubernetes/instance.py,sha256=ryJHnFvE2WH2zC4sBC7DjH0yt1ScTC7Sqx8xJXzKI-c,50024
141
141
  sky/provision/kubernetes/network.py,sha256=EpNjRQ131CXepqbdkoRKFu4szVrm0oKEpv1l8EgOkjU,12364
142
142
  sky/provision/kubernetes/network_utils.py,sha256=t1FS3K400fetH7cBuRgQJZl5_jEeMshsvsYmnMUcq8k,11399
143
143
  sky/provision/kubernetes/utils.py,sha256=cnhmVcy8ri8iKMr404iugxBR2gQIXZiJVCxXwi3vglc,102225
@@ -256,7 +256,7 @@ sky/utils/control_master_utils.py,sha256=90hnxiAUP20gbJ9e3MERh7rb04ZO_I3LsljNjR2
256
256
  sky/utils/controller_utils.py,sha256=t6PkhrGt7fRUzRqdSox0a-825Cy9b-xACk64exr5HRc,40698
257
257
  sky/utils/dag_utils.py,sha256=pVX3lGDDcYTcGoH_1jEWzl9767Y4mwlIEYIzoyHO6gM,6105
258
258
  sky/utils/db_utils.py,sha256=AOvMmBEN9cF4I7CoXihPCtus4mU2VDGjBQSVMMgzKlA,2786
259
- sky/utils/env_options.py,sha256=3oAaUPxowL6vI2XmxXrH56V7Myj9IJWsL-MXFmRFVdI,1294
259
+ sky/utils/env_options.py,sha256=E5iwRFBUY2Iq6e0y0c1Mv5OSQ4MRNdk0-p38xUyVerc,1366
260
260
  sky/utils/kubernetes_enums.py,sha256=imGqHSa8O07zD_6xH1SDMM7dBU5lF5fzFFlQuQy00QM,1384
261
261
  sky/utils/log_utils.py,sha256=oZYF45uC7GFjAqO-Je-aiX6zhtq91TP-KKaIbQNF-jY,14024
262
262
  sky/utils/resources_utils.py,sha256=Xqi7gxPYw2y5wl5okUI5zx5LEij0hJF_V3Zi8q7TXYg,7890
@@ -279,9 +279,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=k0TBoQ4zgf79-sVkixKSGYFHQ7Z
279
279
  sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
280
280
  sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
281
281
  sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=RFLJ3k7MR5UN4SKHykQ0lV9SgXumoULpKYIAt1vh-HU,6560
282
- skypilot_nightly-1.0.0.dev20241212.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
283
- skypilot_nightly-1.0.0.dev20241212.dist-info/METADATA,sha256=JusT-4ZZvxLeBa4Wrt9o6g8PReUHuWS1kSym2uX3hDI,20319
284
- skypilot_nightly-1.0.0.dev20241212.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
285
- skypilot_nightly-1.0.0.dev20241212.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
286
- skypilot_nightly-1.0.0.dev20241212.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
287
- skypilot_nightly-1.0.0.dev20241212.dist-info/RECORD,,
282
+ skypilot_nightly-1.0.0.dev20241214.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
283
+ skypilot_nightly-1.0.0.dev20241214.dist-info/METADATA,sha256=BDAqiSBeCr0fJIBAs7J2UTyWswclFpu5N3Crv5LO1hk,20319
284
+ skypilot_nightly-1.0.0.dev20241214.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
285
+ skypilot_nightly-1.0.0.dev20241214.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
286
+ skypilot_nightly-1.0.0.dev20241214.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
287
+ skypilot_nightly-1.0.0.dev20241214.dist-info/RECORD,,