skypilot-nightly 1.0.0.dev20241025__py3-none-any.whl → 1.0.0.dev20241026__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '057bc4b44755ac1e9dadc680e022c369e8ddff52'
8
+ _SKYPILOT_COMMIT_SHA = '0e915d3430d8027aa40b766605bb13c889ffc62f'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20241025'
38
+ __version__ = '1.0.0.dev20241026'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/cli.py CHANGED
@@ -3519,7 +3519,7 @@ def jobs():
3519
3519
  default=None,
3520
3520
  type=str,
3521
3521
  hidden=True,
3522
- help=('Alias for --name, the name of the spot job.'))
3522
+ help=('Alias for --name, the name of the managed job.'))
3523
3523
  @click.option('--job-recovery',
3524
3524
  default=None,
3525
3525
  type=str,
sky/clouds/azure.py CHANGED
@@ -39,9 +39,9 @@ _DEFAULT_AZURE_UBUNTU_HPC_IMAGE_GB = 30
39
39
  _DEFAULT_AZURE_UBUNTU_2004_IMAGE_GB = 150
40
40
  _DEFAULT_SKYPILOT_IMAGE_GB = 30
41
41
 
42
- _DEFAULT_CPU_IMAGE_ID = 'skypilot:gpu-ubuntu-2204'
43
- _DEFAULT_GPU_IMAGE_ID = 'skypilot:gpu-ubuntu-2204'
44
- _DEFAULT_V1_IMAGE_ID = 'skypilot:v1-ubuntu-2004'
42
+ _DEFAULT_CPU_IMAGE_ID = 'skypilot:custom-cpu-ubuntu-v2'
43
+ _DEFAULT_GPU_IMAGE_ID = 'skypilot:custom-gpu-ubuntu-v2'
44
+ _DEFAULT_V1_IMAGE_ID = 'skypilot:custom-gpu-ubuntu-v1'
45
45
  _DEFAULT_GPU_K80_IMAGE_ID = 'skypilot:k80-ubuntu-2004'
46
46
  _FALLBACK_IMAGE_ID = 'skypilot:gpu-ubuntu-2204'
47
47
 
sky/clouds/oci.py CHANGED
@@ -468,7 +468,11 @@ class OCI(clouds.Cloud):
468
468
  api_key_file = oci_cfg[
469
469
  'key_file'] if 'key_file' in oci_cfg else 'BadConf'
470
470
  sky_cfg_file = oci_utils.oci_config.get_sky_user_config_file()
471
- except (ImportError, oci_adaptor.oci.exceptions.ConfigFileNotFound):
471
+ # Must catch ImportError before any oci_adaptor.oci.exceptions
472
+ # because oci_adaptor.oci.exceptions can throw ImportError.
473
+ except ImportError:
474
+ return {}
475
+ except oci_adaptor.oci.exceptions.ConfigFileNotFound:
472
476
  return {}
473
477
 
474
478
  # OCI config and API key file are mandatory
@@ -7,11 +7,14 @@ import re
7
7
  from typing import Dict, List, Optional, Tuple
8
8
 
9
9
  from sky import clouds as cloud_lib
10
+ from sky import sky_logging
10
11
  from sky.clouds import Azure
11
12
  from sky.clouds.service_catalog import common
12
13
  from sky.utils import resources_utils
13
14
  from sky.utils import ux_utils
14
15
 
16
+ logger = sky_logging.init_logger(__name__)
17
+
15
18
  # This list should match the list of regions in
16
19
  # skypilot image generation Packer script's replication_regions
17
20
  # sky/clouds/service_catalog/images/skypilot-azure-cpu-ubuntu.pkr.hcl
@@ -191,9 +194,16 @@ def list_accelerators(
191
194
 
192
195
  def get_image_id_from_tag(tag: str, region: Optional[str]) -> Optional[str]:
193
196
  """Returns the image id from the tag."""
194
- # Azure images are not region-specific.
195
- del region # Unused.
196
- return common.get_image_id_from_tag_impl(_image_df, tag, None)
197
+ global _image_df
198
+ image_id = common.get_image_id_from_tag_impl(_image_df, tag, region)
199
+ if image_id is None:
200
+ # Refresh the image catalog and try again, if the image tag is not
201
+ # found.
202
+ logger.debug('Refreshing the image catalog and trying again.')
203
+ _image_df = common.read_catalog('azure/images.csv',
204
+ pull_frequency_hours=0)
205
+ image_id = common.get_image_id_from_tag_impl(_image_df, tag, region)
206
+ return image_id
197
207
 
198
208
 
199
209
  def is_image_tag_valid(tag: str, region: Optional[str]) -> bool:
sky/jobs/controller.py CHANGED
@@ -215,7 +215,7 @@ class JobsController:
215
215
  end_time=end_time,
216
216
  callback_func=callback_func)
217
217
  logger.info(
218
- f'Spot job {self._job_id} (task: {task_id}) SUCCEEDED. '
218
+ f'Managed job {self._job_id} (task: {task_id}) SUCCEEDED. '
219
219
  f'Cleaning up the cluster {cluster_name}.')
220
220
  # Only clean up the cluster, not the storages, because tasks may
221
221
  # share storages.
@@ -1,5 +1,6 @@
1
1
  """Kubernetes instance provisioning."""
2
2
  import copy
3
+ import json
3
4
  import time
4
5
  from typing import Any, Dict, List, Optional
5
6
  import uuid
@@ -425,6 +426,70 @@ def _label_pod(namespace: str, context: Optional[str], pod_name: str,
425
426
  _request_timeout=kubernetes.API_TIMEOUT)
426
427
 
427
428
 
429
+ def _create_namespaced_pod_with_retries(namespace: str, pod_spec: dict,
430
+ context: Optional[str]) -> Any:
431
+ """Attempts to create a Kubernetes Pod and handle any errors.
432
+
433
+ Currently, we handle errors due to the AppArmor annotation and retry if
434
+ it fails due to the `FieldValueForbidden` error.
435
+ See https://github.com/skypilot-org/skypilot/issues/4174 for details.
436
+
437
+ Returns: The created Pod object.
438
+ """
439
+ try:
440
+ # Attempt to create the Pod with the AppArmor annotation
441
+ pod = kubernetes.core_api(context).create_namespaced_pod(
442
+ namespace, pod_spec)
443
+ return pod
444
+ except kubernetes.api_exception() as e:
445
+ try:
446
+ error_body = json.loads(e.body)
447
+ error_message = error_body.get('message', '')
448
+ except json.JSONDecodeError:
449
+ error_message = str(e.body)
450
+ # Check if the error is due to the AppArmor annotation and retry.
451
+ # We add an AppArmor annotation to set it as unconfined in our
452
+ # base template in kubernetes-ray.yml.j2. This is required for
453
+ # FUSE to work in the pod on most Kubernetes distributions.
454
+ # However, some distributions do not support the AppArmor annotation
455
+ # and will fail to create the pod. In this case, we retry without
456
+ # the annotation.
457
+ if (e.status == 422 and 'FieldValueForbidden' in error_message and
458
+ 'AppArmorProfile: nil' in error_message):
459
+ logger.warning('AppArmor annotation caused pod creation to fail. '
460
+ 'Retrying without the annotation. '
461
+ 'Note: this may cause bucket mounting to fail.')
462
+
463
+ # Remove the AppArmor annotation
464
+ annotations = pod_spec.get('metadata', {}).get('annotations', {})
465
+ if ('container.apparmor.security.beta.kubernetes.io/ray-node'
466
+ in annotations):
467
+ del annotations[
468
+ 'container.apparmor.security.beta.kubernetes.io/ray-node']
469
+ pod_spec['metadata']['annotations'] = annotations
470
+ logger.info('AppArmor annotation removed from Pod spec.')
471
+ else:
472
+ logger.warning('AppArmor annotation not found in pod spec, '
473
+ 'retrying will not help. '
474
+ f'Current annotations: {annotations}')
475
+ raise e
476
+
477
+ # Retry Pod creation without the AppArmor annotation
478
+ try:
479
+ pod = kubernetes.core_api(context).create_namespaced_pod(
480
+ namespace, pod_spec)
481
+ logger.info(f'Pod {pod.metadata.name} created successfully '
482
+ 'without AppArmor annotation.')
483
+ return pod
484
+ except kubernetes.api_exception() as retry_exception:
485
+ logger.info('Failed to create Pod without AppArmor annotation: '
486
+ f'{retry_exception}')
487
+ raise retry_exception
488
+ else:
489
+ # Re-raise the exception if it's a different error
490
+ raise e
491
+
492
+
428
493
  def _create_pods(region: str, cluster_name_on_cloud: str,
429
494
  config: common.ProvisionConfig) -> common.ProvisionRecord:
430
495
  """Create pods based on the config."""
@@ -546,8 +611,7 @@ def _create_pods(region: str, cluster_name_on_cloud: str,
546
611
  }
547
612
  }
548
613
 
549
- pod = kubernetes.core_api(context).create_namespaced_pod(
550
- namespace, pod_spec)
614
+ pod = _create_namespaced_pod_with_retries(namespace, pod_spec, context)
551
615
  created_pods[pod.metadata.name] = pod
552
616
  if head_pod_name is None:
553
617
  head_pod_name = pod.metadata.name
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20241025
3
+ Version: 1.0.0.dev20241026
4
4
  Summary: SkyPilot: An intercloud broker for the clouds
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -1,8 +1,8 @@
1
- sky/__init__.py,sha256=GSmePpdUEM88IzUu72hNEsvulpIRguwT7aXuz3ked5s,5882
1
+ sky/__init__.py,sha256=N9HA1yPUbTF3VIZz0NOVgz7dHAcWh0f_GR82a0uJYe8,5882
2
2
  sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
3
3
  sky/authentication.py,sha256=pAdCT60OxxiXI9KXDyP2lQ9u9vMc6aMtq5Xi2h_hbdw,20984
4
4
  sky/check.py,sha256=D3Y3saIFAYVvPxuBHnVgJEO0fUVDxgjwuMBaO-D778k,9472
5
- sky/cli.py,sha256=4HOGW3LTDlPNXHqvTykcM8iMWOCdAK90l6w34DYBIsg,210357
5
+ sky/cli.py,sha256=VoPwWKGeNZZcFNLvw3VPR_F0WpKnM5EvfffNS8kcKc0,210360
6
6
  sky/cloud_stores.py,sha256=RjFgmRhUh1Kk__f6g3KxzLp9s7dA0pFK4W1AukEuUaw,21153
7
7
  sky/core.py,sha256=DW9OGE2kS2CmsvQ1grrpRnNFS3woMGWSHu5GE99e-I4,38190
8
8
  sky/dag.py,sha256=WLFWr5hfrwjd31uYlNvI-zWUk7tLaT_gzJn4LzbVtkE,2780
@@ -41,7 +41,7 @@ sky/benchmark/benchmark_state.py,sha256=X8CXmuU9KgsDRhKedhFgjeRMUFWtQsjFs1qECvPG
41
41
  sky/benchmark/benchmark_utils.py,sha256=eb-i6zYoo-Zkod-T9qtCu1FcYLw--Yyos1SyibUPZNE,26194
42
42
  sky/clouds/__init__.py,sha256=WuNIJEnZmBO72tU5awgaaL3rdvFRSkgaYNNeuY68dXo,1356
43
43
  sky/clouds/aws.py,sha256=XJVbOSkVVUHp9HbHDp0rFdHX113JHbY-3sgokGdNJVE,49527
44
- sky/clouds/azure.py,sha256=FklG_CEvOXLkZVoEYSCcNtPsQpq-2w6AJovzbLKun0w,30162
44
+ sky/clouds/azure.py,sha256=SOaJQ-E6ENJJviveNjY6OU4CmV5VMIqJEV5A1Rltqpg,30178
45
45
  sky/clouds/cloud.py,sha256=BBu1G-gkmylffldL50cvJ2DkDJ8vjVPziOPUAsvgJ2o,34948
46
46
  sky/clouds/cloud_registry.py,sha256=oLoYFjm_SDTgdHokY7b6A5Utq80HXRQNxV0fLjDdVsQ,2361
47
47
  sky/clouds/cudo.py,sha256=H4VyMo5wWGAv2MXZ3xsbWjlZA_cZYnt4ecNlTOOao8Y,13147
@@ -50,14 +50,14 @@ sky/clouds/gcp.py,sha256=m_dH04HqgU-DdW4R9wrSr66IpPt9JMKHEvHEGGFpeRo,54655
50
50
  sky/clouds/ibm.py,sha256=M8QdjeSFlwssfoY2aOodxG4q5R3eT9K-4lTPDHYvEYI,21476
51
51
  sky/clouds/kubernetes.py,sha256=j3imm_sbtyyZXvJ6qbqZmXok2C9OQIcGpyulljbTSJ4,28696
52
52
  sky/clouds/lambda_cloud.py,sha256=11dKUSunHUgaPZ1t8O85X29_NJ-o26sCt5DjwAPFgl4,12697
53
- sky/clouds/oci.py,sha256=Ve3MqVHay9oHRuK6vaCd3Rxz8fD54nfM_DKA4Qzf8l4,26963
53
+ sky/clouds/oci.py,sha256=gefhbQlFLW1K4pyPOp9fHnTawe_Ozy8m3nPUYlVszSc,27137
54
54
  sky/clouds/paperspace.py,sha256=lmUZPYAblaqiBmGQwCunccMiTF_dVA1o3vqY9Q_Nc28,10921
55
55
  sky/clouds/runpod.py,sha256=lstUC6f4JDhtcH9NfwkbpCJMmfmvMigoanhPXPbTYds,11540
56
56
  sky/clouds/scp.py,sha256=2KLTuNSMdBzK8CLwSesv7efOuiLidIMoyNG4AOt5Sqw,15870
57
57
  sky/clouds/vsphere.py,sha256=7eZFYIDtY5sX_ATr8h7kwwkY9t8Z-EYMJ9HCjoRBoxI,12309
58
58
  sky/clouds/service_catalog/__init__.py,sha256=e0K-c64jQV9d6zly5OnIXMsYaZXs_Ko9osAbDaRlOOw,14743
59
59
  sky/clouds/service_catalog/aws_catalog.py,sha256=1wX1-wOMw2LZ7RkV_Ah7c42RLRYm-m5_GAXzn32M5a8,13038
60
- sky/clouds/service_catalog/azure_catalog.py,sha256=DOAzAhI5eHRHTzYDBrlNmfh3YByAoR-A9kBVeh6ZXvs,7689
60
+ sky/clouds/service_catalog/azure_catalog.py,sha256=JpULm-1WYkpkFwePjtYsxYxo7h_DFkMMsdrQxpgiKH0,8127
61
61
  sky/clouds/service_catalog/common.py,sha256=PA3llB0zZh4v0DO_gDDCKGhRIBx16CAp2WJZNxhjNOA,27266
62
62
  sky/clouds/service_catalog/config.py,sha256=ylzqewdEBjDg4awvFek6ldYmFrnvD2bVGLZuLPvEVYA,1793
63
63
  sky/clouds/service_catalog/constants.py,sha256=ai2yOlsVqBnEpbxaEHXt61COsHBLwOfw6GZXntEPj7k,411
@@ -95,7 +95,7 @@ sky/data/storage.py,sha256=x8YYY4zVBdit_5oAR_MXV-TM9qDefV_ZV4z0irv6ZaU,163102
95
95
  sky/data/storage_utils.py,sha256=cM3kxlffYE7PnJySDu8huyUsMX_JYsf9uer8r5OYsjo,9556
96
96
  sky/jobs/__init__.py,sha256=yucibSB_ZimtJMvOhMxn6ZqwBIYNfcwmc6pSXtCqmNQ,1483
97
97
  sky/jobs/constants.py,sha256=YLgcCg_RHSYr_rfsI_4UIdXk78KKKOK29Oem88t5j8I,1350
98
- sky/jobs/controller.py,sha256=k28bbicxtML6p1YxSetk-1nhBHPCubpvLWJsh7TtU9c,26701
98
+ sky/jobs/controller.py,sha256=JcgHsghFGweTlvj4_-tdHSpeT015EQTrqWrXkjGsJBA,26704
99
99
  sky/jobs/core.py,sha256=RkBFaKDlovmdzqlOAgQ0xAimZFgo4pXq3qaQkAvGsGk,16908
100
100
  sky/jobs/recovery_strategy.py,sha256=UOEaVGSpRbCnCzlD8cgyjhCPIBIeBeCXCutoSic5aiA,25545
101
101
  sky/jobs/state.py,sha256=C6R5Yq7ftBqGPa_71tUjflBMKAaJ1FTTdbgjAwmbJsI,23231
@@ -137,7 +137,7 @@ sky/provision/gcp/instance_utils.py,sha256=veRBr6Oziv0KaUdC4acuWeaOremNV0gMYCCHa
137
137
  sky/provision/gcp/mig_utils.py,sha256=oFpcFZoapHMILSE4iIm8V5bxP1RhbMHRF7cciqq8qAk,7883
138
138
  sky/provision/kubernetes/__init__.py,sha256=y6yVfii81WYG3ROxv4hiIj-ydinS5-xGxLvXnARVQoI,719
139
139
  sky/provision/kubernetes/config.py,sha256=WEKcFXXhe89bLGAvoMiBvTDxdxkpTIA6ezrj2vmzldc,29072
140
- sky/provision/kubernetes/instance.py,sha256=FOt77bFSKwi12J1_1qXhUrKiCqLfKWFgcRa1cLlNFlU,38453
140
+ sky/provision/kubernetes/instance.py,sha256=1dN2vdh-ZdeIe39ZxH5DAnnc8kXHWpzD6q-f14-8cDE,41576
141
141
  sky/provision/kubernetes/network.py,sha256=EpNjRQ131CXepqbdkoRKFu4szVrm0oKEpv1l8EgOkjU,12364
142
142
  sky/provision/kubernetes/network_utils.py,sha256=t1FS3K400fetH7cBuRgQJZl5_jEeMshsvsYmnMUcq8k,11399
143
143
  sky/provision/kubernetes/utils.py,sha256=2N5c4yA7CEn4DjvCiUO73W4XDEjgixcJRVdgs913QQE,89523
@@ -274,9 +274,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=k0TBoQ4zgf79-sVkixKSGYFHQ7Z
274
274
  sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
275
275
  sky/utils/kubernetes/rsync_helper.sh,sha256=hyYDaYSNxYaNvzUQBzC8AidB7nDeojizjkzc_CTxycY,1077
276
276
  sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=RFLJ3k7MR5UN4SKHykQ0lV9SgXumoULpKYIAt1vh-HU,6560
277
- skypilot_nightly-1.0.0.dev20241025.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
278
- skypilot_nightly-1.0.0.dev20241025.dist-info/METADATA,sha256=siLhZo4MgO_jZOW2C51DpjE_Uxw1MOaDZyaemct3w1g,19540
279
- skypilot_nightly-1.0.0.dev20241025.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
280
- skypilot_nightly-1.0.0.dev20241025.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
281
- skypilot_nightly-1.0.0.dev20241025.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
282
- skypilot_nightly-1.0.0.dev20241025.dist-info/RECORD,,
277
+ skypilot_nightly-1.0.0.dev20241026.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
278
+ skypilot_nightly-1.0.0.dev20241026.dist-info/METADATA,sha256=Gt6EEjuIDVBchJXkKxLBtgvYeJab6tYp-FhMeIPw9hc,19540
279
+ skypilot_nightly-1.0.0.dev20241026.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
280
+ skypilot_nightly-1.0.0.dev20241026.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
281
+ skypilot_nightly-1.0.0.dev20241026.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
282
+ skypilot_nightly-1.0.0.dev20241026.dist-info/RECORD,,