skypilot-nightly 1.0.0.dev20241020__py3-none-any.whl → 1.0.0.dev20241021__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = 'c6ae536d8dfedc3bbcf427a81480382b9d5f4c29'
8
+ _SKYPILOT_COMMIT_SHA = '3c3bcee5cfe720a96ab67f4049a557a79e7f077f'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20241020'
38
+ __version__ = '1.0.0.dev20241021'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/clouds/azure.py CHANGED
@@ -329,7 +329,6 @@ class Azure(clouds.Cloud):
329
329
  runcmd:
330
330
  - sed -i 's/#Banner none/Banner none/' /etc/ssh/sshd_config
331
331
  - echo '\\nif [ ! -f "/tmp/__restarted" ]; then\\n sudo systemctl restart ssh\\n sleep 2\\n touch /tmp/__restarted\\nfi' >> /home/skypilot:ssh_user/.bashrc
332
- - usermod -aG docker skypilot:ssh_user
333
332
  write_files:
334
333
  - path: /etc/apt/apt.conf.d/20auto-upgrades
335
334
  content: |
sky/clouds/gcp.py CHANGED
@@ -477,6 +477,9 @@ class GCP(clouds.Cloud):
477
477
  'runtime_version']
478
478
  resources_vars['tpu_node_name'] = r.accelerator_args.get(
479
479
  'tpu_name')
480
+ # TPU VMs require privileged mode for docker containers to
481
+ # access TPU devices.
482
+ resources_vars['docker_run_options'] = ['--privileged']
480
483
  else:
481
484
  # Convert to GCP names:
482
485
  # https://cloud.google.com/compute/docs/gpus
@@ -37,10 +37,6 @@ class Lambda(clouds.Cloud):
37
37
  _CLOUD_UNSUPPORTED_FEATURES = {
38
38
  clouds.CloudImplementationFeatures.STOP: 'Lambda cloud does not support stopping VMs.',
39
39
  clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER: f'Migrating disk is currently not supported on {_REPR}.',
40
- clouds.CloudImplementationFeatures.DOCKER_IMAGE: (
41
- f'Docker image is currently not supported on {_REPR}. '
42
- 'You can try running docker command inside the `run` section in task.yaml.'
43
- ),
44
40
  clouds.CloudImplementationFeatures.SPOT_INSTANCE: f'Spot instances are not supported in {_REPR}.',
45
41
  clouds.CloudImplementationFeatures.IMAGE_ID: f'Specifying image ID is not supported in {_REPR}.',
46
42
  clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER: f'Custom disk tiers are not supported in {_REPR}.',
@@ -173,12 +169,20 @@ class Lambda(clouds.Cloud):
173
169
  else:
174
170
  custom_resources = None
175
171
 
176
- return {
172
+ resources_vars = {
177
173
  'instance_type': resources.instance_type,
178
174
  'custom_resources': custom_resources,
179
175
  'region': region.name,
180
176
  }
181
177
 
178
+ if acc_dict is not None:
179
+ # Lambda cloud's docker runtime information does not contain
180
+ # 'nvidia-container-runtime', so no GPU option gets added to
181
+ # the docker run command. We patch this by adding it here.
182
+ resources_vars['docker_run_options'] = ['--gpus all']
183
+
184
+ return resources_vars
185
+
182
186
  def _get_feasible_launchable_resources(
183
187
  self, resources: 'resources_lib.Resources'
184
188
  ) -> 'resources_utils.FeasibleResources':
sky/clouds/oci.py CHANGED
@@ -17,6 +17,8 @@ History:
17
17
  make_deploy_resources_variables(): Bug fix for specify the image_id as
18
18
  the ocid of the image in the task.yaml file, in this case the image_id
19
19
  for the node config should be set to the ocid instead of a dict.
20
+ - Hysun He (hysun.he@oracle.com) @ Oct 13, 2024:
21
+ Support more OS types in addition to Ubuntu for OCI resources.
20
22
  """
21
23
  import json
22
24
  import logging
@@ -295,10 +297,21 @@ class OCI(clouds.Cloud):
295
297
  cpus=None if cpus is None else float(cpus),
296
298
  disk_tier=resources.disk_tier)
297
299
 
300
+ image_str = self._get_image_str(image_id=resources.image_id,
301
+ instance_type=resources.instance_type,
302
+ region=region.name)
303
+
304
+ # pylint: disable=import-outside-toplevel
305
+ from sky.clouds.service_catalog import oci_catalog
306
+ os_type = oci_catalog.get_image_os_from_tag(tag=image_str,
307
+ region=region.name)
308
+ logger.debug(f'OS type for the image {image_str} is {os_type}')
309
+
298
310
  return {
299
311
  'instance_type': instance_type,
300
312
  'custom_resources': custom_resources,
301
313
  'region': region.name,
314
+ 'os_type': os_type,
302
315
  'cpus': str(cpus),
303
316
  'memory': resources.memory,
304
317
  'disk_size': resources.disk_size,
@@ -501,59 +514,45 @@ class OCI(clouds.Cloud):
501
514
  region_name: str,
502
515
  instance_type: str,
503
516
  ) -> str:
504
- if image_id is None:
505
- return self._get_default_image(region_name=region_name,
506
- instance_type=instance_type)
507
- if None in image_id:
508
- image_id_str = image_id[None]
509
- else:
510
- assert region_name in image_id, image_id
511
- image_id_str = image_id[region_name]
517
+ image_id_str = self._get_image_str(image_id=image_id,
518
+ instance_type=instance_type,
519
+ region=region_name)
520
+
512
521
  if image_id_str.startswith('skypilot:'):
513
522
  image_id_str = service_catalog.get_image_id_from_tag(image_id_str,
514
523
  region_name,
515
524
  clouds='oci')
516
- if image_id_str is None:
517
- logger.critical(
518
- '! Real image_id not found! - {region_name}:{image_id}')
519
- # Raise ResourcesUnavailableError to make sure the failover
520
- # in CloudVMRayBackend will be correctly triggered.
521
- # TODO(zhwu): This is a information leakage to the cloud
522
- # implementor, we need to find a better way to handle this.
523
- raise exceptions.ResourcesUnavailableError(
524
- '! ERR: No image found in catalog for region '
525
- f'{region_name}. Try setting a valid image_id.')
525
+
526
+ # The image id should never be None here, except when the user
527
+ # specifies an image tag that does not exist in the image.csv catalog
528
+ # file, which is only possible in the "test" / "evaluation" phase.
529
+ # Therefore, we use assert here.
530
+ assert image_id_str is not None
526
531
 
527
532
  logger.debug(f'Got real image_id {image_id_str}')
528
533
  return image_id_str
529
534
 
530
- def _get_default_image(self, region_name: str, instance_type: str) -> str:
535
+ def _get_image_str(self, image_id: Optional[Dict[Optional[str], str]],
536
+ instance_type: str, region: str):
537
+ if image_id is None:
538
+ image_str = self._get_default_image_tag(instance_type)
539
+ elif None in image_id:
540
+ image_str = image_id[None]
541
+ else:
542
+ assert region in image_id, image_id
543
+ image_str = image_id[region]
544
+ return image_str
545
+
546
+ def _get_default_image_tag(self, instance_type: str) -> str:
531
547
  acc = self.get_accelerators_from_instance_type(instance_type)
532
548
 
533
549
  if acc is None:
534
550
  image_tag = oci_utils.oci_config.get_default_image_tag()
535
- image_id_str = service_catalog.get_image_id_from_tag(image_tag,
536
- region_name,
537
- clouds='oci')
538
551
  else:
539
552
  assert len(acc) == 1, acc
540
553
  image_tag = oci_utils.oci_config.get_default_gpu_image_tag()
541
- image_id_str = service_catalog.get_image_id_from_tag(image_tag,
542
- region_name,
543
- clouds='oci')
544
554
 
545
- if image_id_str is not None:
546
- logger.debug(
547
- f'Got default image_id {image_id_str} from tag {image_tag}')
548
- return image_id_str
549
-
550
- # Raise ResourcesUnavailableError to make sure the failover in
551
- # CloudVMRayBackend will be correctly triggered.
552
- # TODO(zhwu): This is a information leakage to the cloud implementor,
553
- # we need to find a better way to handle this.
554
- raise exceptions.ResourcesUnavailableError(
555
- 'ERR: No image found in catalog for region '
556
- f'{region_name}. Try update your default image_id settings.')
555
+ return image_tag
557
556
 
558
557
  def get_vpu_from_disktier(
559
558
  self, cpus: Optional[float],
@@ -7,6 +7,8 @@ History:
7
7
  - Hysun He (hysun.he@oracle.com) @ Apr, 2023: Initial implementation
8
8
  - Hysun He (hysun.he@oracle.com) @ Jun, 2023: Reduce retry times by
9
9
  excluding those unsubscribed regions.
10
+ - Hysun He (hysun.he@oracle.com) @ Oct 14, 2024: Bug fix for validation
11
+ of the Marketplace images
10
12
  """
11
13
 
12
14
  import logging
@@ -206,4 +208,24 @@ def get_image_id_from_tag(tag: str, region: Optional[str]) -> Optional[str]:
206
208
 
207
209
  def is_image_tag_valid(tag: str, region: Optional[str]) -> bool:
208
210
  """Returns whether the image tag is valid."""
211
+ # Oct.14, 2024 by Hysun He: Marketplace images are region neutral, so don't
212
+ # check with region for the Marketplace images.
213
+ df = _image_df[_image_df['Tag'].str.fullmatch(tag)]
214
+ if df.empty:
215
+ return False
216
+ app_catalog_listing_id = df['AppCatalogListingId'].iloc[0]
217
+ if app_catalog_listing_id:
218
+ return True
209
219
  return common.is_image_tag_valid_impl(_image_df, tag, region)
220
+
221
+
222
+ def get_image_os_from_tag(tag: str, region: Optional[str]) -> Optional[str]:
223
+ del region
224
+ df = _image_df[_image_df['Tag'].str.fullmatch(tag)]
225
+ if df.empty:
226
+ os_type = oci_utils.oci_config.get_default_image_os()
227
+ else:
228
+ os_type = df['OS'].iloc[0]
229
+
230
+ logger.debug(f'Operation system for the image {tag} is {os_type}')
231
+ return os_type
@@ -1,7 +1,9 @@
1
1
  """OCI Configuration.
2
2
  History:
3
- - Zhanghao Wu @ Oct 2023: Formatting and refactoring
4
3
  - Hysun He (hysun.he@oracle.com) @ Apr, 2023: Initial implementation
4
+ - Zhanghao Wu @ Oct 2023: Formatting and refactoring
5
+ - Hysun He (hysun.he@oracle.com) @ Oct, 2024: Add default image OS
6
+ configuration.
5
7
  """
6
8
  import logging
7
9
  import os
@@ -121,5 +123,13 @@ class OCIConfig:
121
123
  return skypilot_config.get_nested(
122
124
  ('oci', 'default', 'oci_config_profile'), 'DEFAULT')
123
125
 
126
+ @classmethod
127
+ def get_default_image_os(cls) -> str:
128
+ # Get the default image OS. Instead of hardcoding, we give a choice to
129
+ # set the default image OS type in the sky's user-config file. (if not
130
+ # specified, fall back to the hardcoded one)
131
+ return skypilot_config.get_nested(('oci', 'default', 'image_os_type'),
132
+ 'ubuntu')
133
+
124
134
 
125
135
  oci_config = OCIConfig()
@@ -253,12 +253,13 @@ class DockerInitializer:
253
253
  # issue with nvidia container toolkit:
254
254
  # https://github.com/NVIDIA/nvidia-container-toolkit/issues/48
255
255
  self._run(
256
- '[ -f /etc/docker/daemon.json ] || '
256
+ '{ which jq || sudo apt update && sudo apt install -y jq; } && '
257
+ '{ [ -f /etc/docker/daemon.json ] || '
257
258
  'echo "{}" | sudo tee /etc/docker/daemon.json;'
258
259
  'sudo jq \'.["exec-opts"] = ["native.cgroupdriver=cgroupfs"]\' '
259
260
  '/etc/docker/daemon.json > /tmp/daemon.json;'
260
261
  'sudo mv /tmp/daemon.json /etc/docker/daemon.json;'
261
- 'sudo systemctl restart docker')
262
+ 'sudo systemctl restart docker; } || true')
262
263
  user_docker_run_options = self.docker_config.get('run_options', [])
263
264
  start_command = docker_start_cmds(
264
265
  specific_image,
@@ -335,7 +336,11 @@ class DockerInitializer:
335
336
 
336
337
  def _check_docker_installed(self):
337
338
  no_exist = 'NoExist'
339
+ # SkyPilot: Add the current user to the docker group first (if needed),
340
+ # before checking if docker is installed to avoid permission issues.
338
341
  cleaned_output = self._run(
342
+ 'id -nG $USER | grep -qw docker || '
343
+ 'sudo usermod -aG docker $USER > /dev/null 2>&1;'
339
344
  f'command -v {self.docker_cmd} || echo {no_exist!r}')
340
345
  if no_exist in cleaned_output or 'docker' not in cleaned_output:
341
346
  logger.error(
@@ -424,8 +429,8 @@ class DockerInitializer:
424
429
  def _check_container_exited(self) -> bool:
425
430
  if self.initialized:
426
431
  return True
427
- output = (self._run(check_docker_running_cmd(self.container_name,
428
- self.docker_cmd),
429
- wait_for_docker_daemon=True))
430
- return 'false' in output.lower(
431
- ) and 'no such object' not in output.lower()
432
+ output = self._run(check_docker_running_cmd(self.container_name,
433
+ self.docker_cmd),
434
+ wait_for_docker_daemon=True)
435
+ return ('false' in output.lower() and
436
+ 'no such object' not in output.lower())
@@ -132,6 +132,8 @@ class PaperspaceCloudClient:
132
132
  'apt-get update \n'
133
133
  'apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin \n' # pylint: disable=line-too-long
134
134
  'fi \n'
135
+ # TODO(tian): Maybe remove this as well since we are now adding
136
+ # users to docker group in the DockerInitializer. Need to test.
135
137
  'usermod -aG docker paperspace \n'
136
138
  f'echo "{public_key}" >> /home/paperspace/.ssh/authorized_keys \n')
137
139
  try:
sky/resources.py CHANGED
@@ -842,12 +842,6 @@ class Resources:
842
842
 
843
843
  if self.extract_docker_image() is not None:
844
844
  # TODO(tian): validate the docker image exists / of reasonable size
845
- if self.accelerators is not None:
846
- for acc in self.accelerators.keys():
847
- if acc.lower().startswith('tpu'):
848
- with ux_utils.print_exception_no_traceback():
849
- raise ValueError(
850
- 'Docker image is not supported for TPU VM.')
851
845
  if self.cloud is not None:
852
846
  self.cloud.check_features_are_supported(
853
847
  self, {clouds.CloudImplementationFeatures.DOCKER_IMAGE})
@@ -1032,6 +1026,12 @@ class Resources:
1032
1026
  self.accelerators is not None):
1033
1027
  initial_setup_commands = [constants.DISABLE_GPU_ECC_COMMAND]
1034
1028
 
1029
+ docker_image = self.extract_docker_image()
1030
+
1031
+ # Cloud specific variables
1032
+ cloud_specific_variables = self.cloud.make_deploy_resources_variables(
1033
+ self, cluster_name, region, zones, dryrun)
1034
+
1035
1035
  # Docker run options
1036
1036
  docker_run_options = skypilot_config.get_nested(
1037
1037
  ('docker', 'run_options'),
@@ -1039,18 +1039,17 @@ class Resources:
1039
1039
  override_configs=self.cluster_config_overrides)
1040
1040
  if isinstance(docker_run_options, str):
1041
1041
  docker_run_options = [docker_run_options]
1042
+ # Special accelerator runtime might require additional docker run
1043
+ # options. e.g., for TPU, we need --privileged.
1044
+ if 'docker_run_options' in cloud_specific_variables:
1045
+ docker_run_options.extend(
1046
+ cloud_specific_variables['docker_run_options'])
1042
1047
  if docker_run_options and isinstance(self.cloud, clouds.Kubernetes):
1043
1048
  logger.warning(
1044
1049
  f'{colorama.Style.DIM}Docker run options are specified, '
1045
1050
  'but ignored for Kubernetes: '
1046
1051
  f'{" ".join(docker_run_options)}'
1047
1052
  f'{colorama.Style.RESET_ALL}')
1048
-
1049
- docker_image = self.extract_docker_image()
1050
-
1051
- # Cloud specific variables
1052
- cloud_specific_variables = self.cloud.make_deploy_resources_variables(
1053
- self, cluster_name, region, zones, dryrun)
1054
1053
  return dict(
1055
1054
  cloud_specific_variables,
1056
1055
  **{
sky/serve/serve_utils.py CHANGED
@@ -246,9 +246,11 @@ def set_service_status_and_active_versions_from_replica(
246
246
  update_mode: UpdateMode) -> None:
247
247
  record = serve_state.get_service_from_name(service_name)
248
248
  if record is None:
249
- raise ValueError('The service is up-ed in an old version and does not '
250
- 'support update. Please `sky serve down` '
251
- 'it first and relaunch the service.')
249
+ with ux_utils.print_exception_no_traceback():
250
+ raise ValueError(
251
+ 'The service is up-ed in an old version and does not '
252
+ 'support update. Please `sky serve down` '
253
+ 'it first and relaunch the service.')
252
254
  if record['status'] == serve_state.ServiceStatus.SHUTTING_DOWN:
253
255
  # When the service is shutting down, there is a period of time which the
254
256
  # controller still responds to the request, and the replica is not
@@ -289,7 +291,8 @@ def update_service_status() -> None:
289
291
  def update_service_encoded(service_name: str, version: int, mode: str) -> str:
290
292
  service_status = _get_service_status(service_name)
291
293
  if service_status is None:
292
- raise ValueError(f'Service {service_name!r} does not exist.')
294
+ with ux_utils.print_exception_no_traceback():
295
+ raise ValueError(f'Service {service_name!r} does not exist.')
293
296
  controller_port = service_status['controller_port']
294
297
  resp = requests.post(
295
298
  _CONTROLLER_URL.format(CONTROLLER_PORT=controller_port) +
@@ -299,15 +302,21 @@ def update_service_encoded(service_name: str, version: int, mode: str) -> str:
299
302
  'mode': mode,
300
303
  })
301
304
  if resp.status_code == 404:
302
- raise ValueError('The service is up-ed in an old version and does not '
303
- 'support update. Please `sky serve down` '
304
- 'it first and relaunch the service. ')
305
+ with ux_utils.print_exception_no_traceback():
306
+ raise ValueError(
307
+ 'The service is up-ed in an old version and does not '
308
+ 'support update. Please `sky serve down` '
309
+ 'it first and relaunch the service. ')
305
310
  elif resp.status_code == 400:
306
- raise ValueError(f'Client error during service update: {resp.text}')
311
+ with ux_utils.print_exception_no_traceback():
312
+ raise ValueError(f'Client error during service update: {resp.text}')
307
313
  elif resp.status_code == 500:
308
- raise RuntimeError(f'Server error during service update: {resp.text}')
314
+ with ux_utils.print_exception_no_traceback():
315
+ raise RuntimeError(
316
+ f'Server error during service update: {resp.text}')
309
317
  elif resp.status_code != 200:
310
- raise ValueError(f'Failed to update service: {resp.text}')
318
+ with ux_utils.print_exception_no_traceback():
319
+ raise ValueError(f'Failed to update service: {resp.text}')
311
320
 
312
321
  service_msg = resp.json()['message']
313
322
  return common_utils.encode_payload(service_msg)
@@ -674,8 +683,9 @@ def stream_replica_logs(service_name: str, replica_id: int,
674
683
  for info in replica_info:
675
684
  if info.replica_id == replica_id:
676
685
  return info.status
677
- raise ValueError(
678
- _FAILED_TO_FIND_REPLICA_MSG.format(replica_id=replica_id))
686
+ with ux_utils.print_exception_no_traceback():
687
+ raise ValueError(
688
+ _FAILED_TO_FIND_REPLICA_MSG.format(replica_id=replica_id))
679
689
 
680
690
  finish_stream = (
681
691
  lambda: _get_replica_status() != serve_state.ReplicaStatus.PROVISIONING)
@@ -5,6 +5,26 @@ max_workers: {{num_nodes - 1}}
5
5
  upscaling_speed: {{num_nodes - 1}}
6
6
  idle_timeout_minutes: 60
7
7
 
8
+ {%- if docker_image is not none %}
9
+ docker:
10
+ image: {{docker_image}}
11
+ container_name: {{docker_container_name}}
12
+ run_options:
13
+ - --ulimit nofile=1048576:1048576
14
+ {%- for run_option in docker_run_options %}
15
+ - {{run_option}}
16
+ {%- endfor %}
17
+ {%- if docker_login_config is not none %}
18
+ docker_login_config:
19
+ username: |-
20
+ {{docker_login_config.username}}
21
+ password: |-
22
+ {{docker_login_config.password}}
23
+ server: |-
24
+ {{docker_login_config.server}}
25
+ {%- endif %}
26
+ {%- endif %}
27
+
8
28
  provider:
9
29
  type: external
10
30
  module: sky.provision.lambda
@@ -16,7 +16,11 @@ provider:
16
16
  disable_launch_config_check: true
17
17
 
18
18
  auth:
19
+ {% if os_type == "ubuntu" %}
19
20
  ssh_user: ubuntu
21
+ {% else %}
22
+ ssh_user: opc
23
+ {% endif %}
20
24
  ssh_private_key: {{ssh_private_key}}
21
25
 
22
26
  available_node_types:
@@ -85,14 +89,20 @@ setup_commands:
85
89
  # Line 'sudo grep ..': set the number of threads per process to unlimited to avoid ray job submit getting stuck when the number of running ray jobs increases.
86
90
  # Line 'mkdir -p ..': disable host key check
87
91
  # Line 'python3 -c ..': patch the buggy ray files and enable `-o allow_other` option for `goofys`
88
- - sudo systemctl stop unattended-upgrades || true;
92
+ - echo "setup commands runs at $(date)" > /tmp/provision.tmp.out || true;
93
+ {%- if os_type == "ubuntu" %}
94
+ sudo systemctl stop unattended-upgrades || true;
89
95
  sudo systemctl disable unattended-upgrades || true;
90
96
  sudo sed -i 's/Unattended-Upgrade "1"/Unattended-Upgrade "0"/g' /etc/apt/apt.conf.d/20auto-upgrades || true;
91
97
  sudo kill -9 `sudo lsof /var/lib/dpkg/lock-frontend | awk '{print $2}' | tail -n 1` || true;
92
98
  sudo pkill -9 apt-get;
93
99
  sudo pkill -9 dpkg;
94
100
  sudo dpkg --configure -a;
95
- ([ `sudo lshw -class display | grep "NVIDIA Corporation" | wc -l` -gt 0 ]) && (sudo which nvidia-smi > /dev/null || ( sudo apt-get install nvidia-driver-530-open -y && sudo apt-get install nvidia-driver-525-server -y ) || true);
101
+ {%- else %}
102
+ sudo /usr/libexec/oci-growfs -y || true;
103
+ sudo systemctl stop firewalld || true;
104
+ sudo systemctl disable firewalld || true;
105
+ {%- endif %}
96
106
  mkdir -p ~/.ssh; touch ~/.ssh/config;
97
107
  {{ conda_installation_commands }}
98
108
  {{ ray_skypilot_installation_commands }}
@@ -502,8 +502,10 @@ class SSHCommandRunner(CommandRunner):
502
502
  if self.ssh_control_name is not None:
503
503
  control_path = _ssh_control_path(self.ssh_control_name)
504
504
  if control_path is not None:
505
+ # Suppress the `Exit request sent.` output for this command
506
+ # which would interrupt the CLI spinner.
505
507
  cmd = (f'ssh -O exit -S {control_path}/%C '
506
- f'{self.ssh_user}@{self.ip}')
508
+ f'{self.ssh_user}@{self.ip} > /dev/null 2>&1')
507
509
  logger.debug(f'Closing cached connection {control_path!r} with '
508
510
  f'cmd: {cmd}')
509
511
  log_lib.run_with_log(cmd,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20241020
3
+ Version: 1.0.0.dev20241021
4
4
  Summary: SkyPilot: An intercloud broker for the clouds
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -1,4 +1,4 @@
1
- sky/__init__.py,sha256=njbGTeVXmuel8rNQYbPE9POlsdZUizsa3jEcGfwJklE,5854
1
+ sky/__init__.py,sha256=3RKD64rxAs9PlurlTvQY9SSAILIPEznrfVw4n_oyctk,5854
2
2
  sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
3
3
  sky/authentication.py,sha256=pAdCT60OxxiXI9KXDyP2lQ9u9vMc6aMtq5Xi2h_hbdw,20984
4
4
  sky/check.py,sha256=jLMIIJrseaZj1_o5WkbaD9XdyXIlCaT6pyAaIFdhdmA,9079
@@ -10,7 +10,7 @@ sky/exceptions.py,sha256=D7WARzYRt4dGjXo6gI-gzkoodZbKF1D-qncm_DbHB28,8846
10
10
  sky/execution.py,sha256=CbrKMgfc2JgLqZqwPvmYKxbWAQKYqHpOLpUEOb-k2m0,24679
11
11
  sky/global_user_state.py,sha256=PywEmUutF97XBgRMClR6IS5_KM8JJC0oA1LsPUZebp0,28681
12
12
  sky/optimizer.py,sha256=OzxWiA6ZC0tyJ1eNMy4e72vitjfLKfbOLF9ywZOccXU,59343
13
- sky/resources.py,sha256=b9yaZvZkL-QZdElQLHsEZ2jhKgId2ixG8M2Z8DLBBKU,67450
13
+ sky/resources.py,sha256=Qk_CYvLO8OFsnRLqXu-nG6qXfJEZ2aBMzxFJHYaXTvE,67398
14
14
  sky/sky_logging.py,sha256=oLmTmwkuucIto3LHXLJfMcyRpYSkmZAZa5XzQPA5IHk,4434
15
15
  sky/skypilot_config.py,sha256=E3g65cX3P3dT9b5N0GgFBG6yB0FXwIGpisKoozmJmWU,9094
16
16
  sky/status_lib.py,sha256=J7Jb4_Dz0v2T64ttOdyUgpokvl4S0sBJrMfH7Fvo51A,1457
@@ -41,16 +41,16 @@ sky/benchmark/benchmark_state.py,sha256=X8CXmuU9KgsDRhKedhFgjeRMUFWtQsjFs1qECvPG
41
41
  sky/benchmark/benchmark_utils.py,sha256=eb-i6zYoo-Zkod-T9qtCu1FcYLw--Yyos1SyibUPZNE,26194
42
42
  sky/clouds/__init__.py,sha256=WuNIJEnZmBO72tU5awgaaL3rdvFRSkgaYNNeuY68dXo,1356
43
43
  sky/clouds/aws.py,sha256=XJVbOSkVVUHp9HbHDp0rFdHX113JHbY-3sgokGdNJVE,49527
44
- sky/clouds/azure.py,sha256=Yp_a1Lzvq4s47eRMeyVheDv9pC0hSPogCiTMYf-a5ZE,28687
44
+ sky/clouds/azure.py,sha256=jTgynKU5tuOyBe97n2I7_k9P0Sw0QFU-6wLDLFwQhfM,28634
45
45
  sky/clouds/cloud.py,sha256=PPk-Cbf1YbJT8bswcQLtPBtko02OWrRGJKkLzDpytTI,34858
46
46
  sky/clouds/cloud_registry.py,sha256=4yQMv-iBSgyN5aNL4Qxbn0JVE-dkVoEUIgj7S1z9S_Q,955
47
47
  sky/clouds/cudo.py,sha256=H4VyMo5wWGAv2MXZ3xsbWjlZA_cZYnt4ecNlTOOao8Y,13147
48
48
  sky/clouds/fluidstack.py,sha256=iOmoOx52yTrHKMzwBDaxFJCfNo79M61d5tj-Np24Lyc,12436
49
- sky/clouds/gcp.py,sha256=lUImS2WJIcUOtrgrVz8zaR4yPGqALqZ0lSmLbjN9xLU,54470
49
+ sky/clouds/gcp.py,sha256=m_dH04HqgU-DdW4R9wrSr66IpPt9JMKHEvHEGGFpeRo,54655
50
50
  sky/clouds/ibm.py,sha256=M8QdjeSFlwssfoY2aOodxG4q5R3eT9K-4lTPDHYvEYI,21476
51
51
  sky/clouds/kubernetes.py,sha256=aWoXWR-S4puZHzuUHroLKxLdTpkqU7j75dQlXECnsmE,28679
52
- sky/clouds/lambda_cloud.py,sha256=VtJ2mmwMT1X4zrzgt3FXM61zmrrgoELZHFgsdYVesPY,12562
53
- sky/clouds/oci.py,sha256=WXtxKwDBgi3He4ayi4qzJ4Y659Bi6xU8hWmYLHwiQYs,27371
52
+ sky/clouds/lambda_cloud.py,sha256=11dKUSunHUgaPZ1t8O85X29_NJ-o26sCt5DjwAPFgl4,12697
53
+ sky/clouds/oci.py,sha256=ecVgcbCVJwDLtaYXs-yGDzwPYRr23KvjnzFOXwaY2O0,26914
54
54
  sky/clouds/paperspace.py,sha256=lmUZPYAblaqiBmGQwCunccMiTF_dVA1o3vqY9Q_Nc28,10921
55
55
  sky/clouds/runpod.py,sha256=lstUC6f4JDhtcH9NfwkbpCJMmfmvMigoanhPXPbTYds,11540
56
56
  sky/clouds/scp.py,sha256=2KLTuNSMdBzK8CLwSesv7efOuiLidIMoyNG4AOt5Sqw,15870
@@ -67,7 +67,7 @@ sky/clouds/service_catalog/gcp_catalog.py,sha256=v_5fsB3dB9oD8U7lBKnCe5ii6AUWEOi
67
67
  sky/clouds/service_catalog/ibm_catalog.py,sha256=0dzjmXABFECzaAuIa0E6pVINhVK6-G6U52Mj-L45gK8,4472
68
68
  sky/clouds/service_catalog/kubernetes_catalog.py,sha256=6OocEUkgyJtBgHwzu4RPsvru6pj6RwGU-4uSFNQmsSM,8254
69
69
  sky/clouds/service_catalog/lambda_catalog.py,sha256=BAhUGqHj8aVe1zUhEQNO7bQUhcd9jAespGvPyQubTJY,5281
70
- sky/clouds/service_catalog/oci_catalog.py,sha256=tcV8_rsv_7_aTlcfTkq0XKdKRTFgwh8-rjyxVzPiYwQ,7744
70
+ sky/clouds/service_catalog/oci_catalog.py,sha256=AG1mOgc-iWaX4zapONWMZPNd2RCKCsaNOyFc0eq_LFU,8551
71
71
  sky/clouds/service_catalog/paperspace_catalog.py,sha256=W8GgGlPbbWViELQ8EZfmIkxSbeQcCmMRUX4ecIIYDsk,3768
72
72
  sky/clouds/service_catalog/runpod_catalog.py,sha256=NwZlolzihZeRxQKYIDhoXeUkJ3BSH1S6B_DszNDXT1g,4184
73
73
  sky/clouds/service_catalog/scp_catalog.py,sha256=4XnaZE5Q4XrrNnDnVhsHkH6jxmWXBeQqa9QqKqHKjSI,5174
@@ -84,7 +84,7 @@ sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py,sha256=SF_gTU74qg6L-DS
84
84
  sky/clouds/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
85
  sky/clouds/utils/aws_utils.py,sha256=W5BRC-2F_VY4BymRA1kS6-MufsI3V8cfY_hv--4gJBU,1986
86
86
  sky/clouds/utils/gcp_utils.py,sha256=Xc_COjJfDt__oqVwrCw7ejY2B7ptHjMjDVb8obcpJ6s,6968
87
- sky/clouds/utils/oci_utils.py,sha256=LT_RtPQ2B1wlSF0e9PSD3NWxFFIzovcZeDjO-dyOghU,4482
87
+ sky/clouds/utils/oci_utils.py,sha256=t-5QEQEs8swN683AAp-oDD6yQJOQqVBbsVcHkNyqnbU,4968
88
88
  sky/clouds/utils/scp_utils.py,sha256=RUp7NwyhKygOoVOwvdAOGdoQNSJjryOG6WSExCf-yas,15812
89
89
  sky/data/__init__.py,sha256=Nhaf1NURisXpZuwWANa2IuCyppIuc720FRwqSE2oEwY,184
90
90
  sky/data/data_transfer.py,sha256=MBmjey9_p2L3IKNKTi8um09SlZe32n4wK3CkVnlTVvo,7346
@@ -105,7 +105,7 @@ sky/jobs/dashboard/templates/index.html,sha256=DBKMYEkkJ6sgLYod9ro7drgL8Y_neDsCx
105
105
  sky/provision/__init__.py,sha256=UhYsGRribEyK1--PPT0Dom9051jlpdn8UCNhO8qpPOc,6262
106
106
  sky/provision/common.py,sha256=E8AlSUFcn0FYQq1erNmoVfMAdsF9tP2yxfyk-9PLvQU,10286
107
107
  sky/provision/constants.py,sha256=DvHj3wpqdpaSBHMOGIfVWLLWGJoz0eOQAx73DwYMNEk,531
108
- sky/provision/docker_utils.py,sha256=Z7vDUs9Yjqks_CsWrACcTgABIZuFi3EJVFwkU0WsdD0,18832
108
+ sky/provision/docker_utils.py,sha256=cKYasCwbMf6C2_0vTxg2GvbrnhFvko-xDl1frfm7wxc,19199
109
109
  sky/provision/instance_setup.py,sha256=n1Px_KOYZl7Rf1WLXrfTTHyqxyA8_5QTN9BNLjQRkgc,22427
110
110
  sky/provision/logging.py,sha256=yZWgejrFBhhRjAtvFu5N5bRXIMK5TuwNjp1vKQqz2pw,2103
111
111
  sky/provision/metadata_utils.py,sha256=LrxeV4wD2QPzNdXV_npj8q-pr35FatxBBjF_jSbpOT0,4013
@@ -151,7 +151,7 @@ sky/provision/paperspace/__init__.py,sha256=1nbUPWio7UA5gCQkO_rfEDfgXT17u5OtuByx
151
151
  sky/provision/paperspace/config.py,sha256=oNmffSt-V466pE0DmML8hOCX1CiA24jAqE5JEKuqpyI,1541
152
152
  sky/provision/paperspace/constants.py,sha256=NcLJGivJxshJwhR28yVHysWQ2gtMAkTVmHC91d3kyKM,957
153
153
  sky/provision/paperspace/instance.py,sha256=q_V01DZSMXLfy63Zwt6AQotq02JuXQZb5CHS_JttlwE,12046
154
- sky/provision/paperspace/utils.py,sha256=Bl3POslZjtZU_wbBIXid7ubhRy2j5kpsesR85q7MN5w,9428
154
+ sky/provision/paperspace/utils.py,sha256=uOmxbDKjV6skFizC4gYXSxRuEqso5ck2kF7MbtNmhEs,9580
155
155
  sky/provision/runpod/__init__.py,sha256=6HYvHI27EaLrX1SS0vWVhdLu5HDBeZCdvAeDJuwM5pk,556
156
156
  sky/provision/runpod/config.py,sha256=9ulZJVL7nHuxhTdoj8D7lNn7SdicJ5zc6FIcHIG9tcg,321
157
157
  sky/provision/runpod/instance.py,sha256=ucmFQEzapbxylsl6K9EUo7bHTZYzvfECo6tpJc-MFrw,9577
@@ -180,7 +180,7 @@ sky/serve/load_balancer.py,sha256=aUfDsgUT_fYrchCwJCeunMPXmAkwJAY58BEu-IN2FaA,11
180
180
  sky/serve/load_balancing_policies.py,sha256=ExdwH_pxPYpJ6CkoTQCOPSa4lzwbq1LFFMKzmIu8ryk,2331
181
181
  sky/serve/replica_managers.py,sha256=1xYDK9Te5wFEF5hUK0gyNIUib0MY-HScLHUBDlTSl-k,57774
182
182
  sky/serve/serve_state.py,sha256=5BZSKKKxQRk-0mku17Ch4Veu4qOhaFvaOJY3zrZCkLw,19315
183
- sky/serve/serve_utils.py,sha256=egGb4HB4yMyFISqZgMWnoHH8AfuLGt3xq4raU8V0qds,39755
183
+ sky/serve/serve_utils.py,sha256=9tqh7i-99Kll-24sKhfjEzjTOnGXWJQdeqIyNkFVoMo,40180
184
184
  sky/serve/service.py,sha256=fkfJvNJ2BO6rfV0TblZG-QkOXaCyZlpkwbGgrsTzf2w,11872
185
185
  sky/serve/service_spec.py,sha256=iRhW95SERvb4NWtV10uCuhgvW31HuSAmZZ55OX0WK8s,15309
186
186
  sky/setup_files/MANIFEST.in,sha256=CXz8lIJMgWlH9TvYgzIL3vPFtSDoQq-UMfD9K62rtH4,590
@@ -230,9 +230,9 @@ sky/templates/kubernetes-loadbalancer.yml.j2,sha256=IxrNYM366N01bbkJEbZ_UPYxUP8w
230
230
  sky/templates/kubernetes-port-forward-proxy-command.sh,sha256=HlG7CPBBedCVBlL9qv0erW_eKm6Irj0LFyaAWuJW_lc,3148
231
231
  sky/templates/kubernetes-ray.yml.j2,sha256=Wq9luXc6-t141uyHbtOy1IDmLMM0PBbePTZfZEtAKw0,18160
232
232
  sky/templates/kubernetes-ssh-jump.yml.j2,sha256=k5W5sOIMppU7dDkJMwPlqsUcb92y7L5_TVG3hkgMy8M,2747
233
- sky/templates/lambda-ray.yml.j2,sha256=oMbrfv3zHoD1v1XXMLCLK1vB7wLBU1Z_jNpC4-5lGVo,3985
233
+ sky/templates/lambda-ray.yml.j2,sha256=HyvO_tX2vxwSsc4IFVSqGuIbjLMk0bevP9bcxb8ZQII,4498
234
234
  sky/templates/local-ray.yml.j2,sha256=FNHeyHF6nW9nU9QLIZceUWfvrFTTcO51KqhTnYCEFaA,1185
235
- sky/templates/oci-ray.yml.j2,sha256=5XfIobW9XuspIpEhI4vFIEcJEFCdtFJqEGfX03zL6DE,7032
235
+ sky/templates/oci-ray.yml.j2,sha256=E-xnadts-x88vYRI1QGFzgfGGKFospmo2N9d_0cPN5I,7144
236
236
  sky/templates/paperspace-ray.yml.j2,sha256=HQjZNamrB_a4fOMCxQXSVdV5JIHtbGtAE0JzEO8uuVQ,4021
237
237
  sky/templates/runpod-ray.yml.j2,sha256=p3BtYBHzROtNJqnjEo1xCmGSJQfCZYdarWszhDYyl0Q,3697
238
238
  sky/templates/scp-ray.yml.j2,sha256=I9u8Ax-lit-d6UrCC9BVU8avst8w1cwK6TrzZBcz_JM,5608
@@ -245,7 +245,7 @@ sky/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
245
245
  sky/utils/accelerator_registry.py,sha256=BO4iYH5bV80Xyp4EPfO0n1D3LL0FvESCy7xm59Je3_o,3798
246
246
  sky/utils/admin_policy_utils.py,sha256=zFCu1OFIrZRfQNY0JFRO1502WFfdqZhwAU_QgM4fO9U,5943
247
247
  sky/utils/cluster_yaml_utils.py,sha256=1wRRYqI1kI-eFs1pMW4r_FFjHJ0zamq6v2RRI-Gtx5E,849
248
- sky/utils/command_runner.py,sha256=ZIu4aur4yxtjHu60Na9o90Iu-g48_yeWXo-NNfmzs-w,34634
248
+ sky/utils/command_runner.py,sha256=TEFJlmIGzlZxZppcBdwDK4AscM0-08L2XRFwQIRK9OA,34784
249
249
  sky/utils/command_runner.pyi,sha256=mJOzCgcYZAfHwnY_6Wf1YwlTEJGb9ihzc2f0rE0Kw98,7751
250
250
  sky/utils/common_utils.py,sha256=MwFhIcvCEMBo7kbENUjN3qRNO5SoMV0fzAORc65c5x0,24525
251
251
  sky/utils/controller_utils.py,sha256=V05hiLJIjqqXssYzs_Gchk4-tijgpMgLJsRW8ymhS-E,40625
@@ -274,9 +274,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=KPqp23B-zQ2SZK03jdHeF9fLTog
274
274
  sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
275
275
  sky/utils/kubernetes/rsync_helper.sh,sha256=aRMa_0JRHtXFOPtEg4rFAwR1t57wvvAoGZhn3H3BtGk,1059
276
276
  sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=RFLJ3k7MR5UN4SKHykQ0lV9SgXumoULpKYIAt1vh-HU,6560
277
- skypilot_nightly-1.0.0.dev20241020.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
278
- skypilot_nightly-1.0.0.dev20241020.dist-info/METADATA,sha256=L8MmlJIr14EcjFiMMWqkaWvJsVZnm_SmhgzceSuhdRs,19540
279
- skypilot_nightly-1.0.0.dev20241020.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
280
- skypilot_nightly-1.0.0.dev20241020.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
281
- skypilot_nightly-1.0.0.dev20241020.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
282
- skypilot_nightly-1.0.0.dev20241020.dist-info/RECORD,,
277
+ skypilot_nightly-1.0.0.dev20241021.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
278
+ skypilot_nightly-1.0.0.dev20241021.dist-info/METADATA,sha256=mv3zgaXoDB_-9jX9Sk5tEP5oiCfv5xZ76gMbFothu4g,19540
279
+ skypilot_nightly-1.0.0.dev20241021.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
280
+ skypilot_nightly-1.0.0.dev20241021.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
281
+ skypilot_nightly-1.0.0.dev20241021.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
282
+ skypilot_nightly-1.0.0.dev20241021.dist-info/RECORD,,