skypilot-nightly 1.0.0.dev20250211__py3-none-any.whl → 1.0.0.dev20250213__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
 import urllib.request
 
 # Replaced with the current commit when building the wheels.
-_SKYPILOT_COMMIT_SHA = '1064e8a074909c5d13f862357ed0e3186079a8e8'
+_SKYPILOT_COMMIT_SHA = 'c49961417a83b049b3f3435a252c8ec5ea0fb5e6'
 
 
 def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
 
 
 __commit__ = _get_git_commit()
-__version__ = '1.0.0.dev20250211'
+__version__ = '1.0.0.dev20250213'
 __root_dir__ = os.path.dirname(os.path.abspath(__file__))
 
 
sky/cli.py CHANGED
@@ -623,7 +623,8 @@ def _launch_with_confirm(
     click.confirm(prompt, default=True, abort=True, show_default=True)
 
     if not confirm_shown:
-        click.secho(f'Running task on cluster {cluster}...', fg='yellow')
+        click.secho('Running on cluster: ', fg='cyan', nl=False)
+        click.secho(cluster)
 
     sky.launch(
         dag,
@@ -722,7 +723,6 @@ def _pop_and_ignore_fields_in_override_params(
 def _make_task_or_dag_from_entrypoint_with_overrides(
     entrypoint: Tuple[str, ...],
     *,
-    entrypoint_name: str = 'Task',
     name: Optional[str] = None,
     workdir: Optional[str] = None,
     cloud: Optional[str] = None,
@@ -754,19 +754,15 @@ def _make_task_or_dag_from_entrypoint_with_overrides(
     entrypoint: Optional[str]
     if is_yaml:
         # Treat entrypoint as a yaml.
-        click.secho(f'{entrypoint_name} from YAML spec: ',
-                    fg='yellow',
-                    nl=False)
-        click.secho(entrypoint, bold=True)
+        click.secho('YAML to run: ', fg='cyan', nl=False)
+        click.secho(entrypoint)
     else:
         if not entrypoint:
             entrypoint = None
         else:
             # Treat entrypoint as a bash command.
-            click.secho(f'{entrypoint_name} from command: ',
-                        fg='yellow',
-                        nl=False)
-            click.secho(entrypoint, bold=True)
+            click.secho('Command to run: ', fg='cyan', nl=False)
+            click.secho(entrypoint)
 
     override_params = _parse_override_params(cloud=cloud,
                                              region=region,
@@ -1333,7 +1329,8 @@ def exec(
                 'supports a single task only.')
         task = task_or_dag
 
-    click.secho(f'Executing task on cluster {cluster}...', fg='yellow')
+    click.secho('Submitting job to cluster: ', fg='cyan', nl=False)
+    click.secho(cluster)
     sky.exec(task, backend=backend, cluster_name=cluster, detach_run=detach_run)
 
 
@@ -1982,7 +1979,7 @@ def cost_report(all: bool):  # pylint: disable=redefined-builtin
 def queue(clusters: List[str], skip_finished: bool, all_users: bool):
     # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
     """Show the job queue for cluster(s)."""
-    click.secho('Fetching and parsing job queue...', fg='yellow')
+    click.secho('Fetching and parsing job queue...', fg='cyan')
     if clusters:
         clusters = _get_glob_clusters(clusters)
     else:
@@ -3785,7 +3782,7 @@ def jobs_queue(all: bool, refresh: bool, skip_finished: bool):
       watch -n60 sky jobs queue
 
     """
-    click.secho('Fetching managed job statuses...', fg='yellow')
+    click.secho('Fetching managed jobs...', fg='cyan')
     with rich_utils.safe_status(
             ux_utils.spinner_message('Checking managed jobs')):
         _, msg = _get_managed_jobs(refresh=refresh,
@@ -3938,7 +3935,7 @@ def jobs_dashboard(port: Optional[int]):
     # see if the controller is UP first, which is slow; (2) not have to run SSH
     # port forwarding first (we'd just launch a local dashboard which would make
     # REST API calls to the controller dashboard server).
-    click.secho('Checking if jobs controller is up...', fg='yellow')
+    click.secho('Checking if jobs controller is up...', fg='cyan')
     hint = ('Dashboard is not available if jobs controller is not up. Run a '
             'managed job first.')
     backend_utils.is_controller_accessible(
@@ -4032,7 +4029,6 @@ def _generate_task_with_service(
         disk_size=disk_size,
         disk_tier=disk_tier,
         ports=ports,
-        entrypoint_name='Service',
     )
     if isinstance(task, sky.Dag):
         raise click.UsageError(
@@ -4197,7 +4193,7 @@ def serve_up(
         ports=ports,
         not_supported_cmd='sky serve up',
     )
-    click.secho('Service Spec:', fg='cyan')
+    click.secho('Service spec:', fg='cyan')
     click.echo(task.service)
 
     click.secho('Each replica will use the following resources (estimated):',
@@ -4315,7 +4311,7 @@ def serve_update(
         ports=ports,
         not_supported_cmd='sky serve update',
     )
-    click.secho('Service Spec:', fg='cyan')
+    click.secho('Service spec:', fg='cyan')
     click.echo(task.service)
 
     click.secho('New replica will use the following resources (estimated):',
@@ -4767,7 +4763,7 @@ def benchmark_launch(
                          'Please provide a YAML file.')
     assert config is not None, (is_yaml, config)
 
-    click.secho('Benchmarking a task from YAML spec: ', fg='yellow', nl=False)
+    click.secho('Benchmarking a task from YAML: ', fg='cyan', nl=False)
     click.secho(entrypoint, bold=True)
 
     candidates = _get_candidate_configs(entrypoint)
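Note: the CLI messages above switch from a single yellow line to a cyan label printed without a trailing newline, followed by the plain value. A minimal illustrative sketch of that two-call click pattern (not part of the package itself):

    import click

    # Print a cyan label, suppress the newline, then print the value on the
    # same line - the pattern used by the new 'Running on cluster:' output.
    cluster = 'my-cluster'
    click.secho('Running on cluster: ', fg='cyan', nl=False)
    click.secho(cluster)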
sky/clouds/kubernetes.py CHANGED
@@ -464,7 +464,9 @@ class Kubernetes(clouds.Cloud):
             # CPU resources on the node instead within the pod.
             custom_ray_options = {
                 'object-store-memory': 500000000,
-                'num-cpus': str(int(cpus)),
+                # 'num-cpus' must be an integer, but we should not set it to 0 if
+                # cpus is <1.
+                'num-cpus': str(max(int(cpus), 1)),
             }
         deploy_vars = {
             'instance_type': resources.instance_type,
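Note: the 'num-cpus' change above only matters for fractional CPU requests; per the added comment, the value must be an integer and should not become 0. A standalone sketch of the difference (not SkyPilot code):

    # With a request of half a CPU, the old expression produced '0' while the
    # new one clamps to the smallest valid integer, '1'.
    cpus = 0.5
    old_value = str(int(cpus))            # '0'
    new_value = str(max(int(cpus), 1))    # '1'
    print(old_value, new_value)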
sky/clouds/runpod.py CHANGED
@@ -177,6 +177,11 @@ class RunPod(clouds.Cloud):
         hourly_cost = self.instance_type_to_hourly_cost(
             instance_type=instance_type, use_spot=use_spot)
 
+        # default to root
+        docker_username_for_runpod = (resources.docker_username_for_runpod
+                                      if resources.docker_username_for_runpod
+                                      is not None else 'root')
+
         return {
             'instance_type': instance_type,
             'custom_resources': custom_resources,
@@ -184,6 +189,7 @@ class RunPod(clouds.Cloud):
             'image_id': image_id,
             'use_spot': use_spot,
             'bid_per_gpu': str(hourly_cost),
+            'docker_username_for_runpod': docker_username_for_runpod,
         }
 
     def _get_feasible_launchable_resources(
sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py CHANGED
@@ -28,6 +28,7 @@ REGIONS = [
     'asia-northeast-2',
     'us-east-1',
     'us-east-2',
+    'us-east-3',
     'us-west-2',
     'us-west-1',
     'us-south-1',
sky/execution.py CHANGED
@@ -259,8 +259,8 @@ def _execute(
         bold = colorama.Style.BRIGHT
         reset = colorama.Style.RESET_ALL
         logger.info(
-            f'{yellow}Launching an unmanaged spot task, which does not '
-            f'automatically recover from preemptions.{reset}\n{yellow}To '
+            f'{yellow}Launching a spot job that does not '
+            f'automatically recover from preemptions. To '
             'get automatic recovery, use managed job instead: '
             f'{reset}{bold}sky jobs launch{reset} {yellow}or{reset} '
             f'{bold}sky.jobs.launch(){reset}.')
sky/optimizer.py CHANGED
@@ -884,10 +884,8 @@ class Optimizer:
             # Add a new line for better readability, when there are multiple
             # tasks.
             logger.info('')
-            logger.info(
-                f'{colorama.Style.BRIGHT}Considered resources {task_str}'
-                f'({task.num_nodes} node{plural}):'
-                f'{colorama.Style.RESET_ALL}')
+            logger.info(f'Considered resources {task_str}'
+                        f'({task.num_nodes} node{plural}):')
 
             # Only print 1 row per cloud.
             # The following code is to generate the table
sky/provision/instance_setup.py CHANGED
@@ -15,9 +15,12 @@ from sky.provision import docker_utils
 from sky.provision import logging as provision_logging
 from sky.provision import metadata_utils
 from sky.skylet import constants
+from sky.usage import constants as usage_constants
+from sky.usage import usage_lib
 from sky.utils import accelerator_registry
 from sky.utils import command_runner
 from sky.utils import common_utils
+from sky.utils import env_options
 from sky.utils import subprocess_utils
 from sky.utils import timeline
 from sky.utils import ux_utils
@@ -67,6 +70,30 @@ MAYBE_SKYLET_RESTART_CMD = (f'{constants.ACTIVATE_SKY_REMOTE_PYTHON_ENV}; '
                             'sky.skylet.attempt_skylet;')
 
 
+def _set_usage_run_id_cmd() -> str:
+    """Gets the command to set the usage run id.
+
+    The command saves the current usage run id to the file, so that the skylet
+    can use it to report the heartbeat.
+
+    We use a function instead of a constant so that the usage run id is the
+    latest one when the function is called.
+    """
+    return (
+        f'cat {usage_constants.USAGE_RUN_ID_FILE} || '
+        # The run id is retrieved locally for the current run, so that the
+        # remote cluster will be set with the same run id as the initial
+        # launch operation.
+        f'echo "{usage_lib.messages.usage.run_id}" > '
+        f'{usage_constants.USAGE_RUN_ID_FILE}')
+
+
+def _set_skypilot_env_var_cmd() -> str:
+    """Sets the skypilot environment variables on the remote machine."""
+    env_vars = env_options.Options.all_options()
+    return '; '.join([f'export {k}={v}' for k, v in env_vars.items()])
+
+
 def _auto_retry(should_retry: Callable[[Exception], bool] = lambda _: True):
     """Decorator that retries the function if it fails.
 
@@ -450,11 +477,17 @@ def start_skylet_on_head_node(cluster_name: str,
     logger.info(f'Running command on head node: {MAYBE_SKYLET_RESTART_CMD}')
     # We need to source bashrc for skylet to make sure the autostop event can
     # access the path to the cloud CLIs.
-    returncode, stdout, stderr = head_runner.run(MAYBE_SKYLET_RESTART_CMD,
-                                                 stream_logs=False,
-                                                 require_outputs=True,
-                                                 log_path=log_path_abs,
-                                                 source_bashrc=True)
+    set_usage_run_id_cmd = _set_usage_run_id_cmd()
+    # Set the skypilot environment variables, including the usage type, debug
+    # info, and other options.
+    set_skypilot_env_var_cmd = _set_skypilot_env_var_cmd()
+    returncode, stdout, stderr = head_runner.run(
+        f'{set_usage_run_id_cmd}; {set_skypilot_env_var_cmd}; '
+        f'{MAYBE_SKYLET_RESTART_CMD}',
+        stream_logs=False,
+        require_outputs=True,
+        log_path=log_path_abs,
+        source_bashrc=True)
     if returncode:
         raise RuntimeError('Failed to start skylet on the head node '
                            f'(exit code {returncode}). Error: '
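Note: the command returned by _set_usage_run_id_cmd() above relies on shell short-circuiting: cat succeeds when the run-id file already exists, so the echo branch only writes the local run id on the first launch. A rough standalone sketch of the string it builds (file path from the diff; the run id value here is a made-up placeholder):

    run_id_file = '~/.sky/usage_run_id'    # USAGE_RUN_ID_FILE in sky/usage/constants.py
    current_run_id = 'example-run-id'      # stands in for usage_lib.messages.usage.run_id
    cmd = (f'cat {run_id_file} || '
           f'echo "{current_run_id}" > {run_id_file}')
    print(cmd)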
sky/provision/kubernetes/utils.py CHANGED
@@ -2178,52 +2178,54 @@ def get_kubernetes_node_info(
 
     lf, _ = detect_gpu_label_formatter(context)
     if not lf:
-        label_key = None
+        label_keys = []
     else:
         label_keys = lf.get_label_keys()
 
     node_info_dict: Dict[str, KubernetesNodeInfo] = {}
 
-    for label_key in label_keys:
-        for node in nodes:
-            allocated_qty = 0
+    for node in nodes:
+        accelerator_name = None
+        # Determine the accelerator name from the node labels and pick the
+        # first one found. We assume that the node has only one accelerator type
+        # (e.g., either GPU or TPU).
+        for label_key in label_keys:
             if lf is not None and label_key in node.metadata.labels:
                 accelerator_name = lf.get_accelerator_from_label_value(
                     node.metadata.labels.get(label_key))
-            else:
-                accelerator_name = None
+                break
 
-            accelerator_count = get_node_accelerator_count(
-                node.status.allocatable)
+        allocated_qty = 0
+        accelerator_count = get_node_accelerator_count(node.status.allocatable)
 
-            if pods is None:
-                accelerators_available = -1
+        if pods is None:
+            accelerators_available = -1
 
-            else:
-                for pod in pods:
-                    # Get all the pods running on the node
-                    if (pod.spec.node_name == node.metadata.name and
-                            pod.status.phase in ['Running', 'Pending']):
-                        # Iterate over all the containers in the pod and sum the
-                        # GPU requests
-                        for container in pod.spec.containers:
-                            if container.resources.requests:
-                                allocated_qty += get_node_accelerator_count(
-                                    container.resources.requests)
-
-                accelerators_available = accelerator_count - allocated_qty
-
-            # Exclude multi-host TPUs from being processed.
-            # TODO(Doyoung): Remove the logic when adding support for
-            # multi-host TPUs.
-            if is_multi_host_tpu(node.metadata.labels):
-                continue
+        else:
+            for pod in pods:
+                # Get all the pods running on the node
+                if (pod.spec.node_name == node.metadata.name and
+                        pod.status.phase in ['Running', 'Pending']):
+                    # Iterate over all the containers in the pod and sum the
+                    # GPU requests
+                    for container in pod.spec.containers:
+                        if container.resources.requests:
+                            allocated_qty += get_node_accelerator_count(
+                                container.resources.requests)
+
+            accelerators_available = accelerator_count - allocated_qty
+
+        # Exclude multi-host TPUs from being processed.
+        # TODO(Doyoung): Remove the logic when adding support for
+        # multi-host TPUs.
+        if is_multi_host_tpu(node.metadata.labels):
+            continue
 
-            node_info_dict[node.metadata.name] = KubernetesNodeInfo(
-                name=node.metadata.name,
-                accelerator_type=accelerator_name,
-                total={'accelerator_count': int(accelerator_count)},
-                free={'accelerators_available': int(accelerators_available)})
+        node_info_dict[node.metadata.name] = KubernetesNodeInfo(
+            name=node.metadata.name,
+            accelerator_type=accelerator_name,
+            total={'accelerator_count': int(accelerator_count)},
+            free={'accelerators_available': int(accelerators_available)})
 
     return node_info_dict
 
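Note: the rewrite above swaps the loop order so each node is visited once and the first matching accelerator label wins (the added comment states the one-accelerator-type-per-node assumption). A simplified standalone sketch of that lookup, with made-up label keys and nodes:

    label_keys = ['example.com/gpu-type', 'example.com/tpu-type']   # hypothetical keys
    nodes = {
        'node-a': {'example.com/gpu-type': 'A100'},
        'node-b': {'example.com/tpu-type': 'v4-8'},
        'node-c': {},                                   # CPU-only node
    }
    for name, labels in nodes.items():
        accelerator_name = None
        for key in label_keys:
            if key in labels:
                accelerator_name = labels[key]
                break                                   # first match wins
        print(name, accelerator_name)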
sky/provision/provisioner.py CHANGED
@@ -450,6 +450,13 @@ def _post_provision_setup(
         logger.info(f'{indent_str}{colorama.Style.DIM}{vm_str}{plural} {verb} '
                     f'up.{colorama.Style.RESET_ALL}')
 
+        # It's promised by the cluster config that docker_config does not
+        # exist for docker-native clouds, i.e. they provide docker containers
+        # instead of full VMs, like Kubernetes and RunPod, as it requires some
+        # special handlings to run docker inside their docker virtualization.
+        # For their Docker image settings, we do them when provisioning the
+        # cluster. See provision/{cloud}/instance.py:get_cluster_info for more
+        # details.
         if docker_config:
             status.update(
                 ux_utils.spinner_message(
sky/provision/runpod/utils.py CHANGED
@@ -186,7 +186,7 @@ def delete_pod_template(template_name: str) -> None:
         runpod.runpod.api.graphql.run_graphql_query(
             f'mutation {{deleteTemplate(templateName: "{template_name}")}}')
     except runpod.runpod.error.QueryError as e:
-        logger.warning(f'Failed to delete template {template_name}: {e}'
+        logger.warning(f'Failed to delete template {template_name}: {e} '
                        'Please delete it manually.')
 
 
@@ -195,8 +195,9 @@ def delete_register_auth(registry_auth_id: str) -> None:
     try:
         runpod.runpod.delete_container_registry_auth(registry_auth_id)
     except runpod.runpod.error.QueryError as e:
-        logger.warning(f'Failed to delete registry auth {registry_auth_id}: {e}'
-                       'Please delete it manually.')
+        logger.warning(
+            f'Failed to delete registry auth {registry_auth_id}: {e} '
+            'Please delete it manually.')
 
 
 def _create_template_for_docker_login(
sky/resources.py CHANGED
@@ -67,6 +67,7 @@ class Resources:
         # Internal use only.
         # pylint: disable=invalid-name
         _docker_login_config: Optional[docker_utils.DockerLoginConfig] = None,
+        _docker_username_for_runpod: Optional[str] = None,
         _is_image_managed: Optional[bool] = None,
         _requires_fuse: Optional[bool] = None,
         _cluster_config_overrides: Optional[Dict[str, Any]] = None,
@@ -148,6 +149,9 @@ class Resources:
           _docker_login_config: the docker configuration to use. This includes
             the docker username, password, and registry server. If None, skip
             docker login.
+          _docker_username_for_runpod: the login username for the docker
+            containers. This is used by RunPod to set the ssh user for the
+            docker containers.
          _requires_fuse: whether the task requires FUSE mounting support. This
            is used internally by certain cloud implementations to do additional
            setup for FUSE mounting. This flag also safeguards against using
@@ -234,6 +238,12 @@ class Resources:
 
         self._docker_login_config = _docker_login_config
 
+        # TODO(andyl): This ctor param seems to be unused.
+        # We always use `Task.set_resources` and `Resources.copy` to set the
+        # `docker_username_for_runpod`. But to keep the consistency with
+        # `_docker_login_config`, we keep it here.
+        self._docker_username_for_runpod = _docker_username_for_runpod
+
         self._requires_fuse = _requires_fuse
 
         self._cluster_config_overrides = _cluster_config_overrides
@@ -479,6 +489,10 @@ class Resources:
     def requires_fuse(self, value: Optional[bool]) -> None:
         self._requires_fuse = value
 
+    @property
+    def docker_username_for_runpod(self) -> Optional[str]:
+        return self._docker_username_for_runpod
+
     def _set_cpus(
         self,
         cpus: Union[None, int, float, str],
@@ -1065,6 +1079,10 @@ class Resources:
         cloud_specific_variables = self.cloud.make_deploy_resources_variables(
             self, cluster_name, region, zones, num_nodes, dryrun)
 
+        # TODO(andyl): Should we print some warnings if users' envs share
+        # same names with the cloud specific variables, but not enabled
+        # since it's not on the particular cloud?
+
         # Docker run options
         docker_run_options = skypilot_config.get_nested(
             ('docker', 'run_options'),
@@ -1277,6 +1295,9 @@ class Resources:
             labels=override.pop('labels', self.labels),
             _docker_login_config=override.pop('_docker_login_config',
                                               self._docker_login_config),
+            _docker_username_for_runpod=override.pop(
+                '_docker_username_for_runpod',
+                self._docker_username_for_runpod),
             _is_image_managed=override.pop('_is_image_managed',
                                            self._is_image_managed),
             _requires_fuse=override.pop('_requires_fuse', self._requires_fuse),
@@ -1438,6 +1459,8 @@ class Resources:
         resources_fields['labels'] = config.pop('labels', None)
         resources_fields['_docker_login_config'] = config.pop(
             '_docker_login_config', None)
+        resources_fields['_docker_username_for_runpod'] = config.pop(
+            '_docker_username_for_runpod', None)
         resources_fields['_is_image_managed'] = config.pop(
             '_is_image_managed', None)
         resources_fields['_requires_fuse'] = config.pop('_requires_fuse', None)
@@ -1486,6 +1509,9 @@ class Resources:
         if self._docker_login_config is not None:
             config['_docker_login_config'] = dataclasses.asdict(
                 self._docker_login_config)
+        if self._docker_username_for_runpod is not None:
+            config['_docker_username_for_runpod'] = (
+                self._docker_username_for_runpod)
         add_if_not_none('_cluster_config_overrides',
                         self._cluster_config_overrides)
         if self._is_image_managed is not None:
sky/skylet/constants.py CHANGED
@@ -110,6 +110,8 @@ DOCKER_LOGIN_ENV_VARS = {
     DOCKER_SERVER_ENV_VAR,
 }
 
+RUNPOD_DOCKER_USERNAME_ENV_VAR = 'SKYPILOT_RUNPOD_DOCKER_USERNAME'
+
 # Commands for disable GPU ECC, which can improve the performance of the GPU
 # for some workloads by 30%. This will only be applied when a user specify
 # `nvidia_gpus.disable_ecc: true` in ~/.sky/config.yaml.
sky/skylet/events.py CHANGED
@@ -20,6 +20,7 @@ from sky.serve import serve_utils
 from sky.skylet import autostop_lib
 from sky.skylet import constants
 from sky.skylet import job_lib
+from sky.usage import usage_lib
 from sky.utils import cluster_yaml_utils
 from sky.utils import common_utils
 from sky.utils import ux_utils
@@ -90,6 +91,14 @@ class ServiceUpdateEvent(SkyletEvent):
         serve_utils.update_service_status()
 
 
+class UsageHeartbeatReportEvent(SkyletEvent):
+    """Skylet event for reporting usage."""
+    EVENT_INTERVAL_SECONDS = 600
+
+    def _run(self):
+        usage_lib.send_heartbeat(interval_seconds=self.EVENT_INTERVAL_SECONDS)
+
+
 class AutostopEvent(SkyletEvent):
     """Skylet event for autostop.
 
sky/skylet/skylet.py CHANGED
@@ -25,6 +25,8 @@ EVENTS = [
     # unhealthy, this event will correctly update the controller
     # status to CONTROLLER_FAILED.
     events.ServiceUpdateEvent(),
+    # Report usage heartbeat every 10 minutes.
+    events.UsageHeartbeatReportEvent(),
 ]
 
 while True:
sky/task.py CHANGED
@@ -121,6 +121,9 @@ def _check_docker_login_config(task_envs: Dict[str, str]) -> bool:
 
     If any of the docker login env vars is set, all of them must be set.
 
+    Returns:
+        True if there is a valid docker login config in task_envs.
+        False otherwise.
     Raises:
         ValueError: if any of the docker login env vars is set, but not all of
             them are set.
@@ -168,6 +171,23 @@ def _with_docker_login_config(
     return type(resources)(new_resources)
 
 
+def _with_docker_username_for_runpod(
+    resources: Union[Set['resources_lib.Resources'],
+                     List['resources_lib.Resources']],
+    task_envs: Dict[str, str],
+) -> Union[Set['resources_lib.Resources'], List['resources_lib.Resources']]:
+    docker_username_for_runpod = task_envs.get(
+        constants.RUNPOD_DOCKER_USERNAME_ENV_VAR)
+
+    # We should not call r.copy() if docker_username_for_runpod is None,
+    # to prevent `DummyResources` instance becoming a `Resources` instance.
+    if docker_username_for_runpod is None:
+        return resources
+    return (type(resources)(
+        r.copy(_docker_username_for_runpod=docker_username_for_runpod)
+        for r in resources))
+
+
 class Task:
     """Task: a computation to be run on the cloud."""
 
@@ -582,6 +602,8 @@ class Task:
         if _check_docker_login_config(self._envs):
             self.resources = _with_docker_login_config(self.resources,
                                                        self._envs)
+        self.resources = _with_docker_username_for_runpod(
+            self.resources, self._envs)
         return self
 
     @property
@@ -647,6 +669,9 @@ class Task:
             resources = {resources}
         # TODO(woosuk): Check if the resources are None.
         self.resources = _with_docker_login_config(resources, self.envs)
+        # Only have effect on RunPod.
+        self.resources = _with_docker_username_for_runpod(
+            self.resources, self.envs)
 
         # Evaluate if the task requires FUSE and set the requires_fuse flag
         for _, storage_obj in self.storage_mounts.items():
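Note: the new helper above copies every Resources object with the RunPod docker username only when the corresponding env var is present, and otherwise returns the originals untouched. A simplified, self-contained sketch of that behavior (the dataclass below is a stand-in, not the real sky.Resources):

    from dataclasses import dataclass, replace

    RUNPOD_DOCKER_USERNAME_ENV_VAR = 'SKYPILOT_RUNPOD_DOCKER_USERNAME'

    @dataclass(frozen=True)
    class FakeResources:                            # stand-in for sky.Resources
        docker_username_for_runpod: str = 'root'    # RunPod defaults to root

    def with_docker_username(resources, task_envs):
        username = task_envs.get(RUNPOD_DOCKER_USERNAME_ENV_VAR)
        if username is None:
            return resources                        # keep the original objects
        return type(resources)(
            replace(r, docker_username_for_runpod=username) for r in resources)

    envs = {RUNPOD_DOCKER_USERNAME_ENV_VAR: 'ubuntu'}
    print(with_docker_username([FakeResources()], envs))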
sky/templates/kubernetes-ray.yml.j2 CHANGED
@@ -373,15 +373,16 @@ available_node_types:
                 done;
                 if [ ! -z "$INSTALL_FIRST" ]; then
                   echo "Installing core packages: $INSTALL_FIRST";
-                  DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get install -y $INSTALL_FIRST;
+                  DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get install -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" $INSTALL_FIRST;
                 fi;
                 # SSH and other packages are not necessary, so we disable set -e
                 set +e
 
                 if [ ! -z "$MISSING_PACKAGES" ]; then
                   echo "Installing missing packages: $MISSING_PACKAGES";
-                  DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get install -y $MISSING_PACKAGES;
+                  DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get install -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" $MISSING_PACKAGES;
                 fi;
+
                 $(prefix_cmd) mkdir -p /var/run/sshd;
                 $(prefix_cmd) sed -i "s/PermitRootLogin prohibit-password/PermitRootLogin yes/" /etc/ssh/sshd_config;
                 $(prefix_cmd) sed "s@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g" -i /etc/pam.d/sshd;
sky/templates/runpod-ray.yml.j2 CHANGED
@@ -25,7 +25,7 @@ provider:
 {%- endif %}
 
 auth:
-  ssh_user: root
+  ssh_user: {{docker_username_for_runpod}}
   ssh_private_key: {{ssh_private_key}}
 
 available_node_types:
sky/usage/constants.py CHANGED
@@ -3,7 +3,6 @@
 LOG_URL = 'http://usage.skypilot.co:9090/loki/api/v1/push'  # pylint: disable=line-too-long
 
 USAGE_MESSAGE_SCHEMA_VERSION = 1
-
 PRIVACY_POLICY_PATH = '~/.sky/privacy_policy'
 
 USAGE_POLICY_MESSAGE = (
@@ -15,3 +14,5 @@ USAGE_POLICY_MESSAGE = (
 
 USAGE_MESSAGE_REDACT_KEYS = ['setup', 'run', 'envs']
 USAGE_MESSAGE_REDACT_TYPES = {str, dict}
+
+USAGE_RUN_ID_FILE = '~/.sky/usage_run_id'
sky/usage/usage_lib.py CHANGED
@@ -44,6 +44,7 @@ def _get_current_timestamp_ns() -> int:
 class MessageType(enum.Enum):
     """Types for messages to be sent to Loki."""
     USAGE = 'usage'
+    HEARTBEAT = 'heartbeat'
     # TODO(zhwu): Add more types, e.g., cluster_lifecycle.
 
 
@@ -67,8 +68,9 @@ class MessageToReport:
         properties = self.__dict__.copy()
         return {k: v for k, v in properties.items() if not k.startswith('_')}
 
-    def __repr__(self):
-        raise NotImplementedError
+    def __repr__(self) -> str:
+        d = self.get_properties()
+        return json.dumps(d)
 
 
 class UsageMessageToReport(MessageToReport):
@@ -160,10 +162,6 @@ class UsageMessageToReport(MessageToReport):
         self.exception: Optional[str] = None  # entrypoint_context
         self.stacktrace: Optional[str] = None  # entrypoint_context
 
-    def __repr__(self) -> str:
-        d = self.get_properties()
-        return json.dumps(d)
-
     def update_entrypoint(self, msg: str):
         self.entrypoint = msg
 
@@ -275,16 +273,43 @@ class UsageMessageToReport(MessageToReport):
                                                  name_or_fn)
 
 
+class HeartbeatMessageToReport(MessageToReport):
+    """Message to be reported to Grafana Loki for heartbeat on a cluster."""
+
+    def __init__(self, interval_seconds: int = 600):
+        super().__init__(constants.USAGE_MESSAGE_SCHEMA_VERSION)
+        # This interval_seconds is mainly for recording the heartbeat interval
+        # in the heartbeat message, so that the collector can use it.
+        self.interval_seconds = interval_seconds
+
+    def get_properties(self) -> Dict[str, Any]:
+        properties = super().get_properties()
+        # The run id is set by the skylet, which will always be the same for
+        # the entire lifetime of the run.
+        with open(os.path.expanduser(constants.USAGE_RUN_ID_FILE),
+                  'r',
+                  encoding='utf-8') as f:
+            properties['run_id'] = f.read().strip()
+        return properties
+
+
 class MessageCollection:
     """A collection of messages."""
 
     def __init__(self):
-        self._messages = {MessageType.USAGE: UsageMessageToReport()}
+        self._messages = {
+            MessageType.USAGE: UsageMessageToReport(),
+            MessageType.HEARTBEAT: HeartbeatMessageToReport()
+        }
 
     @property
-    def usage(self):
+    def usage(self) -> UsageMessageToReport:
         return self._messages[MessageType.USAGE]
 
+    @property
+    def heartbeat(self) -> HeartbeatMessageToReport:
+        return self._messages[MessageType.HEARTBEAT]
+
     def reset(self, message_type: MessageType):
         self._messages[message_type] = self._messages[message_type].__class__()
 
@@ -308,13 +333,25 @@ def _send_to_loki(message_type: MessageType):
 
     message = messages[message_type]
 
+    # In case the message has no start time, set it to the current time.
+    message.start()
     message.send_time = _get_current_timestamp_ns()
-    log_timestamp = message.start_time
+    # Use send time instead of start time to avoid the message being dropped
+    # by Loki, due to the timestamp being too old. We still have the start time
+    # in the message for dashboard.
+    log_timestamp = message.send_time
 
     environment = 'prod'
     if env_options.Options.IS_DEVELOPER.get():
         environment = 'dev'
-    prom_labels = {'type': message_type.value, 'environment': environment}
+    prom_labels = {
+        'type': message_type.value,
+        'environment': environment,
+        'schema_version': message.schema_version,
+    }
+    if message_type == MessageType.USAGE:
+        prom_labels['new_cluster'] = (message.original_cluster_status != 'UP'
+                                      and message.final_cluster_status == 'UP')
 
     headers = {'Content-type': 'application/json'}
     payload = {
@@ -392,7 +429,7 @@ def prepare_json_from_yaml_config(
 def _send_local_messages():
     """Send all messages not been uploaded to Loki."""
     for msg_type, message in messages.items():
-        if not message.message_sent:
+        if not message.message_sent and msg_type != MessageType.HEARTBEAT:
             # Avoid the fallback entrypoint to send the message again
             # in normal case.
             try:
@@ -402,6 +439,11 @@ def _send_local_messages():
                            f'exception caught: {type(e)}({e})')
 
 
+def send_heartbeat(interval_seconds: int = 600):
+    messages.heartbeat.interval_seconds = interval_seconds
+    _send_to_loki(MessageType.HEARTBEAT)
+
+
 @contextlib.contextmanager
 def entrypoint_context(name: str, fallback: bool = False):
     """Context manager for entrypoint.
sky/utils/env_options.py CHANGED
@@ -1,6 +1,7 @@
 """Global environment options for sky."""
 import enum
 import os
+from typing import Dict
 
 
 class Options(enum.Enum):
@@ -35,3 +36,8 @@ class Options(enum.Enum):
     def env_key(self) -> str:
         """The environment variable key name."""
         return self.value[0]
+
+    @classmethod
+    def all_options(cls) -> Dict[str, bool]:
+        """Returns all options as a dictionary."""
+        return {option.env_key: option.get() for option in list(Options)}
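Note: the new Options.all_options() classmethod is what _set_skypilot_env_var_cmd() in sky/provision/instance_setup.py uses to replay local option values on the remote machine as export commands. A simplified standalone sketch with a fake enum (member names and values here are illustrative, not the real option set):

    from enum import Enum

    class FakeOptions(Enum):                    # stand-in for env_options.Options
        IS_DEVELOPER = ('SKYPILOT_DEV', False)
        SHOW_DEBUG_INFO = ('SKYPILOT_DEBUG', False)

        @property
        def env_key(self) -> str:
            return self.value[0]

        @classmethod
        def all_options(cls):
            return {opt.env_key: opt.value[1] for opt in cls}

    env_vars = FakeOptions.all_options()
    print('; '.join(f'export {k}={v}' for k, v in env_vars.items()))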
{skypilot_nightly-1.0.0.dev20250211.dist-info → skypilot_nightly-1.0.0.dev20250213.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: skypilot-nightly
-Version: 1.0.0.dev20250211
+Version: 1.0.0.dev20250213
 Summary: SkyPilot: An intercloud broker for the clouds
 Author: SkyPilot Team
 License: Apache 2.0
{skypilot_nightly-1.0.0.dev20250211.dist-info → skypilot_nightly-1.0.0.dev20250213.dist-info}/RECORD RENAMED
@@ -1,20 +1,20 @@
-sky/__init__.py,sha256=1ezOy54DC-pEqSUADXHnHCcwQ1lU5dYJ6c14AjKNI1g,5560
+sky/__init__.py,sha256=ulWt-DtbuSpxFnOQtMOqMTPqAYFd6WgWd_T-bxS7_QM,5560
 sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
 sky/authentication.py,sha256=MNc9uHnvQ1EsEl8SsrYcYCGbxcnDbR6gaRCXVNd5RZE,22338
 sky/check.py,sha256=xzLlxUkBCrzpOho8lw65EvKLPl_b9lA2nteF5MSYbDQ,10885
-sky/cli.py,sha256=B-YWYiKnfSGdSOXtAY8SRGOGhneUeNPBjXFZ0FuLZ8w,214131
+sky/cli.py,sha256=_Q-XlsLN73e8BJilClajL7VOG8vINVJ_xRjENOpJdDA,213928
 sky/cloud_stores.py,sha256=PcLT57_8SZy7o6paAluElfBynaLkbaOq3l-8dNg1AVM,23672
 sky/core.py,sha256=fE1rn4Ku94S0XmWTO5-6t6eT6aaJImNczRqEnTe8v7Q,38742
 sky/dag.py,sha256=f3sJlkH4bE6Uuz3ozNtsMhcBpRx7KmC9Sa4seDKt4hU,3104
 sky/exceptions.py,sha256=SEhRubPlk-crkflPC5P_Z085iLrSd3UScYwc790QwYw,9378
-sky/execution.py,sha256=dpbk1kGRkGHT0FCJKGvjqeV3qIGEN2K20NDZbVrcAvI,28483
+sky/execution.py,sha256=vNUE9Z8hCSQeil7h3kdote2r6nkbrGXSqqmK6ru594Q,28453
 sky/global_user_state.py,sha256=cTwltMCDIIBaapuGgARxFwpDJDCiKKyVW-PP_qtWuCA,30241
-sky/optimizer.py,sha256=d5BPAEZVrS3a2oBclSwo8MWkHQKQ3u4tcyawOANN0_0,59836
-sky/resources.py,sha256=D3jteQxKOUydoNm7VDl90p02dwP3RpbO3gqNcl4dpOI,70327
+sky/optimizer.py,sha256=H5cpKELOQmnFpox0QXMB4P7jGhJxzXog4Ht_TYJaGuA,59758
+sky/resources.py,sha256=W7VO5nTizr-KIhOamOs7oSwmBGLjQZhQM6DoYbiAOsg,71648
 sky/sky_logging.py,sha256=7Zk9mL1TDxFkGsy3INMBKYlqsbognVGSMzAsHZdZlhw,5891
 sky/skypilot_config.py,sha256=FN93hSG-heQCHBnemlIK2TwrJngKbpx4vMXNUzPIzV8,9087
 sky/status_lib.py,sha256=J7Jb4_Dz0v2T64ttOdyUgpokvl4S0sBJrMfH7Fvo51A,1457
-sky/task.py,sha256=zri5_Ghh5-fjDf2rl997ZmL4IlXetW9u9XXJIRUJ3Qg,51353
+sky/task.py,sha256=Z74bBkOx1bFmGMoQRD3qbYxIZ5qi2AC5htY1KIsmvT0,52394
 sky/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sky/adaptors/aws.py,sha256=FNNC8B-iqSSiCLPiWGK_PLm1R-Kt4yI5JPIpdE0QJxQ,7565
 sky/adaptors/azure.py,sha256=yjM8nAPW-mlSXfmA8OmJNnSIrZ9lQx2-GxiI-TIVrwE,21910
@@ -51,11 +51,11 @@ sky/clouds/do.py,sha256=od4gMTrs2W5IkaDDr_oexOSdIOqn94vKq2U_QZcrpRk,11311
 sky/clouds/fluidstack.py,sha256=u2I6jXEtTqgqRWi2EafMsKqc8VkUq1cR6CSDUvk72_U,12407
 sky/clouds/gcp.py,sha256=6QOnefFsYiLCcnajjduLHsayqJ641bBu42jPTpvy7Mc,55007
 sky/clouds/ibm.py,sha256=0ArRTQx1_DpTNGByFhukzFedEDzmVjBsGiiques1bQ0,21447
-sky/clouds/kubernetes.py,sha256=oZg4Lpn2ZBikyc5NTJIziUPEY0xs2mtz546ButhkZ7g,31541
+sky/clouds/kubernetes.py,sha256=ocf8ZUlMbOcPb-n8JrSFix9rH75g089sC1JAd84JUXQ,31653
 sky/clouds/lambda_cloud.py,sha256=42AmcN2X_wdBMuAw606nR_pQCBAy5QFiAo711_WRqDE,12672
 sky/clouds/oci.py,sha256=VpPxpMJv52QePVdwdK9EuiMyqjp70dk8_rgUVv5Y-2w,27028
 sky/clouds/paperspace.py,sha256=F0Sj1RcqRb5fPjrr8qbdeY-JdfAHcRPc902pZOi4erw,10889
-sky/clouds/runpod.py,sha256=Wtaarp27_LTu5_E2agC7tTr2vhN1D4sblr2vZTT4vBI,11580
+sky/clouds/runpod.py,sha256=4gZTbUO4I8bzio5x9Km42ZIujZzCx2eszdkN0Mz5fqE,11893
 sky/clouds/scp.py,sha256=JHyMqkAAqr9lJq79IVjj3rU1g-ZCCGLZTJEzIhYsw7c,15845
 sky/clouds/vast.py,sha256=vQV489qkZMfDtt_SnXParPY49gkgKx5LZAEOsk65kIo,11231
 sky/clouds/vsphere.py,sha256=rrNf6_uHy4ukjHwaN35XVh2-Xj9k43-QGQkiEXyHYJk,12273
@@ -85,7 +85,7 @@ sky/clouds/service_catalog/data_fetchers/fetch_azure.py,sha256=7YVnoGDGGZI2TK02b
 sky/clouds/service_catalog/data_fetchers/fetch_cudo.py,sha256=52P48lvWN0s1ArjeLPeLemPRpxjSRcHincRle0nqdm4,3440
 sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py,sha256=yKuAFbjBRNz_e2RNNDT_aHHAuKQ86Ac7GKgIie5O6Pg,7273
 sky/clouds/service_catalog/data_fetchers/fetch_gcp.py,sha256=HLxdxA9DMSi19mgpVM_cERV4o-xh_tJ9vmkGm1wOaIE,30868
-sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py,sha256=MN54h0CAGPHQAeF2eTmuESq3b0-d1kDARRUM6OkivCk,4962
+sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py,sha256=Bi5ta91p4SkFCoaEJUPKPjDB0FZ24DMR4NcKFpKCkxU,4979
 sky/clouds/service_catalog/data_fetchers/fetch_vast.py,sha256=zR9icM3ty5C8tGw13pQbsBtQQMgG4kl1j_jSGqqrgOA,4741
 sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py,sha256=Opp2r3KSzXPtwk3lKNbO8IX9QzjoRSwy1kW3jPjtS1c,21453
 sky/clouds/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -115,10 +115,10 @@ sky/provision/__init__.py,sha256=hb_z69_7-FH1I8aDpFKNj2x_a8spzceWcovklutNgP8,637
 sky/provision/common.py,sha256=E8AlSUFcn0FYQq1erNmoVfMAdsF9tP2yxfyk-9PLvQU,10286
 sky/provision/constants.py,sha256=oc_XDUkcoLQ_lwDy5yMeMSWviKS0j0s1c0pjlvpNeWY,800
 sky/provision/docker_utils.py,sha256=ENm0LkyrYWic3Ikyacho8X5uDMvGsbkZQsb6kNH1DuI,19629
-sky/provision/instance_setup.py,sha256=8Pudbpke6ah0xufr2UwtsDnNZ64-aAYkz8M44ZA0huI,23218
+sky/provision/instance_setup.py,sha256=YBFOwZQLBzpUjYoVQcX0KItej1rCBRWM23Dw9lg_q24,24386
 sky/provision/logging.py,sha256=yZWgejrFBhhRjAtvFu5N5bRXIMK5TuwNjp1vKQqz2pw,2103
 sky/provision/metadata_utils.py,sha256=LrxeV4wD2QPzNdXV_npj8q-pr35FatxBBjF_jSbpOT0,4013
-sky/provision/provisioner.py,sha256=ZOgFOO0NB4QZVPwd4qikRqi615Bq67n0Vcl3cTDVxNE,29153
+sky/provision/provisioner.py,sha256=UuMO2wp98Nk1nue25TtIaMU79Aj14v9IoIAFO_CUy9w,29636
 sky/provision/aws/__init__.py,sha256=mxq8PeWJqUtalDozTNpbtENErRZ1ktEs8uf2aG9UUgU,731
 sky/provision/aws/config.py,sha256=-4mr5uxgsl_8eLm_4DfP8JurZGSysGuY0iDeBTHnX5Q,25943
 sky/provision/aws/instance.py,sha256=3-R8ohuN8ooNh2Fqqb7-c4vNFy1xsw2GQF4PHg3APhE,40843
@@ -153,7 +153,7 @@ sky/provision/kubernetes/config.py,sha256=bXwOGdSAnXCkDreew0KsSUqSv3ZrptNeevqat7
 sky/provision/kubernetes/instance.py,sha256=AQikdRgNklpeMgiEd4w2Hh7kGssVABsy0aCh9xsKi5Y,50313
 sky/provision/kubernetes/network.py,sha256=EpNjRQ131CXepqbdkoRKFu4szVrm0oKEpv1l8EgOkjU,12364
 sky/provision/kubernetes/network_utils.py,sha256=52BZY_5ynCH6IXlivKObYyAHDgQCJyAJIjmM7J4MpFo,11393
-sky/provision/kubernetes/utils.py,sha256=4kSEx6NZB3MAsDqCxLO-elo7EO6Coh-9wypwVqs3jgk,109895
+sky/provision/kubernetes/utils.py,sha256=swOe6ozgSoucDtoJCExs0HLLWYuoi5HkIGMMSp7fEzc,109962
 sky/provision/kubernetes/manifests/smarter-device-manager-configmap.yaml,sha256=AMzYzlY0JIlfBWj5eX054Rc1XDW2thUcLSOGMJVhIdA,229
 sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml,sha256=RtTq4F1QUmR2Uunb6zuuRaPhV7hpesz4saHjn3Ncsb4,2010
 sky/provision/lambda_cloud/__init__.py,sha256=6EEvSgtUeEiup9ivIFevHmgv0GqleroO2X0K7TRa2nE,612
@@ -172,7 +172,7 @@ sky/provision/paperspace/utils.py,sha256=uOmxbDKjV6skFizC4gYXSxRuEqso5ck2kF7MbtN
 sky/provision/runpod/__init__.py,sha256=6HYvHI27EaLrX1SS0vWVhdLu5HDBeZCdvAeDJuwM5pk,556
 sky/provision/runpod/config.py,sha256=9ulZJVL7nHuxhTdoj8D7lNn7SdicJ5zc6FIcHIG9tcg,321
 sky/provision/runpod/instance.py,sha256=FNalpTQMvnmACdtFsGvVPVhEkzdqrlmd_pExn33bIQ8,10358
-sky/provision/runpod/utils.py,sha256=2L4MUrFi4rQOP26IeAkPNQ9oWGPJhH5fvz7NAbL3nNQ,12421
+sky/provision/runpod/utils.py,sha256=F3zsxPV3IY5C73J-zNZEfcXL8bsZBgNWzlmtSO6qOqI,12425
 sky/provision/runpod/api/__init__.py,sha256=eJwjPeQZ5B7chf4-Bl4YeI2Uo9aLX4M1rr2NmPk89_E,112
 sky/provision/runpod/api/commands.py,sha256=oh77PS0H0wZudHV8II9ceRuaFQ8FN4NJ4S3-6_PeqPM,4238
 sky/provision/runpod/api/pods.py,sha256=GMwxgNr9NnHPfyh2Y9b8S_vLhrLY4h7LybFBBQNAyfw,4948
@@ -215,12 +215,12 @@ sky/skylet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sky/skylet/attempt_skylet.py,sha256=GZ6ITjjA0m-da3IxXXfoHR6n4pjp3X3TOXUqVvSrV0k,2136
 sky/skylet/autostop_lib.py,sha256=JPDHmByuhoNYXSUHl-OnyeJUkOFWn7gDM1FrS7Kr3E8,4478
 sky/skylet/configs.py,sha256=UtnpmEL0F9hH6PSjhsps7xgjGZ6qzPOfW1p2yj9tSng,1887
-sky/skylet/constants.py,sha256=EUSW4yH59eqBDLMIdmQWIYd3nAJBFoUeo5v9MGiginI,16057
-sky/skylet/events.py,sha256=0bOjUYpphuAficD9wDB5NOan2vwJDaRqdnm4sl0RK0U,12535
+sky/skylet/constants.py,sha256=bTvojyYHLhUxMI-xD8d6Fj6snQiK4-PLw-baRFKCcfQ,16125
+sky/skylet/events.py,sha256=__7bt6Z8q2W1vwTQv4yug-oAXDwSf8zBeRxb8HFM36U,12792
 sky/skylet/job_lib.py,sha256=Rk-C069cusJIRXsks8xqCb016JSt7GlpU7LrpX0qFJk,42785
 sky/skylet/log_lib.py,sha256=oFEBd85vDYFrIyyZKekH30yc4rRYILC0F0o-COQ64oE,20445
 sky/skylet/log_lib.pyi,sha256=rRk4eUX0RHGs1QL9CXsJq6RE7FqqxZlfuPJOLXTvg7I,4453
-sky/skylet/skylet.py,sha256=Tpv4yYR3jwxZsYeFPexB1gS1bCL5_AAfPzGKLsknPhA,1147
+sky/skylet/skylet.py,sha256=mWmqCvxSlfdVU_L8NL6P52jmCt3smd8K0HdyNBfMPeI,1234
 sky/skylet/subprocess_daemon.py,sha256=gcL-_Hea7-SrBUyZfAbo40RBFbaeuBmPCW0dm4YYkPo,3537
 sky/skylet/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sky/skylet/providers/command_runner.py,sha256=DdBKP0QX325_N3zAVYwnmXmfbfXNqkzWQZpF9DSR7Go,16259
@@ -250,20 +250,20 @@ sky/templates/jobs-controller.yaml.j2,sha256=FfagMkhXZdUWR6HtJHJ3JEZzJy4eov5CQZH
 sky/templates/kubernetes-ingress.yml.j2,sha256=73iDklVDWBMbItg0IexCa6_ClXPJOxw7PWz3leku4nE,1340
 sky/templates/kubernetes-loadbalancer.yml.j2,sha256=IxrNYM366N01bbkJEbZ_UPYxUP8wyVEbRNFHRsBuLsw,626
 sky/templates/kubernetes-port-forward-proxy-command.sh,sha256=iw7mypHszg6Ggq9MbyiYMFOkSlXaQZulaxqC5IWYGCc,3381
-sky/templates/kubernetes-ray.yml.j2,sha256=EHUDvALvhaPB44U7cdgXStV6v8Qh8yn5J4T6XFnmZoM,28856
+sky/templates/kubernetes-ray.yml.j2,sha256=x3Eq1ejG577E6eAZtJvpTlzXRCW5beMhqApV3J8BEZY,29019
 sky/templates/kubernetes-ssh-jump.yml.j2,sha256=k5W5sOIMppU7dDkJMwPlqsUcb92y7L5_TVG3hkgMy8M,2747
 sky/templates/lambda-ray.yml.j2,sha256=HyvO_tX2vxwSsc4IFVSqGuIbjLMk0bevP9bcxb8ZQII,4498
 sky/templates/local-ray.yml.j2,sha256=FNHeyHF6nW9nU9QLIZceUWfvrFTTcO51KqhTnYCEFaA,1185
 sky/templates/oci-ray.yml.j2,sha256=92dvXGaUd2Kwep9fgTjOsAPJiBLr8GQTjy7pFvuPAyE,4562
 sky/templates/paperspace-ray.yml.j2,sha256=HQjZNamrB_a4fOMCxQXSVdV5JIHtbGtAE0JzEO8uuVQ,4021
-sky/templates/runpod-ray.yml.j2,sha256=bUiF4Y_EkCA_GKLtTzPXbajdL-NOUiJ38Pe4dZd2dys,4284
+sky/templates/runpod-ray.yml.j2,sha256=95392Jvk-PwrEHIF2C5i-EnaqC31nTq4UTYZuQxUe1k,4310
 sky/templates/scp-ray.yml.j2,sha256=I9u8Ax-lit-d6UrCC9BVU8avst8w1cwK6TrzZBcz_JM,5608
 sky/templates/sky-serve-controller.yaml.j2,sha256=W4i1-OGRU2WDvauLC4EDXcYrNxj7mzRFSvSqzAKfehc,2020
 sky/templates/vast-ray.yml.j2,sha256=KaZLBJfI6FzAVRVq0NNM0_SN0RQUrDIehnJJ_LnvwnY,2990
 sky/templates/vsphere-ray.yml.j2,sha256=cOQ-qdpxGA2FHajMMhTJI-SmlYzdPterX4Gsiq-nkb0,3587
 sky/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sky/usage/constants.py,sha256=8xpg9vhDU9A3eObtpkNFjwa42oCazqGEv4yw_vJSO7U,590
-sky/usage/usage_lib.py,sha256=rjsekywo8IB_lJwRKBaWfQZ_znUJ-mIu1b9iWkCog88,18211
+sky/usage/constants.py,sha256=k7PQ-QP1p3tDgnzvy7QoxJjuTXWDUyVkbtPcIEvDsYM,632
+sky/usage/usage_lib.py,sha256=jpRt-24WVxYyd-XJz3_lSHboUKmWy8x8lRvvO-JO68g,20026
 sky/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sky/utils/accelerator_registry.py,sha256=cpPS9_MahQPt0ev4qPT-qyGpe12YD78UNj_gAvt720Q,4052
 sky/utils/admin_policy_utils.py,sha256=_Vt_jTTYCXmMdryj0vrrumFPewa93qHnzUqBDXjAhRU,5981
@@ -275,7 +275,7 @@ sky/utils/control_master_utils.py,sha256=90hnxiAUP20gbJ9e3MERh7rb04ZO_I3LsljNjR2
 sky/utils/controller_utils.py,sha256=SUrhK46ouBH2rm7azfFLIWr-T9-voYAdiXl2z5fG4Qw,45948
 sky/utils/dag_utils.py,sha256=l_0O3RUfe9OdQ9mtbhdlHpJVD4VAF_HQ3A75dgsYIjM,6099
 sky/utils/db_utils.py,sha256=K2-OHPg0FeHCarevMdWe0IWzm6wWumViEeYeJuGoFUE,3747
-sky/utils/env_options.py,sha256=E5iwRFBUY2Iq6e0y0c1Mv5OSQ4MRNdk0-p38xUyVerc,1366
+sky/utils/env_options.py,sha256=aaD6GoYK0LaZIqjOEZ-R7eccQuiRriW3EuLWtOI5En8,1578
 sky/utils/kubernetes_enums.py,sha256=imGqHSa8O07zD_6xH1SDMM7dBU5lF5fzFFlQuQy00QM,1384
 sky/utils/log_utils.py,sha256=AjkgSrk0GVOUbnnCEC2f4lsf2HOIXkZETCxR0BJw2-U,14152
 sky/utils/resources_utils.py,sha256=06Kx6AfbBdwBYGmIYFEY_qm6OBc2a5esZMPvIX7gCvc,7787
@@ -298,9 +298,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=k0TBoQ4zgf79-sVkixKSGYFHQ7Z
 sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
 sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
 sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=Kq1MDygF2IxFmu9FXpCxqucXLmeUrvs6OtRij6XTQbo,6554
-skypilot_nightly-1.0.0.dev20250211.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
-skypilot_nightly-1.0.0.dev20250211.dist-info/METADATA,sha256=4PYhrvC32in1nSpeg1_y7Sx5Qx2mu5vqzLyUyRsAVbc,21397
-skypilot_nightly-1.0.0.dev20250211.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-skypilot_nightly-1.0.0.dev20250211.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
-skypilot_nightly-1.0.0.dev20250211.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
-skypilot_nightly-1.0.0.dev20250211.dist-info/RECORD,,
+skypilot_nightly-1.0.0.dev20250213.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
+skypilot_nightly-1.0.0.dev20250213.dist-info/METADATA,sha256=hLWjPTEc6CnhdlKg2uvdj1VZgjmf3XNt0fJV3R61GTc,21397
+skypilot_nightly-1.0.0.dev20250213.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+skypilot_nightly-1.0.0.dev20250213.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
+skypilot_nightly-1.0.0.dev20250213.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
+skypilot_nightly-1.0.0.dev20250213.dist-info/RECORD,,