skypilot-nightly 1.0.0.dev20250211__py3-none-any.whl → 1.0.0.dev20250213__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/cli.py +14 -18
- sky/clouds/kubernetes.py +3 -1
- sky/clouds/runpod.py +6 -0
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/execution.py +2 -2
- sky/optimizer.py +2 -4
- sky/provision/instance_setup.py +38 -5
- sky/provision/kubernetes/utils.py +36 -34
- sky/provision/provisioner.py +7 -0
- sky/provision/runpod/utils.py +4 -3
- sky/resources.py +26 -0
- sky/skylet/constants.py +2 -0
- sky/skylet/events.py +9 -0
- sky/skylet/skylet.py +2 -0
- sky/task.py +25 -0
- sky/templates/kubernetes-ray.yml.j2 +3 -2
- sky/templates/runpod-ray.yml.j2 +1 -1
- sky/usage/constants.py +2 -1
- sky/usage/usage_lib.py +53 -11
- sky/utils/env_options.py +6 -0
- {skypilot_nightly-1.0.0.dev20250211.dist-info → skypilot_nightly-1.0.0.dev20250213.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250211.dist-info → skypilot_nightly-1.0.0.dev20250213.dist-info}/RECORD +27 -27
- {skypilot_nightly-1.0.0.dev20250211.dist-info → skypilot_nightly-1.0.0.dev20250213.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250211.dist-info → skypilot_nightly-1.0.0.dev20250213.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250211.dist-info → skypilot_nightly-1.0.0.dev20250213.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250211.dist-info → skypilot_nightly-1.0.0.dev20250213.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'c49961417a83b049b3f3435a252c8ec5ea0fb5e6'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.
|
38
|
+
__version__ = '1.0.0.dev20250213'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/cli.py
CHANGED
@@ -623,7 +623,8 @@ def _launch_with_confirm(
|
|
623
623
|
click.confirm(prompt, default=True, abort=True, show_default=True)
|
624
624
|
|
625
625
|
if not confirm_shown:
|
626
|
-
click.secho(
|
626
|
+
click.secho('Running on cluster: ', fg='cyan', nl=False)
|
627
|
+
click.secho(cluster)
|
627
628
|
|
628
629
|
sky.launch(
|
629
630
|
dag,
|
@@ -722,7 +723,6 @@ def _pop_and_ignore_fields_in_override_params(
|
|
722
723
|
def _make_task_or_dag_from_entrypoint_with_overrides(
|
723
724
|
entrypoint: Tuple[str, ...],
|
724
725
|
*,
|
725
|
-
entrypoint_name: str = 'Task',
|
726
726
|
name: Optional[str] = None,
|
727
727
|
workdir: Optional[str] = None,
|
728
728
|
cloud: Optional[str] = None,
|
@@ -754,19 +754,15 @@ def _make_task_or_dag_from_entrypoint_with_overrides(
|
|
754
754
|
entrypoint: Optional[str]
|
755
755
|
if is_yaml:
|
756
756
|
# Treat entrypoint as a yaml.
|
757
|
-
click.secho(
|
758
|
-
|
759
|
-
nl=False)
|
760
|
-
click.secho(entrypoint, bold=True)
|
757
|
+
click.secho('YAML to run: ', fg='cyan', nl=False)
|
758
|
+
click.secho(entrypoint)
|
761
759
|
else:
|
762
760
|
if not entrypoint:
|
763
761
|
entrypoint = None
|
764
762
|
else:
|
765
763
|
# Treat entrypoint as a bash command.
|
766
|
-
click.secho(
|
767
|
-
|
768
|
-
nl=False)
|
769
|
-
click.secho(entrypoint, bold=True)
|
764
|
+
click.secho('Command to run: ', fg='cyan', nl=False)
|
765
|
+
click.secho(entrypoint)
|
770
766
|
|
771
767
|
override_params = _parse_override_params(cloud=cloud,
|
772
768
|
region=region,
|
@@ -1333,7 +1329,8 @@ def exec(
|
|
1333
1329
|
'supports a single task only.')
|
1334
1330
|
task = task_or_dag
|
1335
1331
|
|
1336
|
-
click.secho(
|
1332
|
+
click.secho('Submitting job to cluster: ', fg='cyan', nl=False)
|
1333
|
+
click.secho(cluster)
|
1337
1334
|
sky.exec(task, backend=backend, cluster_name=cluster, detach_run=detach_run)
|
1338
1335
|
|
1339
1336
|
|
@@ -1982,7 +1979,7 @@ def cost_report(all: bool): # pylint: disable=redefined-builtin
|
|
1982
1979
|
def queue(clusters: List[str], skip_finished: bool, all_users: bool):
|
1983
1980
|
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
|
1984
1981
|
"""Show the job queue for cluster(s)."""
|
1985
|
-
click.secho('Fetching and parsing job queue...', fg='
|
1982
|
+
click.secho('Fetching and parsing job queue...', fg='cyan')
|
1986
1983
|
if clusters:
|
1987
1984
|
clusters = _get_glob_clusters(clusters)
|
1988
1985
|
else:
|
@@ -3785,7 +3782,7 @@ def jobs_queue(all: bool, refresh: bool, skip_finished: bool):
|
|
3785
3782
|
watch -n60 sky jobs queue
|
3786
3783
|
|
3787
3784
|
"""
|
3788
|
-
click.secho('Fetching managed
|
3785
|
+
click.secho('Fetching managed jobs...', fg='cyan')
|
3789
3786
|
with rich_utils.safe_status(
|
3790
3787
|
ux_utils.spinner_message('Checking managed jobs')):
|
3791
3788
|
_, msg = _get_managed_jobs(refresh=refresh,
|
@@ -3938,7 +3935,7 @@ def jobs_dashboard(port: Optional[int]):
|
|
3938
3935
|
# see if the controller is UP first, which is slow; (2) not have to run SSH
|
3939
3936
|
# port forwarding first (we'd just launch a local dashboard which would make
|
3940
3937
|
# REST API calls to the controller dashboard server).
|
3941
|
-
click.secho('Checking if jobs controller is up...', fg='
|
3938
|
+
click.secho('Checking if jobs controller is up...', fg='cyan')
|
3942
3939
|
hint = ('Dashboard is not available if jobs controller is not up. Run a '
|
3943
3940
|
'managed job first.')
|
3944
3941
|
backend_utils.is_controller_accessible(
|
@@ -4032,7 +4029,6 @@ def _generate_task_with_service(
|
|
4032
4029
|
disk_size=disk_size,
|
4033
4030
|
disk_tier=disk_tier,
|
4034
4031
|
ports=ports,
|
4035
|
-
entrypoint_name='Service',
|
4036
4032
|
)
|
4037
4033
|
if isinstance(task, sky.Dag):
|
4038
4034
|
raise click.UsageError(
|
@@ -4197,7 +4193,7 @@ def serve_up(
|
|
4197
4193
|
ports=ports,
|
4198
4194
|
not_supported_cmd='sky serve up',
|
4199
4195
|
)
|
4200
|
-
click.secho('Service
|
4196
|
+
click.secho('Service spec:', fg='cyan')
|
4201
4197
|
click.echo(task.service)
|
4202
4198
|
|
4203
4199
|
click.secho('Each replica will use the following resources (estimated):',
|
@@ -4315,7 +4311,7 @@ def serve_update(
|
|
4315
4311
|
ports=ports,
|
4316
4312
|
not_supported_cmd='sky serve update',
|
4317
4313
|
)
|
4318
|
-
click.secho('Service
|
4314
|
+
click.secho('Service spec:', fg='cyan')
|
4319
4315
|
click.echo(task.service)
|
4320
4316
|
|
4321
4317
|
click.secho('New replica will use the following resources (estimated):',
|
@@ -4767,7 +4763,7 @@ def benchmark_launch(
|
|
4767
4763
|
'Please provide a YAML file.')
|
4768
4764
|
assert config is not None, (is_yaml, config)
|
4769
4765
|
|
4770
|
-
click.secho('Benchmarking a task from YAML
|
4766
|
+
click.secho('Benchmarking a task from YAML: ', fg='cyan', nl=False)
|
4771
4767
|
click.secho(entrypoint, bold=True)
|
4772
4768
|
|
4773
4769
|
candidates = _get_candidate_configs(entrypoint)
|
sky/clouds/kubernetes.py
CHANGED
@@ -464,7 +464,9 @@ class Kubernetes(clouds.Cloud):
|
|
464
464
|
# CPU resources on the node instead within the pod.
|
465
465
|
custom_ray_options = {
|
466
466
|
'object-store-memory': 500000000,
|
467
|
-
'num-cpus'
|
467
|
+
# 'num-cpus' must be an integer, but we should not set it to 0 if
|
468
|
+
# cpus is <1.
|
469
|
+
'num-cpus': str(max(int(cpus), 1)),
|
468
470
|
}
|
469
471
|
deploy_vars = {
|
470
472
|
'instance_type': resources.instance_type,
|
sky/clouds/runpod.py
CHANGED
@@ -177,6 +177,11 @@ class RunPod(clouds.Cloud):
|
|
177
177
|
hourly_cost = self.instance_type_to_hourly_cost(
|
178
178
|
instance_type=instance_type, use_spot=use_spot)
|
179
179
|
|
180
|
+
# default to root
|
181
|
+
docker_username_for_runpod = (resources.docker_username_for_runpod
|
182
|
+
if resources.docker_username_for_runpod
|
183
|
+
is not None else 'root')
|
184
|
+
|
180
185
|
return {
|
181
186
|
'instance_type': instance_type,
|
182
187
|
'custom_resources': custom_resources,
|
@@ -184,6 +189,7 @@ class RunPod(clouds.Cloud):
|
|
184
189
|
'image_id': image_id,
|
185
190
|
'use_spot': use_spot,
|
186
191
|
'bid_per_gpu': str(hourly_cost),
|
192
|
+
'docker_username_for_runpod': docker_username_for_runpod,
|
187
193
|
}
|
188
194
|
|
189
195
|
def _get_feasible_launchable_resources(
|
sky/execution.py
CHANGED
@@ -259,8 +259,8 @@ def _execute(
|
|
259
259
|
bold = colorama.Style.BRIGHT
|
260
260
|
reset = colorama.Style.RESET_ALL
|
261
261
|
logger.info(
|
262
|
-
f'{yellow}Launching
|
263
|
-
f'automatically recover from preemptions.
|
262
|
+
f'{yellow}Launching a spot job that does not '
|
263
|
+
f'automatically recover from preemptions. To '
|
264
264
|
'get automatic recovery, use managed job instead: '
|
265
265
|
f'{reset}{bold}sky jobs launch{reset} {yellow}or{reset} '
|
266
266
|
f'{bold}sky.jobs.launch(){reset}.')
|
sky/optimizer.py
CHANGED
@@ -884,10 +884,8 @@ class Optimizer:
|
|
884
884
|
# Add a new line for better readability, when there are multiple
|
885
885
|
# tasks.
|
886
886
|
logger.info('')
|
887
|
-
logger.info(
|
888
|
-
|
889
|
-
f'({task.num_nodes} node{plural}):'
|
890
|
-
f'{colorama.Style.RESET_ALL}')
|
887
|
+
logger.info(f'Considered resources {task_str}'
|
888
|
+
f'({task.num_nodes} node{plural}):')
|
891
889
|
|
892
890
|
# Only print 1 row per cloud.
|
893
891
|
# The following code is to generate the table
|
sky/provision/instance_setup.py
CHANGED
@@ -15,9 +15,12 @@ from sky.provision import docker_utils
|
|
15
15
|
from sky.provision import logging as provision_logging
|
16
16
|
from sky.provision import metadata_utils
|
17
17
|
from sky.skylet import constants
|
18
|
+
from sky.usage import constants as usage_constants
|
19
|
+
from sky.usage import usage_lib
|
18
20
|
from sky.utils import accelerator_registry
|
19
21
|
from sky.utils import command_runner
|
20
22
|
from sky.utils import common_utils
|
23
|
+
from sky.utils import env_options
|
21
24
|
from sky.utils import subprocess_utils
|
22
25
|
from sky.utils import timeline
|
23
26
|
from sky.utils import ux_utils
|
@@ -67,6 +70,30 @@ MAYBE_SKYLET_RESTART_CMD = (f'{constants.ACTIVATE_SKY_REMOTE_PYTHON_ENV}; '
|
|
67
70
|
'sky.skylet.attempt_skylet;')
|
68
71
|
|
69
72
|
|
73
|
+
def _set_usage_run_id_cmd() -> str:
|
74
|
+
"""Gets the command to set the usage run id.
|
75
|
+
|
76
|
+
The command saves the current usage run id to the file, so that the skylet
|
77
|
+
can use it to report the heartbeat.
|
78
|
+
|
79
|
+
We use a function instead of a constant so that the usage run id is the
|
80
|
+
latest one when the function is called.
|
81
|
+
"""
|
82
|
+
return (
|
83
|
+
f'cat {usage_constants.USAGE_RUN_ID_FILE} || '
|
84
|
+
# The run id is retrieved locally for the current run, so that the
|
85
|
+
# remote cluster will be set with the same run id as the initial
|
86
|
+
# launch operation.
|
87
|
+
f'echo "{usage_lib.messages.usage.run_id}" > '
|
88
|
+
f'{usage_constants.USAGE_RUN_ID_FILE}')
|
89
|
+
|
90
|
+
|
91
|
+
def _set_skypilot_env_var_cmd() -> str:
|
92
|
+
"""Sets the skypilot environment variables on the remote machine."""
|
93
|
+
env_vars = env_options.Options.all_options()
|
94
|
+
return '; '.join([f'export {k}={v}' for k, v in env_vars.items()])
|
95
|
+
|
96
|
+
|
70
97
|
def _auto_retry(should_retry: Callable[[Exception], bool] = lambda _: True):
|
71
98
|
"""Decorator that retries the function if it fails.
|
72
99
|
|
@@ -450,11 +477,17 @@ def start_skylet_on_head_node(cluster_name: str,
|
|
450
477
|
logger.info(f'Running command on head node: {MAYBE_SKYLET_RESTART_CMD}')
|
451
478
|
# We need to source bashrc for skylet to make sure the autostop event can
|
452
479
|
# access the path to the cloud CLIs.
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
480
|
+
set_usage_run_id_cmd = _set_usage_run_id_cmd()
|
481
|
+
# Set the skypilot environment variables, including the usage type, debug
|
482
|
+
# info, and other options.
|
483
|
+
set_skypilot_env_var_cmd = _set_skypilot_env_var_cmd()
|
484
|
+
returncode, stdout, stderr = head_runner.run(
|
485
|
+
f'{set_usage_run_id_cmd}; {set_skypilot_env_var_cmd}; '
|
486
|
+
f'{MAYBE_SKYLET_RESTART_CMD}',
|
487
|
+
stream_logs=False,
|
488
|
+
require_outputs=True,
|
489
|
+
log_path=log_path_abs,
|
490
|
+
source_bashrc=True)
|
458
491
|
if returncode:
|
459
492
|
raise RuntimeError('Failed to start skylet on the head node '
|
460
493
|
f'(exit code {returncode}). Error: '
|
@@ -2178,52 +2178,54 @@ def get_kubernetes_node_info(
|
|
2178
2178
|
|
2179
2179
|
lf, _ = detect_gpu_label_formatter(context)
|
2180
2180
|
if not lf:
|
2181
|
-
|
2181
|
+
label_keys = []
|
2182
2182
|
else:
|
2183
2183
|
label_keys = lf.get_label_keys()
|
2184
2184
|
|
2185
2185
|
node_info_dict: Dict[str, KubernetesNodeInfo] = {}
|
2186
2186
|
|
2187
|
-
for
|
2188
|
-
|
2189
|
-
|
2187
|
+
for node in nodes:
|
2188
|
+
accelerator_name = None
|
2189
|
+
# Determine the accelerator name from the node labels and pick the
|
2190
|
+
# first one found. We assume that the node has only one accelerator type
|
2191
|
+
# (e.g., either GPU or TPU).
|
2192
|
+
for label_key in label_keys:
|
2190
2193
|
if lf is not None and label_key in node.metadata.labels:
|
2191
2194
|
accelerator_name = lf.get_accelerator_from_label_value(
|
2192
2195
|
node.metadata.labels.get(label_key))
|
2193
|
-
|
2194
|
-
accelerator_name = None
|
2196
|
+
break
|
2195
2197
|
|
2196
|
-
|
2197
|
-
|
2198
|
+
allocated_qty = 0
|
2199
|
+
accelerator_count = get_node_accelerator_count(node.status.allocatable)
|
2198
2200
|
|
2199
|
-
|
2200
|
-
|
2201
|
+
if pods is None:
|
2202
|
+
accelerators_available = -1
|
2201
2203
|
|
2202
|
-
|
2203
|
-
|
2204
|
-
|
2205
|
-
|
2206
|
-
|
2207
|
-
|
2208
|
-
|
2209
|
-
|
2210
|
-
|
2211
|
-
|
2212
|
-
|
2213
|
-
|
2214
|
-
|
2215
|
-
|
2216
|
-
|
2217
|
-
|
2218
|
-
|
2219
|
-
|
2220
|
-
|
2204
|
+
else:
|
2205
|
+
for pod in pods:
|
2206
|
+
# Get all the pods running on the node
|
2207
|
+
if (pod.spec.node_name == node.metadata.name and
|
2208
|
+
pod.status.phase in ['Running', 'Pending']):
|
2209
|
+
# Iterate over all the containers in the pod and sum the
|
2210
|
+
# GPU requests
|
2211
|
+
for container in pod.spec.containers:
|
2212
|
+
if container.resources.requests:
|
2213
|
+
allocated_qty += get_node_accelerator_count(
|
2214
|
+
container.resources.requests)
|
2215
|
+
|
2216
|
+
accelerators_available = accelerator_count - allocated_qty
|
2217
|
+
|
2218
|
+
# Exclude multi-host TPUs from being processed.
|
2219
|
+
# TODO(Doyoung): Remove the logic when adding support for
|
2220
|
+
# multi-host TPUs.
|
2221
|
+
if is_multi_host_tpu(node.metadata.labels):
|
2222
|
+
continue
|
2221
2223
|
|
2222
|
-
|
2223
|
-
|
2224
|
-
|
2225
|
-
|
2226
|
-
|
2224
|
+
node_info_dict[node.metadata.name] = KubernetesNodeInfo(
|
2225
|
+
name=node.metadata.name,
|
2226
|
+
accelerator_type=accelerator_name,
|
2227
|
+
total={'accelerator_count': int(accelerator_count)},
|
2228
|
+
free={'accelerators_available': int(accelerators_available)})
|
2227
2229
|
|
2228
2230
|
return node_info_dict
|
2229
2231
|
|
sky/provision/provisioner.py
CHANGED
@@ -450,6 +450,13 @@ def _post_provision_setup(
|
|
450
450
|
logger.info(f'{indent_str}{colorama.Style.DIM}{vm_str}{plural} {verb} '
|
451
451
|
f'up.{colorama.Style.RESET_ALL}')
|
452
452
|
|
453
|
+
# It's promised by the cluster config that docker_config does not
|
454
|
+
# exist for docker-native clouds, i.e. they provide docker containers
|
455
|
+
# instead of full VMs, like Kubernetes and RunPod, as it requires some
|
456
|
+
# special handlings to run docker inside their docker virtualization.
|
457
|
+
# For their Docker image settings, we do them when provisioning the
|
458
|
+
# cluster. See provision/{cloud}/instance.py:get_cluster_info for more
|
459
|
+
# details.
|
453
460
|
if docker_config:
|
454
461
|
status.update(
|
455
462
|
ux_utils.spinner_message(
|
sky/provision/runpod/utils.py
CHANGED
@@ -186,7 +186,7 @@ def delete_pod_template(template_name: str) -> None:
|
|
186
186
|
runpod.runpod.api.graphql.run_graphql_query(
|
187
187
|
f'mutation {{deleteTemplate(templateName: "{template_name}")}}')
|
188
188
|
except runpod.runpod.error.QueryError as e:
|
189
|
-
logger.warning(f'Failed to delete template {template_name}: {e}'
|
189
|
+
logger.warning(f'Failed to delete template {template_name}: {e} '
|
190
190
|
'Please delete it manually.')
|
191
191
|
|
192
192
|
|
@@ -195,8 +195,9 @@ def delete_register_auth(registry_auth_id: str) -> None:
|
|
195
195
|
try:
|
196
196
|
runpod.runpod.delete_container_registry_auth(registry_auth_id)
|
197
197
|
except runpod.runpod.error.QueryError as e:
|
198
|
-
logger.warning(
|
199
|
-
|
198
|
+
logger.warning(
|
199
|
+
f'Failed to delete registry auth {registry_auth_id}: {e} '
|
200
|
+
'Please delete it manually.')
|
200
201
|
|
201
202
|
|
202
203
|
def _create_template_for_docker_login(
|
sky/resources.py
CHANGED
@@ -67,6 +67,7 @@ class Resources:
|
|
67
67
|
# Internal use only.
|
68
68
|
# pylint: disable=invalid-name
|
69
69
|
_docker_login_config: Optional[docker_utils.DockerLoginConfig] = None,
|
70
|
+
_docker_username_for_runpod: Optional[str] = None,
|
70
71
|
_is_image_managed: Optional[bool] = None,
|
71
72
|
_requires_fuse: Optional[bool] = None,
|
72
73
|
_cluster_config_overrides: Optional[Dict[str, Any]] = None,
|
@@ -148,6 +149,9 @@ class Resources:
|
|
148
149
|
_docker_login_config: the docker configuration to use. This includes
|
149
150
|
the docker username, password, and registry server. If None, skip
|
150
151
|
docker login.
|
152
|
+
_docker_username_for_runpod: the login username for the docker
|
153
|
+
containers. This is used by RunPod to set the ssh user for the
|
154
|
+
docker containers.
|
151
155
|
_requires_fuse: whether the task requires FUSE mounting support. This
|
152
156
|
is used internally by certain cloud implementations to do additional
|
153
157
|
setup for FUSE mounting. This flag also safeguards against using
|
@@ -234,6 +238,12 @@ class Resources:
|
|
234
238
|
|
235
239
|
self._docker_login_config = _docker_login_config
|
236
240
|
|
241
|
+
# TODO(andyl): This ctor param seems to be unused.
|
242
|
+
# We always use `Task.set_resources` and `Resources.copy` to set the
|
243
|
+
# `docker_username_for_runpod`. But to keep the consistency with
|
244
|
+
# `_docker_login_config`, we keep it here.
|
245
|
+
self._docker_username_for_runpod = _docker_username_for_runpod
|
246
|
+
|
237
247
|
self._requires_fuse = _requires_fuse
|
238
248
|
|
239
249
|
self._cluster_config_overrides = _cluster_config_overrides
|
@@ -479,6 +489,10 @@ class Resources:
|
|
479
489
|
def requires_fuse(self, value: Optional[bool]) -> None:
|
480
490
|
self._requires_fuse = value
|
481
491
|
|
492
|
+
@property
|
493
|
+
def docker_username_for_runpod(self) -> Optional[str]:
|
494
|
+
return self._docker_username_for_runpod
|
495
|
+
|
482
496
|
def _set_cpus(
|
483
497
|
self,
|
484
498
|
cpus: Union[None, int, float, str],
|
@@ -1065,6 +1079,10 @@ class Resources:
|
|
1065
1079
|
cloud_specific_variables = self.cloud.make_deploy_resources_variables(
|
1066
1080
|
self, cluster_name, region, zones, num_nodes, dryrun)
|
1067
1081
|
|
1082
|
+
# TODO(andyl): Should we print some warnings if users' envs share
|
1083
|
+
# same names with the cloud specific variables, but not enabled
|
1084
|
+
# since it's not on the particular cloud?
|
1085
|
+
|
1068
1086
|
# Docker run options
|
1069
1087
|
docker_run_options = skypilot_config.get_nested(
|
1070
1088
|
('docker', 'run_options'),
|
@@ -1277,6 +1295,9 @@ class Resources:
|
|
1277
1295
|
labels=override.pop('labels', self.labels),
|
1278
1296
|
_docker_login_config=override.pop('_docker_login_config',
|
1279
1297
|
self._docker_login_config),
|
1298
|
+
_docker_username_for_runpod=override.pop(
|
1299
|
+
'_docker_username_for_runpod',
|
1300
|
+
self._docker_username_for_runpod),
|
1280
1301
|
_is_image_managed=override.pop('_is_image_managed',
|
1281
1302
|
self._is_image_managed),
|
1282
1303
|
_requires_fuse=override.pop('_requires_fuse', self._requires_fuse),
|
@@ -1438,6 +1459,8 @@ class Resources:
|
|
1438
1459
|
resources_fields['labels'] = config.pop('labels', None)
|
1439
1460
|
resources_fields['_docker_login_config'] = config.pop(
|
1440
1461
|
'_docker_login_config', None)
|
1462
|
+
resources_fields['_docker_username_for_runpod'] = config.pop(
|
1463
|
+
'_docker_username_for_runpod', None)
|
1441
1464
|
resources_fields['_is_image_managed'] = config.pop(
|
1442
1465
|
'_is_image_managed', None)
|
1443
1466
|
resources_fields['_requires_fuse'] = config.pop('_requires_fuse', None)
|
@@ -1486,6 +1509,9 @@ class Resources:
|
|
1486
1509
|
if self._docker_login_config is not None:
|
1487
1510
|
config['_docker_login_config'] = dataclasses.asdict(
|
1488
1511
|
self._docker_login_config)
|
1512
|
+
if self._docker_username_for_runpod is not None:
|
1513
|
+
config['_docker_username_for_runpod'] = (
|
1514
|
+
self._docker_username_for_runpod)
|
1489
1515
|
add_if_not_none('_cluster_config_overrides',
|
1490
1516
|
self._cluster_config_overrides)
|
1491
1517
|
if self._is_image_managed is not None:
|
sky/skylet/constants.py
CHANGED
@@ -110,6 +110,8 @@ DOCKER_LOGIN_ENV_VARS = {
|
|
110
110
|
DOCKER_SERVER_ENV_VAR,
|
111
111
|
}
|
112
112
|
|
113
|
+
RUNPOD_DOCKER_USERNAME_ENV_VAR = 'SKYPILOT_RUNPOD_DOCKER_USERNAME'
|
114
|
+
|
113
115
|
# Commands for disable GPU ECC, which can improve the performance of the GPU
|
114
116
|
# for some workloads by 30%. This will only be applied when a user specify
|
115
117
|
# `nvidia_gpus.disable_ecc: true` in ~/.sky/config.yaml.
|
sky/skylet/events.py
CHANGED
@@ -20,6 +20,7 @@ from sky.serve import serve_utils
|
|
20
20
|
from sky.skylet import autostop_lib
|
21
21
|
from sky.skylet import constants
|
22
22
|
from sky.skylet import job_lib
|
23
|
+
from sky.usage import usage_lib
|
23
24
|
from sky.utils import cluster_yaml_utils
|
24
25
|
from sky.utils import common_utils
|
25
26
|
from sky.utils import ux_utils
|
@@ -90,6 +91,14 @@ class ServiceUpdateEvent(SkyletEvent):
|
|
90
91
|
serve_utils.update_service_status()
|
91
92
|
|
92
93
|
|
94
|
+
class UsageHeartbeatReportEvent(SkyletEvent):
|
95
|
+
"""Skylet event for reporting usage."""
|
96
|
+
EVENT_INTERVAL_SECONDS = 600
|
97
|
+
|
98
|
+
def _run(self):
|
99
|
+
usage_lib.send_heartbeat(interval_seconds=self.EVENT_INTERVAL_SECONDS)
|
100
|
+
|
101
|
+
|
93
102
|
class AutostopEvent(SkyletEvent):
|
94
103
|
"""Skylet event for autostop.
|
95
104
|
|
sky/skylet/skylet.py
CHANGED
sky/task.py
CHANGED
@@ -121,6 +121,9 @@ def _check_docker_login_config(task_envs: Dict[str, str]) -> bool:
|
|
121
121
|
|
122
122
|
If any of the docker login env vars is set, all of them must be set.
|
123
123
|
|
124
|
+
Returns:
|
125
|
+
True if there is a valid docker login config in task_envs.
|
126
|
+
False otherwise.
|
124
127
|
Raises:
|
125
128
|
ValueError: if any of the docker login env vars is set, but not all of
|
126
129
|
them are set.
|
@@ -168,6 +171,23 @@ def _with_docker_login_config(
|
|
168
171
|
return type(resources)(new_resources)
|
169
172
|
|
170
173
|
|
174
|
+
def _with_docker_username_for_runpod(
|
175
|
+
resources: Union[Set['resources_lib.Resources'],
|
176
|
+
List['resources_lib.Resources']],
|
177
|
+
task_envs: Dict[str, str],
|
178
|
+
) -> Union[Set['resources_lib.Resources'], List['resources_lib.Resources']]:
|
179
|
+
docker_username_for_runpod = task_envs.get(
|
180
|
+
constants.RUNPOD_DOCKER_USERNAME_ENV_VAR)
|
181
|
+
|
182
|
+
# We should not call r.copy() if docker_username_for_runpod is None,
|
183
|
+
# to prevent `DummyResources` instance becoming a `Resources` instance.
|
184
|
+
if docker_username_for_runpod is None:
|
185
|
+
return resources
|
186
|
+
return (type(resources)(
|
187
|
+
r.copy(_docker_username_for_runpod=docker_username_for_runpod)
|
188
|
+
for r in resources))
|
189
|
+
|
190
|
+
|
171
191
|
class Task:
|
172
192
|
"""Task: a computation to be run on the cloud."""
|
173
193
|
|
@@ -582,6 +602,8 @@ class Task:
|
|
582
602
|
if _check_docker_login_config(self._envs):
|
583
603
|
self.resources = _with_docker_login_config(self.resources,
|
584
604
|
self._envs)
|
605
|
+
self.resources = _with_docker_username_for_runpod(
|
606
|
+
self.resources, self._envs)
|
585
607
|
return self
|
586
608
|
|
587
609
|
@property
|
@@ -647,6 +669,9 @@ class Task:
|
|
647
669
|
resources = {resources}
|
648
670
|
# TODO(woosuk): Check if the resources are None.
|
649
671
|
self.resources = _with_docker_login_config(resources, self.envs)
|
672
|
+
# Only have effect on RunPod.
|
673
|
+
self.resources = _with_docker_username_for_runpod(
|
674
|
+
self.resources, self.envs)
|
650
675
|
|
651
676
|
# Evaluate if the task requires FUSE and set the requires_fuse flag
|
652
677
|
for _, storage_obj in self.storage_mounts.items():
|
@@ -373,15 +373,16 @@ available_node_types:
|
|
373
373
|
done;
|
374
374
|
if [ ! -z "$INSTALL_FIRST" ]; then
|
375
375
|
echo "Installing core packages: $INSTALL_FIRST";
|
376
|
-
DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get install -y $INSTALL_FIRST;
|
376
|
+
DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get install -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" $INSTALL_FIRST;
|
377
377
|
fi;
|
378
378
|
# SSH and other packages are not necessary, so we disable set -e
|
379
379
|
set +e
|
380
380
|
|
381
381
|
if [ ! -z "$MISSING_PACKAGES" ]; then
|
382
382
|
echo "Installing missing packages: $MISSING_PACKAGES";
|
383
|
-
DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get install -y $MISSING_PACKAGES;
|
383
|
+
DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get install -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" $MISSING_PACKAGES;
|
384
384
|
fi;
|
385
|
+
|
385
386
|
$(prefix_cmd) mkdir -p /var/run/sshd;
|
386
387
|
$(prefix_cmd) sed -i "s/PermitRootLogin prohibit-password/PermitRootLogin yes/" /etc/ssh/sshd_config;
|
387
388
|
$(prefix_cmd) sed "s@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g" -i /etc/pam.d/sshd;
|
sky/templates/runpod-ray.yml.j2
CHANGED
sky/usage/constants.py
CHANGED
@@ -3,7 +3,6 @@
|
|
3
3
|
LOG_URL = 'http://usage.skypilot.co:9090/loki/api/v1/push' # pylint: disable=line-too-long
|
4
4
|
|
5
5
|
USAGE_MESSAGE_SCHEMA_VERSION = 1
|
6
|
-
|
7
6
|
PRIVACY_POLICY_PATH = '~/.sky/privacy_policy'
|
8
7
|
|
9
8
|
USAGE_POLICY_MESSAGE = (
|
@@ -15,3 +14,5 @@ USAGE_POLICY_MESSAGE = (
|
|
15
14
|
|
16
15
|
USAGE_MESSAGE_REDACT_KEYS = ['setup', 'run', 'envs']
|
17
16
|
USAGE_MESSAGE_REDACT_TYPES = {str, dict}
|
17
|
+
|
18
|
+
USAGE_RUN_ID_FILE = '~/.sky/usage_run_id'
|
sky/usage/usage_lib.py
CHANGED
@@ -44,6 +44,7 @@ def _get_current_timestamp_ns() -> int:
|
|
44
44
|
class MessageType(enum.Enum):
|
45
45
|
"""Types for messages to be sent to Loki."""
|
46
46
|
USAGE = 'usage'
|
47
|
+
HEARTBEAT = 'heartbeat'
|
47
48
|
# TODO(zhwu): Add more types, e.g., cluster_lifecycle.
|
48
49
|
|
49
50
|
|
@@ -67,8 +68,9 @@ class MessageToReport:
|
|
67
68
|
properties = self.__dict__.copy()
|
68
69
|
return {k: v for k, v in properties.items() if not k.startswith('_')}
|
69
70
|
|
70
|
-
def __repr__(self):
|
71
|
-
|
71
|
+
def __repr__(self) -> str:
|
72
|
+
d = self.get_properties()
|
73
|
+
return json.dumps(d)
|
72
74
|
|
73
75
|
|
74
76
|
class UsageMessageToReport(MessageToReport):
|
@@ -160,10 +162,6 @@ class UsageMessageToReport(MessageToReport):
|
|
160
162
|
self.exception: Optional[str] = None # entrypoint_context
|
161
163
|
self.stacktrace: Optional[str] = None # entrypoint_context
|
162
164
|
|
163
|
-
def __repr__(self) -> str:
|
164
|
-
d = self.get_properties()
|
165
|
-
return json.dumps(d)
|
166
|
-
|
167
165
|
def update_entrypoint(self, msg: str):
|
168
166
|
self.entrypoint = msg
|
169
167
|
|
@@ -275,16 +273,43 @@ class UsageMessageToReport(MessageToReport):
|
|
275
273
|
name_or_fn)
|
276
274
|
|
277
275
|
|
276
|
+
class HeartbeatMessageToReport(MessageToReport):
|
277
|
+
"""Message to be reported to Grafana Loki for heartbeat on a cluster."""
|
278
|
+
|
279
|
+
def __init__(self, interval_seconds: int = 600):
|
280
|
+
super().__init__(constants.USAGE_MESSAGE_SCHEMA_VERSION)
|
281
|
+
# This interval_seconds is mainly for recording the heartbeat interval
|
282
|
+
# in the heartbeat message, so that the collector can use it.
|
283
|
+
self.interval_seconds = interval_seconds
|
284
|
+
|
285
|
+
def get_properties(self) -> Dict[str, Any]:
|
286
|
+
properties = super().get_properties()
|
287
|
+
# The run id is set by the skylet, which will always be the same for
|
288
|
+
# the entire lifetime of the run.
|
289
|
+
with open(os.path.expanduser(constants.USAGE_RUN_ID_FILE),
|
290
|
+
'r',
|
291
|
+
encoding='utf-8') as f:
|
292
|
+
properties['run_id'] = f.read().strip()
|
293
|
+
return properties
|
294
|
+
|
295
|
+
|
278
296
|
class MessageCollection:
|
279
297
|
"""A collection of messages."""
|
280
298
|
|
281
299
|
def __init__(self):
|
282
|
-
self._messages = {
|
300
|
+
self._messages = {
|
301
|
+
MessageType.USAGE: UsageMessageToReport(),
|
302
|
+
MessageType.HEARTBEAT: HeartbeatMessageToReport()
|
303
|
+
}
|
283
304
|
|
284
305
|
@property
|
285
|
-
def usage(self):
|
306
|
+
def usage(self) -> UsageMessageToReport:
|
286
307
|
return self._messages[MessageType.USAGE]
|
287
308
|
|
309
|
+
@property
|
310
|
+
def heartbeat(self) -> HeartbeatMessageToReport:
|
311
|
+
return self._messages[MessageType.HEARTBEAT]
|
312
|
+
|
288
313
|
def reset(self, message_type: MessageType):
|
289
314
|
self._messages[message_type] = self._messages[message_type].__class__()
|
290
315
|
|
@@ -308,13 +333,25 @@ def _send_to_loki(message_type: MessageType):
|
|
308
333
|
|
309
334
|
message = messages[message_type]
|
310
335
|
|
336
|
+
# In case the message has no start time, set it to the current time.
|
337
|
+
message.start()
|
311
338
|
message.send_time = _get_current_timestamp_ns()
|
312
|
-
|
339
|
+
# Use send time instead of start time to avoid the message being dropped
|
340
|
+
# by Loki, due to the timestamp being too old. We still have the start time
|
341
|
+
# in the message for dashboard.
|
342
|
+
log_timestamp = message.send_time
|
313
343
|
|
314
344
|
environment = 'prod'
|
315
345
|
if env_options.Options.IS_DEVELOPER.get():
|
316
346
|
environment = 'dev'
|
317
|
-
prom_labels = {
|
347
|
+
prom_labels = {
|
348
|
+
'type': message_type.value,
|
349
|
+
'environment': environment,
|
350
|
+
'schema_version': message.schema_version,
|
351
|
+
}
|
352
|
+
if message_type == MessageType.USAGE:
|
353
|
+
prom_labels['new_cluster'] = (message.original_cluster_status != 'UP'
|
354
|
+
and message.final_cluster_status == 'UP')
|
318
355
|
|
319
356
|
headers = {'Content-type': 'application/json'}
|
320
357
|
payload = {
|
@@ -392,7 +429,7 @@ def prepare_json_from_yaml_config(
|
|
392
429
|
def _send_local_messages():
|
393
430
|
"""Send all messages not been uploaded to Loki."""
|
394
431
|
for msg_type, message in messages.items():
|
395
|
-
if not message.message_sent:
|
432
|
+
if not message.message_sent and msg_type != MessageType.HEARTBEAT:
|
396
433
|
# Avoid the fallback entrypoint to send the message again
|
397
434
|
# in normal case.
|
398
435
|
try:
|
@@ -402,6 +439,11 @@ def _send_local_messages():
|
|
402
439
|
f'exception caught: {type(e)}({e})')
|
403
440
|
|
404
441
|
|
442
|
+
def send_heartbeat(interval_seconds: int = 600):
|
443
|
+
messages.heartbeat.interval_seconds = interval_seconds
|
444
|
+
_send_to_loki(MessageType.HEARTBEAT)
|
445
|
+
|
446
|
+
|
405
447
|
@contextlib.contextmanager
|
406
448
|
def entrypoint_context(name: str, fallback: bool = False):
|
407
449
|
"""Context manager for entrypoint.
|
sky/utils/env_options.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
"""Global environment options for sky."""
|
2
2
|
import enum
|
3
3
|
import os
|
4
|
+
from typing import Dict
|
4
5
|
|
5
6
|
|
6
7
|
class Options(enum.Enum):
|
@@ -35,3 +36,8 @@ class Options(enum.Enum):
|
|
35
36
|
def env_key(self) -> str:
|
36
37
|
"""The environment variable key name."""
|
37
38
|
return self.value[0]
|
39
|
+
|
40
|
+
@classmethod
|
41
|
+
def all_options(cls) -> Dict[str, bool]:
|
42
|
+
"""Returns all options as a dictionary."""
|
43
|
+
return {option.env_key: option.get() for option in list(Options)}
|
{skypilot_nightly-1.0.0.dev20250211.dist-info → skypilot_nightly-1.0.0.dev20250213.dist-info}/RECORD
RENAMED
@@ -1,20 +1,20 @@
|
|
1
|
-
sky/__init__.py,sha256=
|
1
|
+
sky/__init__.py,sha256=ulWt-DtbuSpxFnOQtMOqMTPqAYFd6WgWd_T-bxS7_QM,5560
|
2
2
|
sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
|
3
3
|
sky/authentication.py,sha256=MNc9uHnvQ1EsEl8SsrYcYCGbxcnDbR6gaRCXVNd5RZE,22338
|
4
4
|
sky/check.py,sha256=xzLlxUkBCrzpOho8lw65EvKLPl_b9lA2nteF5MSYbDQ,10885
|
5
|
-
sky/cli.py,sha256=
|
5
|
+
sky/cli.py,sha256=_Q-XlsLN73e8BJilClajL7VOG8vINVJ_xRjENOpJdDA,213928
|
6
6
|
sky/cloud_stores.py,sha256=PcLT57_8SZy7o6paAluElfBynaLkbaOq3l-8dNg1AVM,23672
|
7
7
|
sky/core.py,sha256=fE1rn4Ku94S0XmWTO5-6t6eT6aaJImNczRqEnTe8v7Q,38742
|
8
8
|
sky/dag.py,sha256=f3sJlkH4bE6Uuz3ozNtsMhcBpRx7KmC9Sa4seDKt4hU,3104
|
9
9
|
sky/exceptions.py,sha256=SEhRubPlk-crkflPC5P_Z085iLrSd3UScYwc790QwYw,9378
|
10
|
-
sky/execution.py,sha256=
|
10
|
+
sky/execution.py,sha256=vNUE9Z8hCSQeil7h3kdote2r6nkbrGXSqqmK6ru594Q,28453
|
11
11
|
sky/global_user_state.py,sha256=cTwltMCDIIBaapuGgARxFwpDJDCiKKyVW-PP_qtWuCA,30241
|
12
|
-
sky/optimizer.py,sha256=
|
13
|
-
sky/resources.py,sha256=
|
12
|
+
sky/optimizer.py,sha256=H5cpKELOQmnFpox0QXMB4P7jGhJxzXog4Ht_TYJaGuA,59758
|
13
|
+
sky/resources.py,sha256=W7VO5nTizr-KIhOamOs7oSwmBGLjQZhQM6DoYbiAOsg,71648
|
14
14
|
sky/sky_logging.py,sha256=7Zk9mL1TDxFkGsy3INMBKYlqsbognVGSMzAsHZdZlhw,5891
|
15
15
|
sky/skypilot_config.py,sha256=FN93hSG-heQCHBnemlIK2TwrJngKbpx4vMXNUzPIzV8,9087
|
16
16
|
sky/status_lib.py,sha256=J7Jb4_Dz0v2T64ttOdyUgpokvl4S0sBJrMfH7Fvo51A,1457
|
17
|
-
sky/task.py,sha256=
|
17
|
+
sky/task.py,sha256=Z74bBkOx1bFmGMoQRD3qbYxIZ5qi2AC5htY1KIsmvT0,52394
|
18
18
|
sky/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
19
|
sky/adaptors/aws.py,sha256=FNNC8B-iqSSiCLPiWGK_PLm1R-Kt4yI5JPIpdE0QJxQ,7565
|
20
20
|
sky/adaptors/azure.py,sha256=yjM8nAPW-mlSXfmA8OmJNnSIrZ9lQx2-GxiI-TIVrwE,21910
|
@@ -51,11 +51,11 @@ sky/clouds/do.py,sha256=od4gMTrs2W5IkaDDr_oexOSdIOqn94vKq2U_QZcrpRk,11311
|
|
51
51
|
sky/clouds/fluidstack.py,sha256=u2I6jXEtTqgqRWi2EafMsKqc8VkUq1cR6CSDUvk72_U,12407
|
52
52
|
sky/clouds/gcp.py,sha256=6QOnefFsYiLCcnajjduLHsayqJ641bBu42jPTpvy7Mc,55007
|
53
53
|
sky/clouds/ibm.py,sha256=0ArRTQx1_DpTNGByFhukzFedEDzmVjBsGiiques1bQ0,21447
|
54
|
-
sky/clouds/kubernetes.py,sha256=
|
54
|
+
sky/clouds/kubernetes.py,sha256=ocf8ZUlMbOcPb-n8JrSFix9rH75g089sC1JAd84JUXQ,31653
|
55
55
|
sky/clouds/lambda_cloud.py,sha256=42AmcN2X_wdBMuAw606nR_pQCBAy5QFiAo711_WRqDE,12672
|
56
56
|
sky/clouds/oci.py,sha256=VpPxpMJv52QePVdwdK9EuiMyqjp70dk8_rgUVv5Y-2w,27028
|
57
57
|
sky/clouds/paperspace.py,sha256=F0Sj1RcqRb5fPjrr8qbdeY-JdfAHcRPc902pZOi4erw,10889
|
58
|
-
sky/clouds/runpod.py,sha256=
|
58
|
+
sky/clouds/runpod.py,sha256=4gZTbUO4I8bzio5x9Km42ZIujZzCx2eszdkN0Mz5fqE,11893
|
59
59
|
sky/clouds/scp.py,sha256=JHyMqkAAqr9lJq79IVjj3rU1g-ZCCGLZTJEzIhYsw7c,15845
|
60
60
|
sky/clouds/vast.py,sha256=vQV489qkZMfDtt_SnXParPY49gkgKx5LZAEOsk65kIo,11231
|
61
61
|
sky/clouds/vsphere.py,sha256=rrNf6_uHy4ukjHwaN35XVh2-Xj9k43-QGQkiEXyHYJk,12273
|
@@ -85,7 +85,7 @@ sky/clouds/service_catalog/data_fetchers/fetch_azure.py,sha256=7YVnoGDGGZI2TK02b
|
|
85
85
|
sky/clouds/service_catalog/data_fetchers/fetch_cudo.py,sha256=52P48lvWN0s1ArjeLPeLemPRpxjSRcHincRle0nqdm4,3440
|
86
86
|
sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py,sha256=yKuAFbjBRNz_e2RNNDT_aHHAuKQ86Ac7GKgIie5O6Pg,7273
|
87
87
|
sky/clouds/service_catalog/data_fetchers/fetch_gcp.py,sha256=HLxdxA9DMSi19mgpVM_cERV4o-xh_tJ9vmkGm1wOaIE,30868
|
88
|
-
sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py,sha256=
|
88
|
+
sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py,sha256=Bi5ta91p4SkFCoaEJUPKPjDB0FZ24DMR4NcKFpKCkxU,4979
|
89
89
|
sky/clouds/service_catalog/data_fetchers/fetch_vast.py,sha256=zR9icM3ty5C8tGw13pQbsBtQQMgG4kl1j_jSGqqrgOA,4741
|
90
90
|
sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py,sha256=Opp2r3KSzXPtwk3lKNbO8IX9QzjoRSwy1kW3jPjtS1c,21453
|
91
91
|
sky/clouds/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -115,10 +115,10 @@ sky/provision/__init__.py,sha256=hb_z69_7-FH1I8aDpFKNj2x_a8spzceWcovklutNgP8,637
|
|
115
115
|
sky/provision/common.py,sha256=E8AlSUFcn0FYQq1erNmoVfMAdsF9tP2yxfyk-9PLvQU,10286
|
116
116
|
sky/provision/constants.py,sha256=oc_XDUkcoLQ_lwDy5yMeMSWviKS0j0s1c0pjlvpNeWY,800
|
117
117
|
sky/provision/docker_utils.py,sha256=ENm0LkyrYWic3Ikyacho8X5uDMvGsbkZQsb6kNH1DuI,19629
|
118
|
-
sky/provision/instance_setup.py,sha256=
|
118
|
+
sky/provision/instance_setup.py,sha256=YBFOwZQLBzpUjYoVQcX0KItej1rCBRWM23Dw9lg_q24,24386
|
119
119
|
sky/provision/logging.py,sha256=yZWgejrFBhhRjAtvFu5N5bRXIMK5TuwNjp1vKQqz2pw,2103
|
120
120
|
sky/provision/metadata_utils.py,sha256=LrxeV4wD2QPzNdXV_npj8q-pr35FatxBBjF_jSbpOT0,4013
|
121
|
-
sky/provision/provisioner.py,sha256=
|
121
|
+
sky/provision/provisioner.py,sha256=UuMO2wp98Nk1nue25TtIaMU79Aj14v9IoIAFO_CUy9w,29636
|
122
122
|
sky/provision/aws/__init__.py,sha256=mxq8PeWJqUtalDozTNpbtENErRZ1ktEs8uf2aG9UUgU,731
|
123
123
|
sky/provision/aws/config.py,sha256=-4mr5uxgsl_8eLm_4DfP8JurZGSysGuY0iDeBTHnX5Q,25943
|
124
124
|
sky/provision/aws/instance.py,sha256=3-R8ohuN8ooNh2Fqqb7-c4vNFy1xsw2GQF4PHg3APhE,40843
|
@@ -153,7 +153,7 @@ sky/provision/kubernetes/config.py,sha256=bXwOGdSAnXCkDreew0KsSUqSv3ZrptNeevqat7
|
|
153
153
|
sky/provision/kubernetes/instance.py,sha256=AQikdRgNklpeMgiEd4w2Hh7kGssVABsy0aCh9xsKi5Y,50313
|
154
154
|
sky/provision/kubernetes/network.py,sha256=EpNjRQ131CXepqbdkoRKFu4szVrm0oKEpv1l8EgOkjU,12364
|
155
155
|
sky/provision/kubernetes/network_utils.py,sha256=52BZY_5ynCH6IXlivKObYyAHDgQCJyAJIjmM7J4MpFo,11393
|
156
|
-
sky/provision/kubernetes/utils.py,sha256=
|
156
|
+
sky/provision/kubernetes/utils.py,sha256=swOe6ozgSoucDtoJCExs0HLLWYuoi5HkIGMMSp7fEzc,109962
|
157
157
|
sky/provision/kubernetes/manifests/smarter-device-manager-configmap.yaml,sha256=AMzYzlY0JIlfBWj5eX054Rc1XDW2thUcLSOGMJVhIdA,229
|
158
158
|
sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml,sha256=RtTq4F1QUmR2Uunb6zuuRaPhV7hpesz4saHjn3Ncsb4,2010
|
159
159
|
sky/provision/lambda_cloud/__init__.py,sha256=6EEvSgtUeEiup9ivIFevHmgv0GqleroO2X0K7TRa2nE,612
|
@@ -172,7 +172,7 @@ sky/provision/paperspace/utils.py,sha256=uOmxbDKjV6skFizC4gYXSxRuEqso5ck2kF7MbtN
|
|
172
172
|
sky/provision/runpod/__init__.py,sha256=6HYvHI27EaLrX1SS0vWVhdLu5HDBeZCdvAeDJuwM5pk,556
|
173
173
|
sky/provision/runpod/config.py,sha256=9ulZJVL7nHuxhTdoj8D7lNn7SdicJ5zc6FIcHIG9tcg,321
|
174
174
|
sky/provision/runpod/instance.py,sha256=FNalpTQMvnmACdtFsGvVPVhEkzdqrlmd_pExn33bIQ8,10358
|
175
|
-
sky/provision/runpod/utils.py,sha256=
|
175
|
+
sky/provision/runpod/utils.py,sha256=F3zsxPV3IY5C73J-zNZEfcXL8bsZBgNWzlmtSO6qOqI,12425
|
176
176
|
sky/provision/runpod/api/__init__.py,sha256=eJwjPeQZ5B7chf4-Bl4YeI2Uo9aLX4M1rr2NmPk89_E,112
|
177
177
|
sky/provision/runpod/api/commands.py,sha256=oh77PS0H0wZudHV8II9ceRuaFQ8FN4NJ4S3-6_PeqPM,4238
|
178
178
|
sky/provision/runpod/api/pods.py,sha256=GMwxgNr9NnHPfyh2Y9b8S_vLhrLY4h7LybFBBQNAyfw,4948
|
@@ -215,12 +215,12 @@ sky/skylet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
215
215
|
sky/skylet/attempt_skylet.py,sha256=GZ6ITjjA0m-da3IxXXfoHR6n4pjp3X3TOXUqVvSrV0k,2136
|
216
216
|
sky/skylet/autostop_lib.py,sha256=JPDHmByuhoNYXSUHl-OnyeJUkOFWn7gDM1FrS7Kr3E8,4478
|
217
217
|
sky/skylet/configs.py,sha256=UtnpmEL0F9hH6PSjhsps7xgjGZ6qzPOfW1p2yj9tSng,1887
|
218
|
-
sky/skylet/constants.py,sha256=
|
219
|
-
sky/skylet/events.py,sha256=
|
218
|
+
sky/skylet/constants.py,sha256=bTvojyYHLhUxMI-xD8d6Fj6snQiK4-PLw-baRFKCcfQ,16125
|
219
|
+
sky/skylet/events.py,sha256=__7bt6Z8q2W1vwTQv4yug-oAXDwSf8zBeRxb8HFM36U,12792
|
220
220
|
sky/skylet/job_lib.py,sha256=Rk-C069cusJIRXsks8xqCb016JSt7GlpU7LrpX0qFJk,42785
|
221
221
|
sky/skylet/log_lib.py,sha256=oFEBd85vDYFrIyyZKekH30yc4rRYILC0F0o-COQ64oE,20445
|
222
222
|
sky/skylet/log_lib.pyi,sha256=rRk4eUX0RHGs1QL9CXsJq6RE7FqqxZlfuPJOLXTvg7I,4453
|
223
|
-
sky/skylet/skylet.py,sha256=
|
223
|
+
sky/skylet/skylet.py,sha256=mWmqCvxSlfdVU_L8NL6P52jmCt3smd8K0HdyNBfMPeI,1234
|
224
224
|
sky/skylet/subprocess_daemon.py,sha256=gcL-_Hea7-SrBUyZfAbo40RBFbaeuBmPCW0dm4YYkPo,3537
|
225
225
|
sky/skylet/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
226
226
|
sky/skylet/providers/command_runner.py,sha256=DdBKP0QX325_N3zAVYwnmXmfbfXNqkzWQZpF9DSR7Go,16259
|
@@ -250,20 +250,20 @@ sky/templates/jobs-controller.yaml.j2,sha256=FfagMkhXZdUWR6HtJHJ3JEZzJy4eov5CQZH
|
|
250
250
|
sky/templates/kubernetes-ingress.yml.j2,sha256=73iDklVDWBMbItg0IexCa6_ClXPJOxw7PWz3leku4nE,1340
|
251
251
|
sky/templates/kubernetes-loadbalancer.yml.j2,sha256=IxrNYM366N01bbkJEbZ_UPYxUP8wyVEbRNFHRsBuLsw,626
|
252
252
|
sky/templates/kubernetes-port-forward-proxy-command.sh,sha256=iw7mypHszg6Ggq9MbyiYMFOkSlXaQZulaxqC5IWYGCc,3381
|
253
|
-
sky/templates/kubernetes-ray.yml.j2,sha256=
|
253
|
+
sky/templates/kubernetes-ray.yml.j2,sha256=x3Eq1ejG577E6eAZtJvpTlzXRCW5beMhqApV3J8BEZY,29019
|
254
254
|
sky/templates/kubernetes-ssh-jump.yml.j2,sha256=k5W5sOIMppU7dDkJMwPlqsUcb92y7L5_TVG3hkgMy8M,2747
|
255
255
|
sky/templates/lambda-ray.yml.j2,sha256=HyvO_tX2vxwSsc4IFVSqGuIbjLMk0bevP9bcxb8ZQII,4498
|
256
256
|
sky/templates/local-ray.yml.j2,sha256=FNHeyHF6nW9nU9QLIZceUWfvrFTTcO51KqhTnYCEFaA,1185
|
257
257
|
sky/templates/oci-ray.yml.j2,sha256=92dvXGaUd2Kwep9fgTjOsAPJiBLr8GQTjy7pFvuPAyE,4562
|
258
258
|
sky/templates/paperspace-ray.yml.j2,sha256=HQjZNamrB_a4fOMCxQXSVdV5JIHtbGtAE0JzEO8uuVQ,4021
|
259
|
-
sky/templates/runpod-ray.yml.j2,sha256=
|
259
|
+
sky/templates/runpod-ray.yml.j2,sha256=95392Jvk-PwrEHIF2C5i-EnaqC31nTq4UTYZuQxUe1k,4310
|
260
260
|
sky/templates/scp-ray.yml.j2,sha256=I9u8Ax-lit-d6UrCC9BVU8avst8w1cwK6TrzZBcz_JM,5608
|
261
261
|
sky/templates/sky-serve-controller.yaml.j2,sha256=W4i1-OGRU2WDvauLC4EDXcYrNxj7mzRFSvSqzAKfehc,2020
|
262
262
|
sky/templates/vast-ray.yml.j2,sha256=KaZLBJfI6FzAVRVq0NNM0_SN0RQUrDIehnJJ_LnvwnY,2990
|
263
263
|
sky/templates/vsphere-ray.yml.j2,sha256=cOQ-qdpxGA2FHajMMhTJI-SmlYzdPterX4Gsiq-nkb0,3587
|
264
264
|
sky/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
265
|
-
sky/usage/constants.py,sha256=
|
266
|
-
sky/usage/usage_lib.py,sha256=
|
265
|
+
sky/usage/constants.py,sha256=k7PQ-QP1p3tDgnzvy7QoxJjuTXWDUyVkbtPcIEvDsYM,632
|
266
|
+
sky/usage/usage_lib.py,sha256=jpRt-24WVxYyd-XJz3_lSHboUKmWy8x8lRvvO-JO68g,20026
|
267
267
|
sky/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
268
268
|
sky/utils/accelerator_registry.py,sha256=cpPS9_MahQPt0ev4qPT-qyGpe12YD78UNj_gAvt720Q,4052
|
269
269
|
sky/utils/admin_policy_utils.py,sha256=_Vt_jTTYCXmMdryj0vrrumFPewa93qHnzUqBDXjAhRU,5981
|
@@ -275,7 +275,7 @@ sky/utils/control_master_utils.py,sha256=90hnxiAUP20gbJ9e3MERh7rb04ZO_I3LsljNjR2
|
|
275
275
|
sky/utils/controller_utils.py,sha256=SUrhK46ouBH2rm7azfFLIWr-T9-voYAdiXl2z5fG4Qw,45948
|
276
276
|
sky/utils/dag_utils.py,sha256=l_0O3RUfe9OdQ9mtbhdlHpJVD4VAF_HQ3A75dgsYIjM,6099
|
277
277
|
sky/utils/db_utils.py,sha256=K2-OHPg0FeHCarevMdWe0IWzm6wWumViEeYeJuGoFUE,3747
|
278
|
-
sky/utils/env_options.py,sha256=
|
278
|
+
sky/utils/env_options.py,sha256=aaD6GoYK0LaZIqjOEZ-R7eccQuiRriW3EuLWtOI5En8,1578
|
279
279
|
sky/utils/kubernetes_enums.py,sha256=imGqHSa8O07zD_6xH1SDMM7dBU5lF5fzFFlQuQy00QM,1384
|
280
280
|
sky/utils/log_utils.py,sha256=AjkgSrk0GVOUbnnCEC2f4lsf2HOIXkZETCxR0BJw2-U,14152
|
281
281
|
sky/utils/resources_utils.py,sha256=06Kx6AfbBdwBYGmIYFEY_qm6OBc2a5esZMPvIX7gCvc,7787
|
@@ -298,9 +298,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=k0TBoQ4zgf79-sVkixKSGYFHQ7Z
|
|
298
298
|
sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
|
299
299
|
sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
|
300
300
|
sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=Kq1MDygF2IxFmu9FXpCxqucXLmeUrvs6OtRij6XTQbo,6554
|
301
|
-
skypilot_nightly-1.0.0.
|
302
|
-
skypilot_nightly-1.0.0.
|
303
|
-
skypilot_nightly-1.0.0.
|
304
|
-
skypilot_nightly-1.0.0.
|
305
|
-
skypilot_nightly-1.0.0.
|
306
|
-
skypilot_nightly-1.0.0.
|
301
|
+
skypilot_nightly-1.0.0.dev20250213.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
|
302
|
+
skypilot_nightly-1.0.0.dev20250213.dist-info/METADATA,sha256=hLWjPTEc6CnhdlKg2uvdj1VZgjmf3XNt0fJV3R61GTc,21397
|
303
|
+
skypilot_nightly-1.0.0.dev20250213.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
304
|
+
skypilot_nightly-1.0.0.dev20250213.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
|
305
|
+
skypilot_nightly-1.0.0.dev20250213.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
|
306
|
+
skypilot_nightly-1.0.0.dev20250213.dist-info/RECORD,,
|
File without changes
|
{skypilot_nightly-1.0.0.dev20250211.dist-info → skypilot_nightly-1.0.0.dev20250213.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|