skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +64 -32
- sky/adaptors/aws.py +23 -6
- sky/adaptors/azure.py +432 -15
- sky/adaptors/cloudflare.py +5 -5
- sky/adaptors/common.py +19 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/gcp.py +3 -2
- sky/adaptors/kubernetes.py +122 -88
- sky/adaptors/nebius.py +100 -0
- sky/adaptors/oci.py +39 -1
- sky/adaptors/vast.py +29 -0
- sky/admin_policy.py +101 -0
- sky/authentication.py +117 -98
- sky/backends/backend.py +52 -20
- sky/backends/backend_utils.py +669 -557
- sky/backends/cloud_vm_ray_backend.py +1099 -808
- sky/backends/local_docker_backend.py +14 -8
- sky/backends/wheel_utils.py +38 -20
- sky/benchmark/benchmark_utils.py +22 -23
- sky/check.py +76 -27
- sky/cli.py +1586 -1139
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5683 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1765 -0
- sky/cloud_stores.py +283 -19
- sky/clouds/__init__.py +7 -2
- sky/clouds/aws.py +303 -112
- sky/clouds/azure.py +185 -179
- sky/clouds/cloud.py +115 -37
- sky/clouds/cudo.py +29 -22
- sky/clouds/do.py +313 -0
- sky/clouds/fluidstack.py +44 -54
- sky/clouds/gcp.py +206 -65
- sky/clouds/ibm.py +26 -21
- sky/clouds/kubernetes.py +345 -91
- sky/clouds/lambda_cloud.py +40 -29
- sky/clouds/nebius.py +297 -0
- sky/clouds/oci.py +129 -90
- sky/clouds/paperspace.py +22 -18
- sky/clouds/runpod.py +53 -34
- sky/clouds/scp.py +28 -24
- sky/clouds/service_catalog/__init__.py +19 -13
- sky/clouds/service_catalog/aws_catalog.py +29 -12
- sky/clouds/service_catalog/azure_catalog.py +33 -6
- sky/clouds/service_catalog/common.py +95 -75
- sky/clouds/service_catalog/constants.py +3 -3
- sky/clouds/service_catalog/cudo_catalog.py +13 -3
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
- sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
- sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
- sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/gcp_catalog.py +16 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
- sky/clouds/service_catalog/lambda_catalog.py +8 -3
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/clouds/service_catalog/oci_catalog.py +31 -4
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vast_catalog.py +104 -0
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/utils/aws_utils.py +65 -0
- sky/clouds/utils/azure_utils.py +91 -0
- sky/clouds/utils/gcp_utils.py +5 -9
- sky/clouds/utils/oci_utils.py +47 -5
- sky/clouds/utils/scp_utils.py +4 -3
- sky/clouds/vast.py +280 -0
- sky/clouds/vsphere.py +22 -18
- sky/core.py +361 -107
- sky/dag.py +41 -28
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +211 -32
- sky/data/mounting_utils.py +182 -30
- sky/data/storage.py +2118 -270
- sky/data/storage_utils.py +126 -5
- sky/exceptions.py +179 -8
- sky/execution.py +158 -85
- sky/global_user_state.py +150 -34
- sky/jobs/__init__.py +12 -10
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +302 -0
- sky/jobs/constants.py +49 -11
- sky/jobs/controller.py +161 -99
- sky/jobs/dashboard/dashboard.py +171 -25
- sky/jobs/dashboard/templates/index.html +572 -60
- sky/jobs/recovery_strategy.py +157 -156
- sky/jobs/scheduler.py +307 -0
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/server/core.py +598 -0
- sky/jobs/server/dashboard_utils.py +69 -0
- sky/jobs/server/server.py +190 -0
- sky/jobs/state.py +627 -122
- sky/jobs/utils.py +615 -206
- sky/models.py +27 -0
- sky/optimizer.py +142 -83
- sky/provision/__init__.py +20 -5
- sky/provision/aws/config.py +124 -42
- sky/provision/aws/instance.py +130 -53
- sky/provision/azure/__init__.py +7 -0
- sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
- sky/provision/azure/config.py +220 -0
- sky/provision/azure/instance.py +1012 -37
- sky/provision/common.py +31 -3
- sky/provision/constants.py +25 -0
- sky/provision/cudo/__init__.py +2 -1
- sky/provision/cudo/cudo_utils.py +112 -0
- sky/provision/cudo/cudo_wrapper.py +37 -16
- sky/provision/cudo/instance.py +28 -12
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +301 -0
- sky/provision/docker_utils.py +82 -46
- sky/provision/fluidstack/fluidstack_utils.py +57 -125
- sky/provision/fluidstack/instance.py +15 -43
- sky/provision/gcp/config.py +19 -9
- sky/provision/gcp/constants.py +7 -1
- sky/provision/gcp/instance.py +55 -34
- sky/provision/gcp/instance_utils.py +339 -80
- sky/provision/gcp/mig_utils.py +210 -0
- sky/provision/instance_setup.py +172 -133
- sky/provision/kubernetes/__init__.py +1 -0
- sky/provision/kubernetes/config.py +104 -90
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +680 -325
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
- sky/provision/kubernetes/network.py +54 -20
- sky/provision/kubernetes/network_utils.py +70 -21
- sky/provision/kubernetes/utils.py +1370 -251
- sky/provision/lambda_cloud/__init__.py +11 -0
- sky/provision/lambda_cloud/config.py +10 -0
- sky/provision/lambda_cloud/instance.py +265 -0
- sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
- sky/provision/logging.py +1 -1
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +318 -0
- sky/provision/oci/__init__.py +15 -0
- sky/provision/oci/config.py +51 -0
- sky/provision/oci/instance.py +436 -0
- sky/provision/oci/query_utils.py +681 -0
- sky/provision/paperspace/constants.py +6 -0
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/paperspace/utils.py +2 -0
- sky/provision/provisioner.py +207 -130
- sky/provision/runpod/__init__.py +1 -0
- sky/provision/runpod/api/__init__.py +3 -0
- sky/provision/runpod/api/commands.py +119 -0
- sky/provision/runpod/api/pods.py +142 -0
- sky/provision/runpod/instance.py +64 -8
- sky/provision/runpod/utils.py +239 -23
- sky/provision/vast/__init__.py +10 -0
- sky/provision/vast/config.py +11 -0
- sky/provision/vast/instance.py +247 -0
- sky/provision/vast/utils.py +162 -0
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +8 -18
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +247 -102
- sky/serve/__init__.py +9 -9
- sky/serve/autoscalers.py +361 -299
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +12 -3
- sky/serve/controller.py +106 -36
- sky/serve/load_balancer.py +63 -12
- sky/serve/load_balancing_policies.py +84 -2
- sky/serve/replica_managers.py +42 -34
- sky/serve/serve_state.py +62 -32
- sky/serve/serve_utils.py +271 -160
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +271 -90
- sky/serve/server/server.py +112 -0
- sky/serve/service.py +52 -16
- sky/serve/service_spec.py +95 -32
- sky/server/__init__.py +1 -0
- sky/server/common.py +430 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +472 -0
- sky/server/requests/payloads.py +487 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1106 -0
- sky/server/stream_utils.py +141 -0
- sky/setup_files/MANIFEST.in +2 -5
- sky/setup_files/dependencies.py +159 -0
- sky/setup_files/setup.py +14 -125
- sky/sky_logging.py +59 -14
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +183 -50
- sky/skylet/events.py +22 -10
- sky/skylet/job_lib.py +403 -258
- sky/skylet/log_lib.py +111 -71
- sky/skylet/log_lib.pyi +6 -0
- sky/skylet/providers/command_runner.py +6 -8
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +11 -3
- sky/skylet/providers/scp/node_provider.py +8 -8
- sky/skylet/skylet.py +3 -1
- sky/skylet/subprocess_daemon.py +69 -17
- sky/skypilot_config.py +119 -57
- sky/task.py +205 -64
- sky/templates/aws-ray.yml.j2 +37 -7
- sky/templates/azure-ray.yml.j2 +27 -82
- sky/templates/cudo-ray.yml.j2 +7 -3
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/fluidstack-ray.yml.j2 +7 -4
- sky/templates/gcp-ray.yml.j2 +26 -6
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +46 -11
- sky/templates/kubernetes-ingress.yml.j2 +7 -0
- sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
- sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
- sky/templates/kubernetes-ray.yml.j2 +292 -25
- sky/templates/lambda-ray.yml.j2 +30 -40
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/templates/oci-ray.yml.j2 +18 -57
- sky/templates/paperspace-ray.yml.j2 +10 -6
- sky/templates/runpod-ray.yml.j2 +26 -4
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/sky-serve-controller.yaml.j2 +12 -1
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vast-ray.yml.j2 +70 -0
- sky/templates/vsphere-ray.yml.j2 +8 -3
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +10 -1
- sky/usage/usage_lib.py +130 -37
- sky/utils/accelerator_registry.py +35 -51
- sky/utils/admin_policy_utils.py +147 -0
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +81 -23
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +452 -89
- sky/utils/command_runner.pyi +77 -3
- sky/utils/common.py +54 -0
- sky/utils/common_utils.py +319 -108
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +48 -0
- sky/utils/controller_utils.py +548 -266
- sky/utils/dag_utils.py +93 -32
- sky/utils/db_utils.py +18 -4
- sky/utils/env_options.py +29 -7
- sky/utils/kubernetes/create_cluster.sh +8 -60
- sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
- sky/utils/kubernetes/gpu_labeler.py +4 -4
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/kubernetes/rsync_helper.sh +24 -0
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- sky/utils/log_utils.py +240 -33
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +94 -22
- sky/utils/rich_utils.py +247 -18
- sky/utils/schemas.py +284 -64
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +212 -46
- sky/utils/timeline.py +12 -7
- sky/utils/ux_utils.py +168 -15
- skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
- skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
- sky/clouds/cloud_registry.py +0 -31
- sky/jobs/core.py +0 -330
- sky/skylet/providers/azure/__init__.py +0 -2
- sky/skylet/providers/azure/azure-vm-template.json +0 -301
- sky/skylet/providers/azure/config.py +0 -170
- sky/skylet/providers/azure/node_provider.py +0 -466
- sky/skylet/providers/lambda_cloud/__init__.py +0 -2
- sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
- sky/skylet/providers/oci/__init__.py +0 -2
- sky/skylet/providers/oci/node_provider.py +0 -488
- sky/skylet/providers/oci/query_helper.py +0 -383
- sky/skylet/providers/oci/utils.py +0 -21
- sky/utils/cluster_yaml_utils.py +0 -24
- sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
- skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
- skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
sky/utils/resources_utils.py
CHANGED
@@ -1,14 +1,20 @@
|
|
1
1
|
"""Utility functions for resources."""
|
2
|
+
import dataclasses
|
2
3
|
import enum
|
3
4
|
import itertools
|
5
|
+
import json
|
6
|
+
import math
|
4
7
|
import re
|
5
8
|
import typing
|
6
|
-
from typing import List, Optional, Set
|
9
|
+
from typing import Dict, List, Optional, Set, Union
|
7
10
|
|
11
|
+
from sky import skypilot_config
|
12
|
+
from sky.utils import registry
|
8
13
|
from sky.utils import ux_utils
|
9
14
|
|
10
15
|
if typing.TYPE_CHECKING:
|
11
16
|
from sky import backends
|
17
|
+
from sky import resources as resources_lib
|
12
18
|
|
13
19
|
_PORT_RANGE_HINT_MSG = ('Invalid port range {}. Please use the format '
|
14
20
|
'"from-to", in which from <= to. e.g. "1-3".')
|
@@ -22,6 +28,7 @@ class DiskTier(enum.Enum):
|
|
22
28
|
LOW = 'low'
|
23
29
|
MEDIUM = 'medium'
|
24
30
|
HIGH = 'high'
|
31
|
+
ULTRA = 'ultra'
|
25
32
|
BEST = 'best'
|
26
33
|
|
27
34
|
@classmethod
|
@@ -43,6 +50,18 @@ class DiskTier(enum.Enum):
|
|
43
50
|
return types.index(self) <= types.index(other)
|
44
51
|
|
45
52
|
|
53
|
+
@dataclasses.dataclass
class ClusterName:
    """A cluster's two names, bundled together.

    Both ``str()`` and ``repr()`` are overridden to expose only
    ``display_name``; ``name_on_cloud`` never appears in either rendering.
    """
    # Presumably the user-facing cluster name -- confirm against callers.
    display_name: str
    # Presumably the name used on the cloud provider -- confirm against
    # callers.
    name_on_cloud: str

    def __str__(self) -> str:
        """Return ``display_name`` unchanged."""
        return self.display_name

    def __repr__(self) -> str:
        """Return the ``repr`` of ``display_name`` (i.e. quoted)."""
        return f'{self.display_name!r}'
|
63
|
+
|
64
|
+
|
46
65
|
def check_port_str(port: str) -> None:
|
47
66
|
if not port.isdigit():
|
48
67
|
with ux_utils.print_exception_no_traceback():
|
@@ -118,29 +137,82 @@ def simplify_ports(ports: List[str]) -> List[str]:
|
|
118
137
|
return port_set_to_ranges(port_ranges_to_set(ports))
|
119
138
|
|
120
139
|
|
140
|
+
def format_resource(resource: 'resources_lib.Resources',
                    simplify: bool = False) -> str:
    """Render a resource as a short human-readable string.

    Args:
        resource: The resource to describe. This function reads its
            ``cloud``, ``accelerators``, ``instance_type`` and ``use_spot``
            attributes, and its ``str()`` form.
        simplify: When true, return ``<cloud>(<[Spot]><hardware>)`` where
            hardware is the accelerators, or ``vCPU=<n>`` when there are
            none. When false, return ``str(resource)`` with any
            ``accelerator_args={...}`` section collapsed to ``...``.

    Returns:
        The formatted description.
    """
    if not simplify:
        # accelerator_args is way too long.
        # Convert from:
        #  GCP(n1-highmem-8, {'tpu-v2-8': 1}, accelerator_args={'runtime_version': '2.12.0'}  # pylint: disable=line-too-long
        # to:
        #  GCP(n1-highmem-8, {'tpu-v2-8': 1}...)
        return re.sub(', accelerator_args={.*}', '...', str(resource))

    provider = resource.cloud
    if resource.accelerators is not None:
        hardware = f'{resource.accelerators}'
    else:
        # No accelerators: describe the instance by its vCPU count instead.
        cpu_count, _ = provider.get_vcpus_mem_from_instance_type(
            resource.instance_type)
        hardware = f'vCPU={int(cpu_count)}'
    spot_tag = '[Spot]' if resource.use_spot else ''
    return f'{provider}({spot_tag}{hardware})'
|
161
|
+
|
162
|
+
|
121
163
|
def get_readable_resources_repr(handle: 'backends.CloudVmRayResourceHandle',
                                simplify: bool = False) -> str:
    """Describe a handle's launched resources as ``<nodes>x <resource>``.

    Args:
        handle: Handle whose ``launched_nodes`` and ``launched_resources``
            are read.
        simplify: Forwarded to ``format_resource``.

    Returns:
        The formatted description, or ``_DEFAULT_MESSAGE_HANDLE_INITIALIZING``
        when either attribute on the handle is still ``None``.
    """
    if handle.launched_nodes is None or handle.launched_resources is None:
        return _DEFAULT_MESSAGE_HANDLE_INITIALIZING
    node_count = handle.launched_nodes
    resource_text = format_resource(handle.launched_resources, simplify)
    return f'{node_count}x {resource_text}'
|
170
|
+
|
171
|
+
|
172
|
+
def make_ray_custom_resources_str(
        resource_dict: Optional[Dict[str, Union[int, float]]]) -> Optional[str]:
    """Serialize a resource mapping as compact JSON for Ray custom resources.

    Args:
        resource_dict: Mapping of resource name to quantity, or ``None``.

    Returns:
        ``None`` when ``resource_dict`` is ``None``; otherwise a JSON object
        string without whitespace after separators, in which every quantity
        has been rounded up to an integer.
    """
    if resource_dict is None:
        return None
    # Ray does not allow fractional resources, so we need to ceil the values.
    rounded_up = {}
    for name, quantity in resource_dict.items():
        rounded_up[name] = math.ceil(quantity)
    return json.dumps(rounded_up, separators=(',', ':'))
|
180
|
+
|
181
|
+
|
182
|
+
@dataclasses.dataclass
class FeasibleResources:
    """Result bundle a cloud returns when asked for feasible resources.

    It carries three things: the feasible resources themselves, any fuzzy
    candidates, and an optional string hint for the case where no feasible
    resources are found.

    Example of fuzzy candidates: if A100:1 is requested but is not available
    in a cloud/region, the fuzzy candidates are what a fuzzy catalog search
    finds on offer in that location, e.g.
        ['A100-80GB:1', 'A100-80GB:2', 'A100-80GB:4', 'A100:8']
    """
    # The feasible resources returned by the cloud.
    resources_list: List['resources_lib.Resources']
    # Fuzzy-match candidates, as accelerator strings like 'A100:8'.
    fuzzy_candidate_list: List[str]
    # Optional hint string; see the class docstring.
    hint: Optional[str]
|
198
|
+
|
199
|
+
|
200
|
+
def need_to_query_reservations() -> bool:
    """Report whether any cloud's config calls for querying reservations.

    A query is needed as soon as one registered cloud either:
        - has ``specific_reservations`` set in the config, or
        - has a truthy ``prioritize_reservations`` in the config.

    Callers can use this to skip the potentially expensive reservation query
    for clouds that do not use reservations.

    Returns:
        True if at least one cloud in ``registry.CLOUD_REGISTRY`` meets
        either condition, False otherwise.
    """
    for cloud_name in registry.CLOUD_REGISTRY.keys():
        specific = skypilot_config.get_nested(
            (cloud_name, 'specific_reservations'), None)
        prioritized = skypilot_config.get_nested(
            (cloud_name, 'prioritize_reservations'), False)
        if specific is not None or prioritized:
            return True
    return False
|
sky/utils/rich_utils.py
CHANGED
@@ -1,16 +1,107 @@
|
|
1
1
|
"""Rich status spinner utils."""
|
2
2
|
import contextlib
|
3
|
+
import enum
|
4
|
+
import logging
|
3
5
|
import threading
|
4
|
-
|
6
|
+
import typing
|
7
|
+
from typing import Dict, Iterator, Optional, Tuple, Union
|
5
8
|
|
6
9
|
import rich.console as rich_console
|
7
10
|
|
8
|
-
|
9
|
-
|
11
|
+
from sky.utils import annotations
|
12
|
+
from sky.utils import message_utils
|
13
|
+
|
14
|
+
if typing.TYPE_CHECKING:
|
15
|
+
import requests
|
16
|
+
|
17
|
+
console = rich_console.Console(soft_wrap=True)
|
18
|
+
_statuses: Dict[str, Optional[Union['EncodedStatus',
|
19
|
+
'rich_console.Status']]] = {
|
20
|
+
'server': None,
|
21
|
+
'client': None,
|
22
|
+
}
|
23
|
+
_status_nesting_level = 0
|
10
24
|
|
11
25
|
_logging_lock = threading.RLock()
|
12
26
|
|
13
27
|
|
28
|
+
class Control(enum.Enum):
    """Control codes for the status spinner.

    Each member's value doubles as a tag name: ``encode`` wraps a message in
    ``<value>...</value>`` and ``decode`` strips such tags again.
    """
    INIT = 'rich_init'
    START = 'rich_start'
    STOP = 'rich_stop'
    EXIT = 'rich_exit'
    UPDATE = 'rich_update'

    def encode(self, msg: str) -> str:
        """Wrap ``msg`` in this control code's opening and closing tags."""
        tag = self.value
        return f'<{tag}>{msg}</{tag}>'

    @classmethod
    def decode(cls, encoded_msg: str) -> Tuple[Optional['Control'], str]:
        """Detect a control tag in ``encoded_msg`` and strip it.

        Members are tried in definition order and only the first one whose
        opening tag occurs in the text is handled.

        Returns:
            ``(control, text)`` where ``text`` has every occurrence of that
            control's opening and closing tags removed, or
            ``(None, encoded_msg)`` when no opening tag is present.
        """
        for candidate in cls:
            opening = f'<{candidate.value}>'
            if opening not in encoded_msg:
                continue
            closing = f'</{candidate.value}>'
            stripped = encoded_msg.replace(opening, '').replace(closing, '')
            return candidate, stripped
        return None, encoded_msg
|
52
|
+
|
53
|
+
|
54
|
+
class EncodedStatusMessage:
    """Builds the encoded control strings for one status message.

    Every method tags some text with a ``Control`` code and passes the result
    through ``message_utils.encode_payload``. Nothing is printed here; the
    encoded string is returned to the caller.
    """

    def __init__(self, msg: str):
        self.msg = msg

    @staticmethod
    def _pack(control: 'Control', text: str) -> str:
        """Tag ``text`` with ``control`` and encode it as a payload."""
        return message_utils.encode_payload(control.encode(text))

    def init(self) -> str:
        """Encoded INIT carrying the stored message."""
        return self._pack(Control.INIT, self.msg)

    def enter(self) -> str:
        """Encoded START carrying the stored message."""
        return self._pack(Control.START, self.msg)

    def exit(self) -> str:
        """Encoded EXIT with an empty body."""
        return self._pack(Control.EXIT, '')

    def update(self, msg: str) -> str:
        """Encoded UPDATE carrying ``msg`` (the stored message is untouched)."""
        return self._pack(Control.UPDATE, msg)

    def stop(self) -> str:
        """Encoded STOP with an empty body."""
        return self._pack(Control.STOP, '')

    def start(self) -> str:
        """Encoded START carrying the stored message."""
        return self._pack(Control.START, self.msg)
|
77
|
+
|
78
|
+
|
79
|
+
class EncodedStatus:
    """A status object that prints encoded control messages to stdout.

    It offers ``update``/``stop``/``start`` and the context-manager methods.
    Each of them prints the matching encoded message without a trailing
    newline and flushes immediately. Constructing an instance prints the
    INIT message.
    """

    def __init__(self, msg: str):
        self.status = msg
        self.encoded_msg = EncodedStatusMessage(msg)
        self._emit(self.encoded_msg.init())

    @staticmethod
    def _emit(encoded: str) -> None:
        """Write one encoded message to stdout and flush."""
        print(encoded, end='', flush=True)

    def __enter__(self):
        self._emit(self.encoded_msg.enter())
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._emit(self.encoded_msg.exit())

    def update(self, msg: str):
        """Record ``msg`` as the current status and print an UPDATE."""
        self.status = msg
        self._emit(self.encoded_msg.update(msg))

    def stop(self):
        """Print a STOP message."""
        self._emit(self.encoded_msg.stop())

    def start(self):
        """Print a START message."""
        self._emit(self.encoded_msg.start())
|
103
|
+
|
104
|
+
|
14
105
|
class _NoOpConsoleStatus:
|
15
106
|
"""An empty class for multi-threaded console.status."""
|
16
107
|
|
@@ -30,34 +121,172 @@ class _NoOpConsoleStatus:
|
|
30
121
|
pass
|
31
122
|
|
32
123
|
|
124
|
+
# TODO(SKY-1216): we need a wrapper for the rich.progress in our code as well.
|
125
|
+
class _RevertibleStatus:
    """A wrapper for status that can revert to previous message after exit.

    The wrapped status object is looked up in the module-level ``_statuses``
    dict under ``status_type``. Nesting depth is tracked in the module-level
    ``_status_nesting_level`` counter.
    """

    def __init__(self, message: str, status_type: str):
        self.status_type = status_type
        self.message = message
        # Remember what the status showed at construction time so that
        # __exit__ can restore it.
        existing = _statuses[status_type]
        self.previous_message = None if existing is None else existing.status

    def __enter__(self):
        global _status_nesting_level
        active = _statuses[self.status_type]
        active.update(self.message)
        _status_nesting_level += 1
        active.__enter__()
        return active

    def __exit__(self, exc_type, exc_val, exc_tb):
        global _status_nesting_level
        _status_nesting_level -= 1
        if _status_nesting_level > 0:
            # Still inside an outer status: restore its message.
            _statuses[self.status_type].update(self.previous_message)
            return
        # Outermost exit: clamp the counter, then close and clear the status.
        _status_nesting_level = 0
        outermost = _statuses[self.status_type]
        if outermost is not None:
            outermost.__exit__(exc_type, exc_val, exc_tb)
            _statuses[self.status_type] = None

    def update(self, *args, **kwargs):
        """Forward to the wrapped status' ``update``."""
        _statuses[self.status_type].update(*args, **kwargs)

    def stop(self):
        """Forward to the wrapped status' ``stop``."""
        _statuses[self.status_type].stop()

    def start(self):
        """Forward to the wrapped status' ``start``."""
        _statuses[self.status_type].start()
|
162
|
+
|
163
|
+
|
33
164
|
def safe_status(msg: str) -> Union['rich_console.Status', _NoOpConsoleStatus]:
    """A wrapper for multi-threaded console.status.

    Returns a ``_RevertibleStatus`` over the shared 'server' status, which is
    created as an ``EncodedStatus`` on first use. This happens only when
    ``annotations.is_on_api_server`` is truthy, the caller is on the main
    thread, and logging is not silent. Otherwise a no-op status is returned.
    """
    from sky import sky_logging  # pylint: disable=import-outside-toplevel
    if (not annotations.is_on_api_server or
            threading.current_thread() is not threading.main_thread() or
            sky_logging.is_silent()):
        return _NoOpConsoleStatus()
    if _statuses['server'] is None:
        _statuses['server'] = EncodedStatus(msg)
    return _RevertibleStatus(msg, 'server')
|
44
174
|
|
45
175
|
|
176
|
+
def stop_safe_status():
    """Stops all nested statuses.

    Useful before streaming logs from a user program, so that the spinner
    display does not interfere with the log output. Does nothing when called
    off the main thread or when no 'server' status exists.
    """
    if threading.current_thread() is not threading.main_thread():
        return
    server_status = _statuses['server']
    if server_status is not None:
        server_status.stop()
|
186
|
+
|
187
|
+
|
46
188
|
def force_update_status(msg: str):
    """Update the status message even if sky_logging.is_silent() is true.

    Only acts on the main thread, and only when a 'server' status exists.
    """
    if threading.current_thread() is not threading.main_thread():
        return
    server_status = _statuses['server']
    if server_status is not None:
        server_status.update(msg)
|
51
193
|
|
52
194
|
|
53
195
|
@contextlib.contextmanager
def safe_logger():
    """Pause the live client spinner while the managed body runs.

    Holds ``_logging_lock`` for the whole duration. If the 'client' status
    exists and its live display is started, it is stopped before the body
    and started again afterwards. The restart sits in a ``finally`` so that
    an exception raised by the body does not leave the spinner stopped.
    """
    with _logging_lock:
        client_status_obj = _statuses['client']

        client_status_live = (client_status_obj is not None and
                              client_status_obj._live.is_started)  # pylint: disable=protected-access
        if client_status_live:
            client_status_obj.stop()
        try:
            yield
        finally:
            # Runs on normal exit and on error alike; without it, an
            # exception thrown into the generator at `yield` would skip the
            # restart.
            if client_status_live:
                client_status_obj.start()
|
207
|
+
|
208
|
+
|
209
|
+
class RichSafeStreamHandler(logging.StreamHandler):
    """A ``StreamHandler`` whose ``emit`` runs inside ``safe_logger()``."""

    def emit(self, record: logging.LogRecord) -> None:
        """Emit ``record`` through the parent handler under ``safe_logger``."""
        with safe_logger():
            outcome = super().emit(record)
        return outcome
|
214
|
+
|
215
|
+
|
216
|
+
def client_status(msg: str) -> Union['rich_console.Status', _NoOpConsoleStatus]:
    """A wrapper for multi-threaded console.status.

    Returns a ``_RevertibleStatus`` over the shared 'client' status, which is
    created via ``console.status(msg)`` on first use. This happens only on
    the main thread and when logging is not silent. Otherwise a no-op status
    is returned.
    """
    from sky import sky_logging  # pylint: disable=import-outside-toplevel
    if (threading.current_thread() is not threading.main_thread() or
            sky_logging.is_silent()):
        return _NoOpConsoleStatus()
    if _statuses['client'] is None:
        _statuses['client'] = console.status(msg)
    return _RevertibleStatus(msg, 'client')
|
225
|
+
|
226
|
+
|
227
|
+
def decode_rich_status(
        response: 'requests.Response') -> Iterator[Optional[str]]:
    """Decode the rich status message from the response.

    Streams ``response`` chunk by chunk and splits it into lines. Lines that
    are not rich-status control messages are yielded as text. Control
    messages drive a local client status spinner instead of being yielded.
    Off the main thread a control message yields ``None`` and is otherwise
    ignored.

    Args:
        response: A streaming response; its ``iter_content`` is consumed.

    Yields:
        Each complete non-control line, or ``None`` for a control message
        received off the main thread.
    """
    import codecs  # pylint: disable=import-outside-toplevel

    decoding_status = None
    # Chunk boundaries are arbitrary, so a multi-byte UTF-8 character can be
    # split across two chunks. An incremental decoder buffers the partial
    # bytes instead of raising UnicodeDecodeError the way a per-chunk
    # bytes.decode() would.
    utf8_decoder = codecs.getincrementaldecoder('utf-8')()
    try:
        last_line = ''
        # Iterate over the response content in chunks. We do not use
        # iter_lines because it strips the trailing newline characters, which
        # turns a progress bar whose lines end with `\r` into a pyramid.
        for encoded_msg in response.iter_content(chunk_size=None):
            if encoded_msg is None:
                return
            decoded_msg = utf8_decoder.decode(encoded_msg)
            lines = decoded_msg.splitlines(keepends=True)
            if not lines:
                # Empty chunk, or a chunk holding only the leading bytes of a
                # character: nothing to process yet (and `lines[0]` below
                # would raise IndexError).
                continue

            lines[0] = last_line + lines[0]
            last_line = lines[-1]
            # If the last line does not end with `\r` or `\n` (with trailing
            # spaces stripped), it is not a complete line. Keep it in the
            # buffer and continue.
            if not last_line.strip(' ').endswith(('\r', '\n')):
                lines = lines[:-1]
            else:
                # Reset the buffer for the next line, as the last line is a
                # complete line.
                last_line = ''

            for line in lines:
                if line.endswith('\r\n'):
                    # Replace `\r\n` with `\n`, as printing a line that ends
                    # with `\r\n` on Linux causes the line to be empty.
                    line = line[:-2] + '\n'
                is_payload, line = message_utils.decode_payload(
                    line, raise_for_mismatch=False)
                control = None
                if is_payload:
                    control, encoded_status = Control.decode(line)
                if control is None:
                    yield line
                    continue

                # control is not None, i.e. it is a rich status control
                # message.
                if threading.current_thread() is not threading.main_thread():
                    yield None
                    continue
                if control == Control.INIT:
                    decoding_status = client_status(encoded_status)
                    continue
                if decoding_status is None:
                    # The status may not be initialized if a user uses
                    # --tail for sky api logs.
                    continue
                if control == Control.UPDATE:
                    decoding_status.update(encoded_status)
                elif control == Control.STOP:
                    decoding_status.stop()
                elif control == Control.EXIT:
                    decoding_status.__exit__(None, None, None)
                elif control == Control.START:
                    decoding_status.start()
    finally:
        if decoding_status is not None:
            decoding_status.__exit__(None, None, None)
|