skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +64 -32
- sky/adaptors/aws.py +23 -6
- sky/adaptors/azure.py +432 -15
- sky/adaptors/cloudflare.py +5 -5
- sky/adaptors/common.py +19 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/gcp.py +3 -2
- sky/adaptors/kubernetes.py +122 -88
- sky/adaptors/nebius.py +100 -0
- sky/adaptors/oci.py +39 -1
- sky/adaptors/vast.py +29 -0
- sky/admin_policy.py +101 -0
- sky/authentication.py +117 -98
- sky/backends/backend.py +52 -20
- sky/backends/backend_utils.py +669 -557
- sky/backends/cloud_vm_ray_backend.py +1099 -808
- sky/backends/local_docker_backend.py +14 -8
- sky/backends/wheel_utils.py +38 -20
- sky/benchmark/benchmark_utils.py +22 -23
- sky/check.py +76 -27
- sky/cli.py +1586 -1139
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5683 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1765 -0
- sky/cloud_stores.py +283 -19
- sky/clouds/__init__.py +7 -2
- sky/clouds/aws.py +303 -112
- sky/clouds/azure.py +185 -179
- sky/clouds/cloud.py +115 -37
- sky/clouds/cudo.py +29 -22
- sky/clouds/do.py +313 -0
- sky/clouds/fluidstack.py +44 -54
- sky/clouds/gcp.py +206 -65
- sky/clouds/ibm.py +26 -21
- sky/clouds/kubernetes.py +345 -91
- sky/clouds/lambda_cloud.py +40 -29
- sky/clouds/nebius.py +297 -0
- sky/clouds/oci.py +129 -90
- sky/clouds/paperspace.py +22 -18
- sky/clouds/runpod.py +53 -34
- sky/clouds/scp.py +28 -24
- sky/clouds/service_catalog/__init__.py +19 -13
- sky/clouds/service_catalog/aws_catalog.py +29 -12
- sky/clouds/service_catalog/azure_catalog.py +33 -6
- sky/clouds/service_catalog/common.py +95 -75
- sky/clouds/service_catalog/constants.py +3 -3
- sky/clouds/service_catalog/cudo_catalog.py +13 -3
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
- sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
- sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
- sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/gcp_catalog.py +16 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
- sky/clouds/service_catalog/lambda_catalog.py +8 -3
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/clouds/service_catalog/oci_catalog.py +31 -4
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vast_catalog.py +104 -0
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/utils/aws_utils.py +65 -0
- sky/clouds/utils/azure_utils.py +91 -0
- sky/clouds/utils/gcp_utils.py +5 -9
- sky/clouds/utils/oci_utils.py +47 -5
- sky/clouds/utils/scp_utils.py +4 -3
- sky/clouds/vast.py +280 -0
- sky/clouds/vsphere.py +22 -18
- sky/core.py +361 -107
- sky/dag.py +41 -28
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +211 -32
- sky/data/mounting_utils.py +182 -30
- sky/data/storage.py +2118 -270
- sky/data/storage_utils.py +126 -5
- sky/exceptions.py +179 -8
- sky/execution.py +158 -85
- sky/global_user_state.py +150 -34
- sky/jobs/__init__.py +12 -10
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +302 -0
- sky/jobs/constants.py +49 -11
- sky/jobs/controller.py +161 -99
- sky/jobs/dashboard/dashboard.py +171 -25
- sky/jobs/dashboard/templates/index.html +572 -60
- sky/jobs/recovery_strategy.py +157 -156
- sky/jobs/scheduler.py +307 -0
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/server/core.py +598 -0
- sky/jobs/server/dashboard_utils.py +69 -0
- sky/jobs/server/server.py +190 -0
- sky/jobs/state.py +627 -122
- sky/jobs/utils.py +615 -206
- sky/models.py +27 -0
- sky/optimizer.py +142 -83
- sky/provision/__init__.py +20 -5
- sky/provision/aws/config.py +124 -42
- sky/provision/aws/instance.py +130 -53
- sky/provision/azure/__init__.py +7 -0
- sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
- sky/provision/azure/config.py +220 -0
- sky/provision/azure/instance.py +1012 -37
- sky/provision/common.py +31 -3
- sky/provision/constants.py +25 -0
- sky/provision/cudo/__init__.py +2 -1
- sky/provision/cudo/cudo_utils.py +112 -0
- sky/provision/cudo/cudo_wrapper.py +37 -16
- sky/provision/cudo/instance.py +28 -12
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +301 -0
- sky/provision/docker_utils.py +82 -46
- sky/provision/fluidstack/fluidstack_utils.py +57 -125
- sky/provision/fluidstack/instance.py +15 -43
- sky/provision/gcp/config.py +19 -9
- sky/provision/gcp/constants.py +7 -1
- sky/provision/gcp/instance.py +55 -34
- sky/provision/gcp/instance_utils.py +339 -80
- sky/provision/gcp/mig_utils.py +210 -0
- sky/provision/instance_setup.py +172 -133
- sky/provision/kubernetes/__init__.py +1 -0
- sky/provision/kubernetes/config.py +104 -90
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +680 -325
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
- sky/provision/kubernetes/network.py +54 -20
- sky/provision/kubernetes/network_utils.py +70 -21
- sky/provision/kubernetes/utils.py +1370 -251
- sky/provision/lambda_cloud/__init__.py +11 -0
- sky/provision/lambda_cloud/config.py +10 -0
- sky/provision/lambda_cloud/instance.py +265 -0
- sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
- sky/provision/logging.py +1 -1
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +318 -0
- sky/provision/oci/__init__.py +15 -0
- sky/provision/oci/config.py +51 -0
- sky/provision/oci/instance.py +436 -0
- sky/provision/oci/query_utils.py +681 -0
- sky/provision/paperspace/constants.py +6 -0
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/paperspace/utils.py +2 -0
- sky/provision/provisioner.py +207 -130
- sky/provision/runpod/__init__.py +1 -0
- sky/provision/runpod/api/__init__.py +3 -0
- sky/provision/runpod/api/commands.py +119 -0
- sky/provision/runpod/api/pods.py +142 -0
- sky/provision/runpod/instance.py +64 -8
- sky/provision/runpod/utils.py +239 -23
- sky/provision/vast/__init__.py +10 -0
- sky/provision/vast/config.py +11 -0
- sky/provision/vast/instance.py +247 -0
- sky/provision/vast/utils.py +162 -0
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +8 -18
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +247 -102
- sky/serve/__init__.py +9 -9
- sky/serve/autoscalers.py +361 -299
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +12 -3
- sky/serve/controller.py +106 -36
- sky/serve/load_balancer.py +63 -12
- sky/serve/load_balancing_policies.py +84 -2
- sky/serve/replica_managers.py +42 -34
- sky/serve/serve_state.py +62 -32
- sky/serve/serve_utils.py +271 -160
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +271 -90
- sky/serve/server/server.py +112 -0
- sky/serve/service.py +52 -16
- sky/serve/service_spec.py +95 -32
- sky/server/__init__.py +1 -0
- sky/server/common.py +430 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +472 -0
- sky/server/requests/payloads.py +487 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1106 -0
- sky/server/stream_utils.py +141 -0
- sky/setup_files/MANIFEST.in +2 -5
- sky/setup_files/dependencies.py +159 -0
- sky/setup_files/setup.py +14 -125
- sky/sky_logging.py +59 -14
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +183 -50
- sky/skylet/events.py +22 -10
- sky/skylet/job_lib.py +403 -258
- sky/skylet/log_lib.py +111 -71
- sky/skylet/log_lib.pyi +6 -0
- sky/skylet/providers/command_runner.py +6 -8
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +11 -3
- sky/skylet/providers/scp/node_provider.py +8 -8
- sky/skylet/skylet.py +3 -1
- sky/skylet/subprocess_daemon.py +69 -17
- sky/skypilot_config.py +119 -57
- sky/task.py +205 -64
- sky/templates/aws-ray.yml.j2 +37 -7
- sky/templates/azure-ray.yml.j2 +27 -82
- sky/templates/cudo-ray.yml.j2 +7 -3
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/fluidstack-ray.yml.j2 +7 -4
- sky/templates/gcp-ray.yml.j2 +26 -6
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +46 -11
- sky/templates/kubernetes-ingress.yml.j2 +7 -0
- sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
- sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
- sky/templates/kubernetes-ray.yml.j2 +292 -25
- sky/templates/lambda-ray.yml.j2 +30 -40
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/templates/oci-ray.yml.j2 +18 -57
- sky/templates/paperspace-ray.yml.j2 +10 -6
- sky/templates/runpod-ray.yml.j2 +26 -4
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/sky-serve-controller.yaml.j2 +12 -1
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vast-ray.yml.j2 +70 -0
- sky/templates/vsphere-ray.yml.j2 +8 -3
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +10 -1
- sky/usage/usage_lib.py +130 -37
- sky/utils/accelerator_registry.py +35 -51
- sky/utils/admin_policy_utils.py +147 -0
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +81 -23
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +452 -89
- sky/utils/command_runner.pyi +77 -3
- sky/utils/common.py +54 -0
- sky/utils/common_utils.py +319 -108
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +48 -0
- sky/utils/controller_utils.py +548 -266
- sky/utils/dag_utils.py +93 -32
- sky/utils/db_utils.py +18 -4
- sky/utils/env_options.py +29 -7
- sky/utils/kubernetes/create_cluster.sh +8 -60
- sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
- sky/utils/kubernetes/gpu_labeler.py +4 -4
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/kubernetes/rsync_helper.sh +24 -0
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- sky/utils/log_utils.py +240 -33
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +94 -22
- sky/utils/rich_utils.py +247 -18
- sky/utils/schemas.py +284 -64
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +212 -46
- sky/utils/timeline.py +12 -7
- sky/utils/ux_utils.py +168 -15
- skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
- skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
- sky/clouds/cloud_registry.py +0 -31
- sky/jobs/core.py +0 -330
- sky/skylet/providers/azure/__init__.py +0 -2
- sky/skylet/providers/azure/azure-vm-template.json +0 -301
- sky/skylet/providers/azure/config.py +0 -170
- sky/skylet/providers/azure/node_provider.py +0 -466
- sky/skylet/providers/lambda_cloud/__init__.py +0 -2
- sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
- sky/skylet/providers/oci/__init__.py +0 -2
- sky/skylet/providers/oci/node_provider.py +0 -488
- sky/skylet/providers/oci/query_helper.py +0 -383
- sky/skylet/providers/oci/utils.py +0 -21
- sky/utils/cluster_yaml_utils.py +0 -24
- sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
- skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
- skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,192 @@
|
|
1
|
+
"""Decoders for the REST API return values."""
|
2
|
+
import base64
|
3
|
+
import pickle
|
4
|
+
import typing
|
5
|
+
from typing import Any, Dict, List, Optional, Tuple
|
6
|
+
|
7
|
+
from sky import jobs as managed_jobs
|
8
|
+
from sky import models
|
9
|
+
from sky.clouds.service_catalog import common
|
10
|
+
from sky.data import storage
|
11
|
+
from sky.provision.kubernetes import utils as kubernetes_utils
|
12
|
+
from sky.serve import serve_state
|
13
|
+
from sky.server import constants as server_constants
|
14
|
+
from sky.skylet import job_lib
|
15
|
+
from sky.utils import registry
|
16
|
+
from sky.utils import status_lib
|
17
|
+
|
18
|
+
if typing.TYPE_CHECKING:
|
19
|
+
from sky import backends
|
20
|
+
from sky import clouds
|
21
|
+
|
22
|
+
handlers: Dict[str, Any] = {}
|
23
|
+
|
24
|
+
|
25
|
+
def decode_and_unpickle(obj: str) -> Any:
    """Turn a base64 text payload back into the Python object it carries.

    The string is encoded to UTF-8 bytes, base64-decoded, and the resulting
    bytes are handed to ``pickle.loads``.
    """
    # NOTE(review): pickle.loads can run arbitrary code while loading, so this
    # presumably only ever receives payloads from a trusted server - confirm.
    raw_bytes = base64.b64decode(obj.encode('utf-8'))
    return pickle.loads(raw_bytes)
|
27
|
+
|
28
|
+
|
29
|
+
def register_decoders(*names: str):
    """Build a decorator that files a decoder under each given name.

    Every name except ``server_constants.DEFAULT_HANDLER_NAME`` is prefixed
    with ``server_constants.REQUEST_NAME_PREFIX`` before being used as the
    key in the module-level ``handlers`` dict. The decorated function is
    returned unchanged.
    """

    def decorator(func):
        for raw_name in names:
            if raw_name == server_constants.DEFAULT_HANDLER_NAME:
                key = raw_name
            else:
                key = server_constants.REQUEST_NAME_PREFIX + raw_name
            handlers[key] = func
        return func

    return decorator
|
40
|
+
|
41
|
+
|
42
|
+
def get_decoder(name: str):
    """Get the decoder for a request name.

    Args:
        name: Key to look up in the module-level ``handlers`` dict.

    Returns:
        The handler registered under ``name``, or the handler registered
        under ``server_constants.DEFAULT_HANDLER_NAME`` when ``name`` has no
        entry.

    Raises:
        KeyError: If no default handler is registered (the default is looked
            up eagerly, before ``name`` is checked).
    """
    fallback = handlers[server_constants.DEFAULT_HANDLER_NAME]
    return handlers.get(name, fallback)
|
45
|
+
|
46
|
+
|
47
|
+
@register_decoders(server_constants.DEFAULT_HANDLER_NAME)
def default_decode_handler(return_value: Any) -> Any:
    """Fallback decoder: hand the value back untouched."""
    return return_value
|
51
|
+
|
52
|
+
|
53
|
+
@register_decoders('status')
def decode_status(return_value: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Decode the cluster records of a 'status' response in place.

    For each record, ``'handle'`` is unpickled via ``decode_and_unpickle``
    and ``'status'`` is wrapped in ``status_lib.ClusterStatus``. The same
    list object that was passed in is returned.
    """
    # NOTE(review): the 'status' encoder also pickles
    # 'storage_mounts_metadata'; it is left in its encoded form here -
    # confirm that this is intentional.
    for record in return_value:
        record['handle'] = decode_and_unpickle(record['handle'])
        record['status'] = status_lib.ClusterStatus(record['status'])
    return return_value
|
61
|
+
|
62
|
+
|
63
|
+
@register_decoders('status_kubernetes')
def decode_status_kubernetes(
    return_value: Tuple[List[Dict[str, Any]], List[Dict[str, Any]],
                        List[Dict[str, Any]], Optional[str]]
) -> Tuple[List[kubernetes_utils.KubernetesSkyPilotClusterInfoPayload],
           List[kubernetes_utils.KubernetesSkyPilotClusterInfoPayload],
           List[Dict[str, Any]], Optional[str]]:
    """Rebuild cluster payload objects from a 'status_kubernetes' response.

    The first two elements of ``return_value`` are lists of dicts; each dict
    has its ``'status'`` wrapped in ``status_lib.ClusterStatus`` (in place)
    and is then expanded as keyword arguments into
    ``KubernetesSkyPilotClusterInfoPayload``. The jobs list and the context
    are passed through unchanged.
    """

    def _to_payloads(encoded_clusters):
        payloads = []
        for fields in encoded_clusters:
            fields['status'] = status_lib.ClusterStatus(fields['status'])
            payloads.append(
                kubernetes_utils.KubernetesSkyPilotClusterInfoPayload(
                    **fields))
        return payloads

    encoded_all, encoded_unmanaged, all_jobs, context = return_value
    all_clusters = _to_payloads(encoded_all)
    unmanaged_clusters = _to_payloads(encoded_unmanaged)
    return all_clusters, unmanaged_clusters, all_jobs, context
|
83
|
+
|
84
|
+
|
85
|
+
@register_decoders('launch', 'exec', 'jobs.launch')
def decode_launch(
    return_value: Dict[str, Any]
) -> Tuple[Optional[int], Optional['backends.CloudVmRayResourceHandle']]:
    """Decode the payload of a 'launch', 'exec' or 'jobs.launch' response.

    Args:
        return_value: Dict with a ``'job_id'`` entry and a pickled,
            base64-encoded ``'handle'`` entry.

    Returns:
        A ``(job_id, handle)`` tuple. ``job_id`` is passed through as-is and
        ``handle`` is the result of ``decode_and_unpickle``. Both may be
        ``None``: the matching encoder accepts an optional int job id and an
        optional handle.
    """
    job_id = return_value['job_id']
    handle = decode_and_unpickle(return_value['handle'])
    return job_id, handle
|
90
|
+
|
91
|
+
|
92
|
+
@register_decoders('start')
def decode_start(return_value: str) -> 'backends.CloudVmRayResourceHandle':
    """Unpickle the base64-encoded handle carried by a 'start' response."""
    handle = decode_and_unpickle(return_value)
    return handle
|
95
|
+
|
96
|
+
|
97
|
+
@register_decoders('queue')
def decode_queue(return_value: List[dict]) -> List[Dict[str, Any]]:
    """Wrap each job's ``'status'`` in ``job_lib.JobStatus``, in place.

    Returns the same list object that was passed in.
    """
    for entry in return_value:
        entry['status'] = job_lib.JobStatus(entry['status'])
    return return_value
|
103
|
+
|
104
|
+
|
105
|
+
@register_decoders('jobs.queue')
def decode_jobs_queue(return_value: List[dict]) -> List[Dict[str, Any]]:
    """Wrap each job's ``'status'`` in ``managed_jobs.ManagedJobStatus``.

    The dicts are updated in place and the input list is returned.
    """
    for entry in return_value:
        entry['status'] = managed_jobs.ManagedJobStatus(entry['status'])
    return return_value
|
111
|
+
|
112
|
+
|
113
|
+
@register_decoders('serve.status')
def decode_serve_status(return_value: List[dict]) -> List[Dict[str, Any]]:
    """Decode service records of a 'serve.status' response in place.

    Each service's ``'status'`` is wrapped in ``serve_state.ServiceStatus``.
    For every entry under ``'replica_info'`` (treated as empty when the key
    is absent), ``'status'`` is wrapped in ``serve_state.ReplicaStatus`` and
    ``'handle'`` is unpickled.
    """
    for service in return_value:
        service['status'] = serve_state.ServiceStatus(service['status'])
        replicas = service.get('replica_info', [])
        for replica in replicas:
            replica['status'] = serve_state.ReplicaStatus(replica['status'])
            replica['handle'] = decode_and_unpickle(replica['handle'])
    return return_value
|
124
|
+
|
125
|
+
|
126
|
+
@register_decoders('cost_report')
def decode_cost_report(
        return_value: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Decode the per-cluster entries of a 'cost_report' response in place.

    A non-``None`` ``'status'`` is wrapped in ``status_lib.ClusterStatus``;
    ``'resources'`` is always unpickled via ``decode_and_unpickle``.
    """
    for entry in return_value:
        raw_status = entry['status']
        if raw_status is not None:
            entry['status'] = status_lib.ClusterStatus(raw_status)
        entry['resources'] = decode_and_unpickle(entry['resources'])
    return return_value
|
136
|
+
|
137
|
+
|
138
|
+
@register_decoders('enabled_clouds')
def decode_enabled_clouds(return_value: List[str]) -> List['clouds.Cloud']:
    """Resolve each cloud name through ``registry.CLOUD_REGISTRY.from_str``.

    Asserts that every lookup yields a non-``None`` result; the assertion
    message is the full input list.
    """
    resolved = []
    for cloud_name in return_value:
        cloud_obj = registry.CLOUD_REGISTRY.from_str(cloud_name)
        assert cloud_obj is not None, return_value
        resolved.append(cloud_obj)
    return resolved
|
146
|
+
|
147
|
+
|
148
|
+
@register_decoders('list_accelerators')
def decode_list_accelerators(
    return_value: Dict[str, List[List[Any]]]
) -> Dict[str, List['common.InstanceTypeInfo']]:
    """Rebuild ``common.InstanceTypeInfo`` objects for each accelerator key.

    Every inner list is expanded positionally into
    ``common.InstanceTypeInfo``; a new dict with the same keys is returned.
    """
    return {
        gpu: [common.InstanceTypeInfo(*fields) for fields in encoded_infos]
        for gpu, encoded_infos in return_value.items()
    }
|
159
|
+
|
160
|
+
|
161
|
+
@register_decoders('storage_ls')
def decode_storage_ls(
        return_value: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Decode the storage records of a 'storage_ls' response in place.

    ``'status'`` is wrapped in ``status_lib.StorageStatus`` and every element
    of ``'store'`` is wrapped in ``storage.StoreType``.
    """
    for record in return_value:
        record['status'] = status_lib.StorageStatus(record['status'])
        store_types = []
        for raw_store in record['store']:
            store_types.append(storage.StoreType(raw_store))
        record['store'] = store_types
    return return_value
|
171
|
+
|
172
|
+
|
173
|
+
@register_decoders('job_status')
def decode_job_status(
    return_value: Dict[int, Optional[str]]
) -> Dict[int, Optional['job_lib.JobStatus']]:
    """Map each job id to a ``job_lib.JobStatus``, keeping ``None`` as-is.

    A new dict is returned; the input mapping is not modified.
    """
    return {
        job_id: (None if raw_status is None else job_lib.JobStatus(raw_status))
        for job_id, raw_status in return_value.items()
    }
|
184
|
+
|
185
|
+
|
186
|
+
@register_decoders('kubernetes_node_info')
def decode_kubernetes_node_info(
        return_value: Dict[str, Any]) -> Dict[str, models.KubernetesNodeInfo]:
    """Rebuild a ``models.KubernetesNodeInfo`` for every node name.

    Each value is expanded as keyword arguments into the constructor.
    """
    decoded: Dict[str, models.KubernetesNodeInfo] = {}
    for node_name, fields in return_value.items():
        decoded[node_name] = models.KubernetesNodeInfo(**fields)
    return decoded
|
@@ -0,0 +1,166 @@
|
|
1
|
+
"""Encoders for the REST API return values."""
|
2
|
+
# TODO(SKY-1211): we should evaluate whether we can move our return values to
|
3
|
+
# pydantic models, so we can take advantage of model_dump_json of pydantic,
|
4
|
+
# instead of implementing our own handlers.
|
5
|
+
import base64
|
6
|
+
import dataclasses
|
7
|
+
import pickle
|
8
|
+
import typing
|
9
|
+
from typing import Any, Dict, List, Optional, Tuple
|
10
|
+
|
11
|
+
from sky.server import constants as server_constants
|
12
|
+
|
13
|
+
if typing.TYPE_CHECKING:
|
14
|
+
from sky import backends
|
15
|
+
from sky import clouds
|
16
|
+
from sky import models
|
17
|
+
from sky.provision.kubernetes import utils as kubernetes_utils
|
18
|
+
|
19
|
+
handlers: Dict[str, Any] = {}
|
20
|
+
|
21
|
+
|
22
|
+
def pickle_and_encode(obj: Any) -> str:
    """Pickle ``obj`` and return the bytes as base64 text.

    Raises:
        ValueError: If pickling or encoding raises ``TypeError``; the
            original exception is chained as the cause.
    """
    try:
        pickled = pickle.dumps(obj)
        encoded = base64.b64encode(pickled)
        return encoded.decode('utf-8')
    except TypeError as e:
        raise ValueError(f'Failed to pickle object: {obj}') from e
|
27
|
+
|
28
|
+
|
29
|
+
def register_encoder(*names: str):
    """Build a decorator that files an encoder under each given name.

    Every name except ``server_constants.DEFAULT_HANDLER_NAME`` is prefixed
    with ``server_constants.REQUEST_NAME_PREFIX`` before being used as the
    key in the module-level ``handlers`` dict. The decorated function is
    returned unchanged.
    """

    def decorator(func):
        for raw_name in names:
            if raw_name == server_constants.DEFAULT_HANDLER_NAME:
                key = raw_name
            else:
                key = server_constants.REQUEST_NAME_PREFIX + raw_name
            handlers[key] = func
        return func

    return decorator
|
40
|
+
|
41
|
+
|
42
|
+
def get_encoder(name: str):
    """Look up the encoder for a request name, falling back to the default.

    The default handler is fetched first, so a missing default raises
    ``KeyError`` regardless of ``name``.
    """
    fallback = handlers[server_constants.DEFAULT_HANDLER_NAME]
    return handlers.get(name, fallback)
|
45
|
+
|
46
|
+
|
47
|
+
@register_encoder(server_constants.DEFAULT_HANDLER_NAME)
def default_encoder(return_value: Any) -> Any:
    """Fallback encoder: hand the value back untouched."""
    return return_value
|
51
|
+
|
52
|
+
|
53
|
+
@register_encoder('status')
def encode_status(clusters: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Encode cluster records for a 'status' response, in place.

    ``'status'`` is replaced by its ``.value``; ``'handle'`` and
    ``'storage_mounts_metadata'`` are each replaced by their pickled,
    base64-encoded text. The input list is returned.
    """
    for record in clusters:
        record['status'] = record['status'].value
        record['handle'] = pickle_and_encode(record['handle'])
        mounts_metadata = record['storage_mounts_metadata']
        record['storage_mounts_metadata'] = pickle_and_encode(mounts_metadata)
    return clusters
|
61
|
+
|
62
|
+
|
63
|
+
@register_encoder('launch', 'exec', 'jobs.launch')
def encode_launch(
    job_id_handle: Tuple[Optional[int], Optional['backends.ResourceHandle']]
) -> Dict[str, Any]:
    """Pack a ``(job_id, handle)`` pair into a dict.

    ``'job_id'`` carries the job id unchanged; ``'handle'`` carries the
    handle pickled and base64-encoded.
    """
    job_id, handle = job_id_handle
    encoded_handle = pickle_and_encode(handle)
    return {'job_id': job_id, 'handle': encoded_handle}
|
72
|
+
|
73
|
+
|
74
|
+
@register_encoder('start')
def encode_start(resource_handle: 'backends.CloudVmRayResourceHandle') -> str:
    """Pickle the resource handle and return it as base64 text."""
    encoded = pickle_and_encode(resource_handle)
    return encoded
|
77
|
+
|
78
|
+
|
79
|
+
@register_encoder('queue')
def encode_queue(jobs: List[dict]) -> List[Dict[str, Any]]:
    """Replace each job's ``'status'`` with its ``.value``, in place.

    Returns the same list object that was passed in.
    """
    for entry in jobs:
        status = entry['status']
        entry['status'] = status.value
    return jobs
|
84
|
+
|
85
|
+
|
86
|
+
@register_encoder('status_kubernetes')
def encode_status_kubernetes(
    return_value: Tuple[
        List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
        List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
        List[Dict[str, Any]], Optional[str]]
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]],
           Optional[str]]:
    """Flatten cluster payload objects for a 'status_kubernetes' response.

    Each payload in the first two lists is converted with
    ``dataclasses.asdict`` and its ``'status'`` entry replaced by the
    status' ``.value``. The jobs list and the context pass through unchanged.
    """

    def _to_dicts(payloads):
        flattened = []
        for payload in payloads:
            as_dict = dataclasses.asdict(payload)
            as_dict['status'] = as_dict['status'].value
            flattened.append(as_dict)
        return flattened

    all_clusters, unmanaged_clusters, all_jobs, context = return_value
    encoded_all_clusters = _to_dicts(all_clusters)
    encoded_unmanaged_clusters = _to_dicts(unmanaged_clusters)
    return encoded_all_clusters, encoded_unmanaged_clusters, all_jobs, context
|
106
|
+
|
107
|
+
|
108
|
+
@register_encoder('jobs.queue')
def encode_jobs_queue(jobs: List[dict]) -> List[Dict[str, Any]]:
    """Replace each managed job's ``'status'`` with its ``.value``, in place.

    Returns the same list object that was passed in.
    """
    for entry in jobs:
        status = entry['status']
        entry['status'] = status.value
    return jobs
|
113
|
+
|
114
|
+
|
115
|
+
@register_encoder('serve.status')
def encode_serve_status(
        service_statuses: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Encode service records for a 'serve.status' response, in place.

    Each service's ``'status'`` becomes its ``.value``. For every entry under
    ``'replica_info'`` (treated as empty when the key is absent),
    ``'status'`` becomes its ``.value`` and ``'handle'`` is pickled and
    base64-encoded.
    """
    for service in service_statuses:
        service['status'] = service['status'].value
        replicas = service.get('replica_info', [])
        for replica in replicas:
            replica['status'] = replica['status'].value
            replica['handle'] = pickle_and_encode(replica['handle'])
    return service_statuses
|
124
|
+
|
125
|
+
|
126
|
+
@register_encoder('cost_report')
def encode_cost_report(
        cost_report: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Encode per-cluster entries for a 'cost_report' response, in place.

    A non-``None`` ``'status'`` is replaced by its ``.value``;
    ``'resources'`` is always pickled and base64-encoded.
    """
    for entry in cost_report:
        status = entry['status']
        if status is not None:
            entry['status'] = status.value
        entry['resources'] = pickle_and_encode(entry['resources'])
    return cost_report
|
135
|
+
|
136
|
+
|
137
|
+
@register_encoder('enabled_clouds')
def encode_enabled_clouds(clouds: List['clouds.Cloud']) -> List[str]:
    """Return the ``str()`` form of every cloud, in input order."""
    cloud_names = []
    for cloud in clouds:
        cloud_names.append(str(cloud))
    return cloud_names
|
141
|
+
|
142
|
+
|
143
|
+
@register_encoder('storage_ls')
def encode_storage_ls(
        return_value: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Encode storage records for a 'storage_ls' response, in place.

    ``'status'`` is replaced by its ``.value`` and ``'store'`` by the list of
    each element's ``.value``.
    """
    for record in return_value:
        record['status'] = record['status'].value
        store_values = []
        for store_type in record['store']:
            store_values.append(store_type.value)
        record['store'] = store_values
    return return_value
|
150
|
+
|
151
|
+
|
152
|
+
@register_encoder('job_status')
def encode_job_status(
        return_value: Dict[int, Any]) -> Dict[int, Optional[str]]:
    """Replace every non-``None`` job status with its ``.value``, in place.

    Args:
        return_value: Mapping of job id to a status object or ``None``.

    Returns:
        The same dict object. ``None`` entries are left as ``None``, which is
        why the value type is optional.
    """
    # Only existing keys are reassigned, so the dict's size never changes
    # while it is being iterated.
    for job_id, status in return_value.items():
        if status is not None:
            return_value[job_id] = status.value
    return return_value
|
158
|
+
|
159
|
+
|
160
|
+
@register_encoder('kubernetes_node_info')
def encode_kubernetes_node_info(
        return_value: Dict[str, 'models.KubernetesNodeInfo']) -> Dict[str, Any]:
    """Convert each node's info object to a dict via ``dataclasses.asdict``.

    A new dict keyed by the same node names is returned.
    """
    encoded: Dict[str, Any] = {}
    for node_name, node_info in return_value.items():
        encoded[node_name] = dataclasses.asdict(node_info)
    return encoded
|