skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +64 -32
- sky/adaptors/aws.py +23 -6
- sky/adaptors/azure.py +432 -15
- sky/adaptors/cloudflare.py +5 -5
- sky/adaptors/common.py +19 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/gcp.py +3 -2
- sky/adaptors/kubernetes.py +122 -88
- sky/adaptors/nebius.py +100 -0
- sky/adaptors/oci.py +39 -1
- sky/adaptors/vast.py +29 -0
- sky/admin_policy.py +101 -0
- sky/authentication.py +117 -98
- sky/backends/backend.py +52 -20
- sky/backends/backend_utils.py +669 -557
- sky/backends/cloud_vm_ray_backend.py +1099 -808
- sky/backends/local_docker_backend.py +14 -8
- sky/backends/wheel_utils.py +38 -20
- sky/benchmark/benchmark_utils.py +22 -23
- sky/check.py +76 -27
- sky/cli.py +1586 -1139
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5683 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1765 -0
- sky/cloud_stores.py +283 -19
- sky/clouds/__init__.py +7 -2
- sky/clouds/aws.py +303 -112
- sky/clouds/azure.py +185 -179
- sky/clouds/cloud.py +115 -37
- sky/clouds/cudo.py +29 -22
- sky/clouds/do.py +313 -0
- sky/clouds/fluidstack.py +44 -54
- sky/clouds/gcp.py +206 -65
- sky/clouds/ibm.py +26 -21
- sky/clouds/kubernetes.py +345 -91
- sky/clouds/lambda_cloud.py +40 -29
- sky/clouds/nebius.py +297 -0
- sky/clouds/oci.py +129 -90
- sky/clouds/paperspace.py +22 -18
- sky/clouds/runpod.py +53 -34
- sky/clouds/scp.py +28 -24
- sky/clouds/service_catalog/__init__.py +19 -13
- sky/clouds/service_catalog/aws_catalog.py +29 -12
- sky/clouds/service_catalog/azure_catalog.py +33 -6
- sky/clouds/service_catalog/common.py +95 -75
- sky/clouds/service_catalog/constants.py +3 -3
- sky/clouds/service_catalog/cudo_catalog.py +13 -3
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
- sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
- sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
- sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/gcp_catalog.py +16 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
- sky/clouds/service_catalog/lambda_catalog.py +8 -3
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/clouds/service_catalog/oci_catalog.py +31 -4
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vast_catalog.py +104 -0
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/utils/aws_utils.py +65 -0
- sky/clouds/utils/azure_utils.py +91 -0
- sky/clouds/utils/gcp_utils.py +5 -9
- sky/clouds/utils/oci_utils.py +47 -5
- sky/clouds/utils/scp_utils.py +4 -3
- sky/clouds/vast.py +280 -0
- sky/clouds/vsphere.py +22 -18
- sky/core.py +361 -107
- sky/dag.py +41 -28
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +211 -32
- sky/data/mounting_utils.py +182 -30
- sky/data/storage.py +2118 -270
- sky/data/storage_utils.py +126 -5
- sky/exceptions.py +179 -8
- sky/execution.py +158 -85
- sky/global_user_state.py +150 -34
- sky/jobs/__init__.py +12 -10
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +302 -0
- sky/jobs/constants.py +49 -11
- sky/jobs/controller.py +161 -99
- sky/jobs/dashboard/dashboard.py +171 -25
- sky/jobs/dashboard/templates/index.html +572 -60
- sky/jobs/recovery_strategy.py +157 -156
- sky/jobs/scheduler.py +307 -0
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/server/core.py +598 -0
- sky/jobs/server/dashboard_utils.py +69 -0
- sky/jobs/server/server.py +190 -0
- sky/jobs/state.py +627 -122
- sky/jobs/utils.py +615 -206
- sky/models.py +27 -0
- sky/optimizer.py +142 -83
- sky/provision/__init__.py +20 -5
- sky/provision/aws/config.py +124 -42
- sky/provision/aws/instance.py +130 -53
- sky/provision/azure/__init__.py +7 -0
- sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
- sky/provision/azure/config.py +220 -0
- sky/provision/azure/instance.py +1012 -37
- sky/provision/common.py +31 -3
- sky/provision/constants.py +25 -0
- sky/provision/cudo/__init__.py +2 -1
- sky/provision/cudo/cudo_utils.py +112 -0
- sky/provision/cudo/cudo_wrapper.py +37 -16
- sky/provision/cudo/instance.py +28 -12
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +301 -0
- sky/provision/docker_utils.py +82 -46
- sky/provision/fluidstack/fluidstack_utils.py +57 -125
- sky/provision/fluidstack/instance.py +15 -43
- sky/provision/gcp/config.py +19 -9
- sky/provision/gcp/constants.py +7 -1
- sky/provision/gcp/instance.py +55 -34
- sky/provision/gcp/instance_utils.py +339 -80
- sky/provision/gcp/mig_utils.py +210 -0
- sky/provision/instance_setup.py +172 -133
- sky/provision/kubernetes/__init__.py +1 -0
- sky/provision/kubernetes/config.py +104 -90
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +680 -325
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
- sky/provision/kubernetes/network.py +54 -20
- sky/provision/kubernetes/network_utils.py +70 -21
- sky/provision/kubernetes/utils.py +1370 -251
- sky/provision/lambda_cloud/__init__.py +11 -0
- sky/provision/lambda_cloud/config.py +10 -0
- sky/provision/lambda_cloud/instance.py +265 -0
- sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
- sky/provision/logging.py +1 -1
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +318 -0
- sky/provision/oci/__init__.py +15 -0
- sky/provision/oci/config.py +51 -0
- sky/provision/oci/instance.py +436 -0
- sky/provision/oci/query_utils.py +681 -0
- sky/provision/paperspace/constants.py +6 -0
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/paperspace/utils.py +2 -0
- sky/provision/provisioner.py +207 -130
- sky/provision/runpod/__init__.py +1 -0
- sky/provision/runpod/api/__init__.py +3 -0
- sky/provision/runpod/api/commands.py +119 -0
- sky/provision/runpod/api/pods.py +142 -0
- sky/provision/runpod/instance.py +64 -8
- sky/provision/runpod/utils.py +239 -23
- sky/provision/vast/__init__.py +10 -0
- sky/provision/vast/config.py +11 -0
- sky/provision/vast/instance.py +247 -0
- sky/provision/vast/utils.py +162 -0
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +8 -18
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +247 -102
- sky/serve/__init__.py +9 -9
- sky/serve/autoscalers.py +361 -299
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +12 -3
- sky/serve/controller.py +106 -36
- sky/serve/load_balancer.py +63 -12
- sky/serve/load_balancing_policies.py +84 -2
- sky/serve/replica_managers.py +42 -34
- sky/serve/serve_state.py +62 -32
- sky/serve/serve_utils.py +271 -160
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +271 -90
- sky/serve/server/server.py +112 -0
- sky/serve/service.py +52 -16
- sky/serve/service_spec.py +95 -32
- sky/server/__init__.py +1 -0
- sky/server/common.py +430 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +472 -0
- sky/server/requests/payloads.py +487 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1106 -0
- sky/server/stream_utils.py +141 -0
- sky/setup_files/MANIFEST.in +2 -5
- sky/setup_files/dependencies.py +159 -0
- sky/setup_files/setup.py +14 -125
- sky/sky_logging.py +59 -14
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +183 -50
- sky/skylet/events.py +22 -10
- sky/skylet/job_lib.py +403 -258
- sky/skylet/log_lib.py +111 -71
- sky/skylet/log_lib.pyi +6 -0
- sky/skylet/providers/command_runner.py +6 -8
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +11 -3
- sky/skylet/providers/scp/node_provider.py +8 -8
- sky/skylet/skylet.py +3 -1
- sky/skylet/subprocess_daemon.py +69 -17
- sky/skypilot_config.py +119 -57
- sky/task.py +205 -64
- sky/templates/aws-ray.yml.j2 +37 -7
- sky/templates/azure-ray.yml.j2 +27 -82
- sky/templates/cudo-ray.yml.j2 +7 -3
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/fluidstack-ray.yml.j2 +7 -4
- sky/templates/gcp-ray.yml.j2 +26 -6
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +46 -11
- sky/templates/kubernetes-ingress.yml.j2 +7 -0
- sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
- sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
- sky/templates/kubernetes-ray.yml.j2 +292 -25
- sky/templates/lambda-ray.yml.j2 +30 -40
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/templates/oci-ray.yml.j2 +18 -57
- sky/templates/paperspace-ray.yml.j2 +10 -6
- sky/templates/runpod-ray.yml.j2 +26 -4
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/sky-serve-controller.yaml.j2 +12 -1
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vast-ray.yml.j2 +70 -0
- sky/templates/vsphere-ray.yml.j2 +8 -3
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +10 -1
- sky/usage/usage_lib.py +130 -37
- sky/utils/accelerator_registry.py +35 -51
- sky/utils/admin_policy_utils.py +147 -0
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +81 -23
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +452 -89
- sky/utils/command_runner.pyi +77 -3
- sky/utils/common.py +54 -0
- sky/utils/common_utils.py +319 -108
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +48 -0
- sky/utils/controller_utils.py +548 -266
- sky/utils/dag_utils.py +93 -32
- sky/utils/db_utils.py +18 -4
- sky/utils/env_options.py +29 -7
- sky/utils/kubernetes/create_cluster.sh +8 -60
- sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
- sky/utils/kubernetes/gpu_labeler.py +4 -4
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/kubernetes/rsync_helper.sh +24 -0
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- sky/utils/log_utils.py +240 -33
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +94 -22
- sky/utils/rich_utils.py +247 -18
- sky/utils/schemas.py +284 -64
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +212 -46
- sky/utils/timeline.py +12 -7
- sky/utils/ux_utils.py +168 -15
- skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
- skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
- sky/clouds/cloud_registry.py +0 -31
- sky/jobs/core.py +0 -330
- sky/skylet/providers/azure/__init__.py +0 -2
- sky/skylet/providers/azure/azure-vm-template.json +0 -301
- sky/skylet/providers/azure/config.py +0 -170
- sky/skylet/providers/azure/node_provider.py +0 -466
- sky/skylet/providers/lambda_cloud/__init__.py +0 -2
- sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
- sky/skylet/providers/oci/__init__.py +0 -2
- sky/skylet/providers/oci/node_provider.py +0 -488
- sky/skylet/providers/oci/query_helper.py +0 -383
- sky/skylet/providers/oci/utils.py +0 -21
- sky/utils/cluster_yaml_utils.py +0 -24
- sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
- skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
- skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
sky/provision/docker_utils.py
CHANGED
@@ -12,9 +12,6 @@ from sky.utils import subprocess_utils
|
|
12
12
|
|
13
13
|
logger = sky_logging.init_logger(__name__)
|
14
14
|
|
15
|
-
DOCKER_PERMISSION_DENIED_STR = ('permission denied while trying to connect to '
|
16
|
-
'the Docker daemon socket')
|
17
|
-
|
18
15
|
# Configure environment variables. A docker image can have environment variables
|
19
16
|
# set in the Dockerfile with `ENV``. We need to export these variables to the
|
20
17
|
# shell environment, so that our ssh session can access them.
|
@@ -23,9 +20,16 @@ SETUP_ENV_VARS_CMD = (
|
|
23
20
|
'{ if [ $(id -u) -ne 0 ]; then echo "sudo"; else echo ""; fi; } && '
|
24
21
|
'printenv | while IFS=\'=\' read -r key value; do echo "export $key=\\\"$value\\\""; done > ' # pylint: disable=line-too-long
|
25
22
|
'~/container_env_var.sh && '
|
26
|
-
'$(prefix_cmd) mv ~/container_env_var.sh /etc/profile.d/container_env_var.sh'
|
23
|
+
'$(prefix_cmd) mv ~/container_env_var.sh /etc/profile.d/container_env_var.sh;'
|
27
24
|
)
|
28
25
|
|
26
|
+
# Docker daemon may not be ready when the machine is firstly started. The error
|
27
|
+
# message starts with the following string. We should wait for a while and retry
|
28
|
+
# the command.
|
29
|
+
DOCKER_PERMISSION_DENIED_STR = ('permission denied while trying to connect to '
|
30
|
+
'the Docker daemon socket')
|
31
|
+
_DOCKER_SOCKET_WAIT_TIMEOUT_SECONDS = 30
|
32
|
+
|
29
33
|
|
30
34
|
@dataclasses.dataclass
|
31
35
|
class DockerLoginConfig:
|
@@ -34,6 +38,13 @@ class DockerLoginConfig:
|
|
34
38
|
password: str
|
35
39
|
server: str
|
36
40
|
|
41
|
+
def format_image(self, image: str) -> str:
|
42
|
+
"""Format the image name with the server prefix."""
|
43
|
+
server_prefix = f'{self.server}/'
|
44
|
+
if not image.startswith(server_prefix):
|
45
|
+
return f'{server_prefix}{image}'
|
46
|
+
return image
|
47
|
+
|
37
48
|
@classmethod
|
38
49
|
def from_env_vars(cls, d: Dict[str, str]) -> 'DockerLoginConfig':
|
39
50
|
return cls(
|
@@ -106,8 +117,8 @@ def docker_start_cmds(
|
|
106
117
|
'--cap-add=SYS_ADMIN',
|
107
118
|
'--device=/dev/fuse',
|
108
119
|
'--security-opt=apparmor:unconfined',
|
120
|
+
'--entrypoint=/bin/bash',
|
109
121
|
image,
|
110
|
-
'bash',
|
111
122
|
]
|
112
123
|
return ' '.join(docker_run)
|
113
124
|
|
@@ -139,7 +150,9 @@ class DockerInitializer:
|
|
139
150
|
def _run(self,
|
140
151
|
cmd,
|
141
152
|
run_env='host',
|
142
|
-
wait_for_docker_daemon: bool = False
|
153
|
+
wait_for_docker_daemon: bool = False,
|
154
|
+
separate_stderr: bool = False,
|
155
|
+
log_err_when_fail: bool = True) -> str:
|
143
156
|
|
144
157
|
if run_env == 'docker':
|
145
158
|
cmd = self._docker_expand_user(cmd, any_char=True)
|
@@ -152,29 +165,38 @@ class DockerInitializer:
|
|
152
165
|
f' {shlex.quote(cmd)} ')
|
153
166
|
|
154
167
|
logger.debug(f'+ {cmd}')
|
155
|
-
|
156
|
-
retry = 3
|
168
|
+
start = time.time()
|
157
169
|
while True:
|
158
|
-
rc, stdout, stderr = self.runner.run(
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
170
|
+
rc, stdout, stderr = self.runner.run(
|
171
|
+
cmd,
|
172
|
+
require_outputs=True,
|
173
|
+
stream_logs=False,
|
174
|
+
separate_stderr=separate_stderr,
|
175
|
+
log_path=self.log_path)
|
176
|
+
if (DOCKER_PERMISSION_DENIED_STR in stdout + stderr and
|
177
|
+
wait_for_docker_daemon):
|
178
|
+
if time.time() - start > _DOCKER_SOCKET_WAIT_TIMEOUT_SECONDS:
|
179
|
+
if rc == 0:
|
180
|
+
# Set returncode to 1 if failed to connect to docker
|
181
|
+
# daemon after timeout.
|
182
|
+
rc = 1
|
183
|
+
break
|
184
|
+
# Close the cached connection to make the permission update of
|
185
|
+
# ssh user take effect, e.g. usermod -aG docker $USER, called
|
186
|
+
# by cloud-init of Azure.
|
187
|
+
self.runner.close_cached_connection()
|
188
|
+
logger.info('Failed to connect to docker daemon. It might be '
|
189
|
+
'initializing, retrying in 5 seconds...')
|
190
|
+
time.sleep(5)
|
191
|
+
continue
|
192
|
+
break
|
173
193
|
subprocess_utils.handle_returncode(
|
174
194
|
rc,
|
175
195
|
cmd,
|
176
|
-
error_msg='Failed to run docker setup commands',
|
177
|
-
stderr=stdout + stderr
|
196
|
+
error_msg='Failed to run docker setup commands.',
|
197
|
+
stderr=stdout + stderr,
|
198
|
+
# Print out the error message if the command failed.
|
199
|
+
stream_logs=log_err_when_fail)
|
178
200
|
return stdout.strip()
|
179
201
|
|
180
202
|
def initialize(self) -> str:
|
@@ -205,9 +227,7 @@ class DockerInitializer:
|
|
205
227
|
wait_for_docker_daemon=True)
|
206
228
|
# We automatically add the server prefix to the image name if
|
207
229
|
# the user did not add it.
|
208
|
-
|
209
|
-
if not specific_image.startswith(server_prefix):
|
210
|
-
specific_image = f'{server_prefix}{specific_image}'
|
230
|
+
specific_image = docker_login_config.format_image(specific_image)
|
211
231
|
|
212
232
|
if self.docker_config.get('pull_before_run', True):
|
213
233
|
assert specific_image, ('Image must be included in config if ' +
|
@@ -238,12 +258,13 @@ class DockerInitializer:
|
|
238
258
|
# issue with nvidia container toolkit:
|
239
259
|
# https://github.com/NVIDIA/nvidia-container-toolkit/issues/48
|
240
260
|
self._run(
|
241
|
-
'
|
261
|
+
'{ which jq || sudo apt update && sudo apt install -y jq; } && '
|
262
|
+
'{ [ -f /etc/docker/daemon.json ] || '
|
242
263
|
'echo "{}" | sudo tee /etc/docker/daemon.json;'
|
243
264
|
'sudo jq \'.["exec-opts"] = ["native.cgroupdriver=cgroupfs"]\' '
|
244
265
|
'/etc/docker/daemon.json > /tmp/daemon.json;'
|
245
266
|
'sudo mv /tmp/daemon.json /etc/docker/daemon.json;'
|
246
|
-
'sudo systemctl restart docker')
|
267
|
+
'sudo systemctl restart docker; } || true')
|
247
268
|
user_docker_run_options = self.docker_config.get('run_options', [])
|
248
269
|
start_command = docker_start_cmds(
|
249
270
|
specific_image,
|
@@ -320,12 +341,22 @@ class DockerInitializer:
|
|
320
341
|
|
321
342
|
def _check_docker_installed(self):
|
322
343
|
no_exist = 'NoExist'
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
344
|
+
# SkyPilot: Add the current user to the docker group first (if needed),
|
345
|
+
# before checking if docker is installed to avoid permission issues.
|
346
|
+
docker_cmd = ('id -nG $USER | grep -qw docker || '
|
347
|
+
'sudo usermod -aG docker $USER > /dev/null 2>&1;'
|
348
|
+
f'command -v {self.docker_cmd} || echo {no_exist!r}')
|
349
|
+
cleaned_output = self._run(docker_cmd)
|
350
|
+
timeout = 60 * 10 # 10 minute timeout
|
351
|
+
start = time.time()
|
352
|
+
while no_exist in cleaned_output or 'docker' not in cleaned_output:
|
353
|
+
if time.time() - start > timeout:
|
354
|
+
logger.error(
|
355
|
+
f'{self.docker_cmd.capitalize()} not installed. Please use '
|
356
|
+
f'an image with {self.docker_cmd.capitalize()} installed.')
|
357
|
+
return
|
358
|
+
time.sleep(5)
|
359
|
+
cleaned_output = self._run(docker_cmd)
|
329
360
|
|
330
361
|
def _check_container_status(self):
|
331
362
|
if self.initialized:
|
@@ -340,9 +371,14 @@ class DockerInitializer:
|
|
340
371
|
user_pos = string.find('~')
|
341
372
|
if user_pos > -1:
|
342
373
|
if self.home_dir is None:
|
343
|
-
|
344
|
-
|
345
|
-
|
374
|
+
cmd = (f'{self.docker_cmd} exec {self.container_name} '
|
375
|
+
'printenv HOME')
|
376
|
+
self.home_dir = self._run(cmd, separate_stderr=True)
|
377
|
+
# Check for unexpected newline in home directory, which can be
|
378
|
+
# a common issue when the output is mixed with stderr.
|
379
|
+
assert '\n' not in self.home_dir, (
|
380
|
+
'Unexpected newline in home directory '
|
381
|
+
f'({{self.home_dir}}) retrieved with {cmd}')
|
346
382
|
|
347
383
|
if any_char:
|
348
384
|
return string.replace('~/', self.home_dir + '/')
|
@@ -360,8 +396,8 @@ class DockerInitializer:
|
|
360
396
|
'info -f "{{.Runtimes}}"'))
|
361
397
|
if 'nvidia-container-runtime' in runtime_output:
|
362
398
|
try:
|
363
|
-
self._run('nvidia-smi')
|
364
|
-
return run_options + ['--runtime=nvidia']
|
399
|
+
self._run('nvidia-smi', log_err_when_fail=False)
|
400
|
+
return run_options + ['--runtime=nvidia', '--gpus all']
|
365
401
|
except Exception as e: # pylint: disable=broad-except
|
366
402
|
logger.debug(
|
367
403
|
'Nvidia Container Runtime is present in the docker image'
|
@@ -404,8 +440,8 @@ class DockerInitializer:
|
|
404
440
|
def _check_container_exited(self) -> bool:
|
405
441
|
if self.initialized:
|
406
442
|
return True
|
407
|
-
output =
|
408
|
-
|
409
|
-
|
410
|
-
return 'false' in output.lower(
|
411
|
-
|
443
|
+
output = self._run(check_docker_running_cmd(self.container_name,
|
444
|
+
self.docker_cmd),
|
445
|
+
wait_for_docker_daemon=True)
|
446
|
+
return ('false' in output.lower() and
|
447
|
+
'no such object' not in output.lower())
|
@@ -1,21 +1,22 @@
|
|
1
1
|
"""FluidStack API client."""
|
2
2
|
|
3
|
-
import functools
|
4
3
|
import json
|
5
4
|
import os
|
6
|
-
|
5
|
+
import time
|
6
|
+
from typing import Any, Dict, List
|
7
7
|
import uuid
|
8
8
|
|
9
9
|
import requests
|
10
10
|
|
11
|
+
from sky.utils import annotations
|
12
|
+
|
11
13
|
|
12
14
|
def get_key_suffix():
|
13
15
|
return str(uuid.uuid4()).replace('-', '')[:8]
|
14
16
|
|
15
17
|
|
16
|
-
ENDPOINT = 'https://
|
18
|
+
ENDPOINT = 'https://platform.fluidstack.io/'
|
17
19
|
FLUIDSTACK_API_KEY_PATH = '~/.fluidstack/api_key'
|
18
|
-
FLUIDSTACK_API_TOKEN_PATH = '~/.fluidstack/api_token'
|
19
20
|
|
20
21
|
|
21
22
|
def read_contents(path: str) -> str:
|
@@ -30,7 +31,7 @@ class FluidstackAPIError(Exception):
|
|
30
31
|
super().__init__(message)
|
31
32
|
|
32
33
|
|
33
|
-
def raise_fluidstack_error(response: requests.Response) -> None:
|
34
|
+
def raise_fluidstack_error(response: 'requests.Response') -> None:
|
34
35
|
"""Raise FluidstackAPIError if appropriate."""
|
35
36
|
status_code = response.status_code
|
36
37
|
if response.ok:
|
@@ -46,109 +47,76 @@ def raise_fluidstack_error(response: requests.Response) -> None:
|
|
46
47
|
raise FluidstackAPIError(f'{message}', status_code)
|
47
48
|
|
48
49
|
|
49
|
-
@functools.lru_cache()
|
50
|
-
def with_nvidia_drivers(region: str):
|
51
|
-
if region in ['norway_4_eu', 'generic_1_canada']:
|
52
|
-
return False
|
53
|
-
client = FluidstackClient()
|
54
|
-
plans = client.get_plans()
|
55
|
-
for plan in plans:
|
56
|
-
if region in [r['id'] for r in plan['regions']]:
|
57
|
-
if 'Ubuntu 20.04 LTS (Nvidia)' in plan['os_options']:
|
58
|
-
return True
|
59
|
-
return False
|
60
|
-
|
61
|
-
|
62
50
|
class FluidstackClient:
|
63
51
|
"""FluidStack API Client"""
|
64
52
|
|
65
53
|
def __init__(self):
|
66
54
|
self.api_key = read_contents(
|
67
|
-
os.path.expanduser(FLUIDSTACK_API_KEY_PATH))
|
68
|
-
self.api_token = read_contents(
|
69
|
-
os.path.expanduser(FLUIDSTACK_API_TOKEN_PATH))
|
55
|
+
os.path.expanduser(FLUIDSTACK_API_KEY_PATH)).strip()
|
70
56
|
|
71
57
|
def get_plans(self):
|
72
|
-
response = requests.get(ENDPOINT + '
|
58
|
+
response = requests.get(ENDPOINT + 'list_available_configurations',
|
59
|
+
headers={'api-key': self.api_key})
|
73
60
|
raise_fluidstack_error(response)
|
74
61
|
plans = response.json()
|
75
|
-
plans = [
|
76
|
-
plan for plan in plans
|
77
|
-
if plan['minimum_commitment'] == 'hourly' and plan['type'] in
|
78
|
-
['preconfigured', 'custom'] and plan['gpu_type'] != 'NO GPU'
|
79
|
-
]
|
80
62
|
return plans
|
81
63
|
|
82
|
-
def list_instances(
|
83
|
-
self,
|
84
|
-
tag_filters: Optional[Dict[str,
|
85
|
-
str]] = None) -> List[Dict[str, Any]]:
|
64
|
+
def list_instances(self) -> List[Dict[str, Any]]:
|
86
65
|
response = requests.get(
|
87
|
-
ENDPOINT + '
|
88
|
-
|
66
|
+
ENDPOINT + 'instances',
|
67
|
+
headers={'api-key': self.api_key},
|
89
68
|
)
|
90
69
|
raise_fluidstack_error(response)
|
91
70
|
instances = response.json()
|
92
|
-
|
93
|
-
|
94
|
-
for instance in instances:
|
95
|
-
if isinstance(instance['tags'], str):
|
96
|
-
instance['tags'] = json.loads(instance['tags'])
|
97
|
-
if not instance['tags']:
|
98
|
-
instance['tags'] = {}
|
99
|
-
if tag_filters:
|
100
|
-
for key in tag_filters:
|
101
|
-
if instance['tags'].get(key, None) != tag_filters[key]:
|
102
|
-
break
|
103
|
-
else:
|
104
|
-
filtered_instances.append(instance)
|
105
|
-
else:
|
106
|
-
filtered_instances.append(instance)
|
107
|
-
|
108
|
-
return filtered_instances
|
71
|
+
return instances
|
109
72
|
|
110
73
|
def create_instance(
|
111
74
|
self,
|
112
75
|
instance_type: str = '',
|
113
|
-
|
76
|
+
name: str = '',
|
114
77
|
region: str = '',
|
115
78
|
ssh_pub_key: str = '',
|
116
79
|
count: int = 1,
|
117
80
|
) -> List[str]:
|
118
81
|
"""Launch new instances."""
|
119
82
|
|
120
|
-
config: Dict[str, Any] = {}
|
121
83
|
plans = self.get_plans()
|
122
84
|
regions = self.list_regions()
|
85
|
+
gpu_type, gpu_count = instance_type.split('::')
|
86
|
+
gpu_count = int(gpu_count)
|
87
|
+
|
123
88
|
plans = [
|
124
|
-
plan for plan in plans if plan['
|
125
|
-
|
89
|
+
plan for plan in plans if plan['gpu_type'] == gpu_type and
|
90
|
+
gpu_count in plan['gpu_counts'] and region in plan['regions']
|
126
91
|
]
|
127
92
|
if not plans:
|
128
93
|
raise FluidstackAPIError(
|
129
94
|
f'Plan {instance_type} out of stock in region {region}')
|
130
95
|
|
131
96
|
ssh_key = self.get_or_add_ssh_key(ssh_pub_key)
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
97
|
+
default_operating_system = 'ubuntu_22_04_lts_nvidia'
|
98
|
+
instance_ids = []
|
99
|
+
for _ in range(count):
|
100
|
+
body = dict(gpu_type=gpu_type,
|
101
|
+
gpu_count=gpu_count,
|
102
|
+
region=regions[region],
|
103
|
+
operating_system_label=default_operating_system,
|
104
|
+
name=name,
|
105
|
+
ssh_key=ssh_key['name'])
|
106
|
+
|
107
|
+
response = requests.post(ENDPOINT + 'instances',
|
108
|
+
headers={'api-key': self.api_key},
|
109
|
+
json=body)
|
110
|
+
raise_fluidstack_error(response)
|
111
|
+
instance_id = response.json().get('id')
|
112
|
+
instance_ids.append(instance_id)
|
113
|
+
time.sleep(1)
|
114
|
+
|
147
115
|
return instance_ids
|
148
116
|
|
149
117
|
def list_ssh_keys(self):
|
150
|
-
response = requests.get(ENDPOINT + '
|
151
|
-
|
118
|
+
response = requests.get(ENDPOINT + 'ssh_keys',
|
119
|
+
headers={'api-key': self.api_key})
|
152
120
|
raise_fluidstack_error(response)
|
153
121
|
return response.json()
|
154
122
|
|
@@ -156,86 +124,50 @@ class FluidstackClient:
|
|
156
124
|
"""Add ssh key if not already added."""
|
157
125
|
ssh_keys = self.list_ssh_keys()
|
158
126
|
for key in ssh_keys:
|
159
|
-
if key['public_key'].strip() == ssh_pub_key.strip(
|
160
|
-
|
161
|
-
|
162
|
-
'name': key['name'],
|
163
|
-
'ssh_key': ssh_pub_key
|
164
|
-
}
|
127
|
+
if key['public_key'].strip().split()[:2] == ssh_pub_key.strip(
|
128
|
+
).split()[:2]:
|
129
|
+
return {'name': key['name'], 'ssh_key': ssh_pub_key}
|
165
130
|
ssh_key_name = 'skypilot-' + get_key_suffix()
|
166
131
|
response = requests.post(
|
167
|
-
ENDPOINT + '
|
168
|
-
|
132
|
+
ENDPOINT + 'ssh_keys',
|
133
|
+
headers={'api-key': self.api_key},
|
169
134
|
json=dict(name=ssh_key_name, public_key=ssh_pub_key),
|
170
135
|
)
|
171
136
|
raise_fluidstack_error(response)
|
172
|
-
|
173
|
-
return {'id': key_id, 'name': ssh_key_name, 'ssh_key': ssh_pub_key}
|
137
|
+
return {'name': ssh_key_name, 'ssh_key': ssh_pub_key}
|
174
138
|
|
175
|
-
@
|
139
|
+
@annotations.lru_cache(scope='global')
|
176
140
|
def list_regions(self):
|
177
|
-
|
178
|
-
raise_fluidstack_error(response)
|
179
|
-
plans = response.json()
|
180
|
-
plans = [
|
181
|
-
plan for plan in plans
|
182
|
-
if plan['minimum_commitment'] == 'hourly' and plan['type'] in
|
183
|
-
['preconfigured', 'custom'] and plan['gpu_type'] != 'NO GPU'
|
184
|
-
]
|
141
|
+
plans = self.get_plans()
|
185
142
|
|
186
143
|
def get_regions(plans: List) -> dict:
|
187
144
|
"""Return a list of regions where the plan is available."""
|
188
145
|
regions = {}
|
189
146
|
for plan in plans:
|
190
147
|
for region in plan.get('regions', []):
|
191
|
-
regions[region
|
148
|
+
regions[region] = region
|
192
149
|
return regions
|
193
150
|
|
194
151
|
regions = get_regions(plans)
|
195
152
|
return regions
|
196
153
|
|
197
154
|
def delete(self, instance_id: str):
|
198
|
-
response = requests.delete(ENDPOINT + '
|
199
|
-
|
155
|
+
response = requests.delete(ENDPOINT + 'instances/' + instance_id,
|
156
|
+
headers={'api-key': self.api_key})
|
200
157
|
raise_fluidstack_error(response)
|
201
158
|
return response.json()
|
202
159
|
|
203
160
|
def stop(self, instance_id: str):
|
204
|
-
response = requests.put(ENDPOINT + '
|
205
|
-
|
206
|
-
raise_fluidstack_error(response)
|
207
|
-
return response.json()
|
208
|
-
|
209
|
-
def restart(self, instance_id: str):
|
210
|
-
response = requests.post(ENDPOINT + 'server/' + instance_id + '/reboot',
|
211
|
-
auth=(self.api_key, self.api_token))
|
212
|
-
raise_fluidstack_error(response)
|
213
|
-
return response.json()
|
214
|
-
|
215
|
-
def info(self, instance_id: str):
|
216
|
-
response = requests.get(ENDPOINT + f'server/{instance_id}',
|
217
|
-
auth=(self.api_key, self.api_token))
|
218
|
-
raise_fluidstack_error(response)
|
219
|
-
return response.json()
|
220
|
-
|
221
|
-
def status(self, instance_id: str):
|
222
|
-
response = self.info(instance_id)
|
223
|
-
return response['status']
|
224
|
-
|
225
|
-
def add_tags(self, instance_id: str, tags: Dict[str, str]) -> str:
|
226
|
-
response = requests.patch(
|
227
|
-
ENDPOINT + f'server/{instance_id}/tag',
|
228
|
-
auth=(self.api_key, self.api_token),
|
229
|
-
json=dict(tags=json.dumps(tags)),
|
230
|
-
)
|
161
|
+
response = requests.put(ENDPOINT + 'instances/' + instance_id + '/stop',
|
162
|
+
headers={'api-key': self.api_key})
|
231
163
|
raise_fluidstack_error(response)
|
232
164
|
return response.json()
|
233
165
|
|
234
|
-
def rename(self, instance_id: str,
|
235
|
-
response = requests.
|
236
|
-
ENDPOINT + f'
|
237
|
-
|
238
|
-
json=dict(name
|
166
|
+
def rename(self, instance_id: str, name: str) -> str:
|
167
|
+
response = requests.put(
|
168
|
+
ENDPOINT + f'instances/{instance_id}/rename',
|
169
|
+
headers={'api-key': self.api_key},
|
170
|
+
json=dict(new_instance_name=name),
|
239
171
|
)
|
240
172
|
raise_fluidstack_error(response)
|
241
173
|
return response.json()
|
@@ -1,15 +1,16 @@
|
|
1
1
|
"""FluidStack instance provisioning."""
|
2
|
+
import os
|
2
3
|
import time
|
3
4
|
from typing import Any, Dict, List, Optional
|
4
5
|
|
5
6
|
from sky import authentication as auth
|
6
7
|
from sky import exceptions
|
7
8
|
from sky import sky_logging
|
8
|
-
from sky import status_lib
|
9
9
|
from sky.provision import common
|
10
10
|
from sky.provision.fluidstack import fluidstack_utils as utils
|
11
11
|
from sky.utils import command_runner
|
12
12
|
from sky.utils import common_utils
|
13
|
+
from sky.utils import status_lib
|
13
14
|
from sky.utils import subprocess_utils
|
14
15
|
from sky.utils import ux_utils
|
15
16
|
|
@@ -25,10 +26,11 @@ logger = sky_logging.init_logger(__name__)
|
|
25
26
|
|
26
27
|
def get_internal_ip(node_info: Dict[str, Any]) -> None:
|
27
28
|
node_info['internal_ip'] = node_info['ip_address']
|
29
|
+
private_key_path, _ = auth.get_or_generate_keys()
|
28
30
|
runner = command_runner.SSHCommandRunner(
|
29
|
-
node_info['ip_address'],
|
30
|
-
ssh_user=
|
31
|
-
ssh_private_key=
|
31
|
+
(node_info['ip_address'], 22),
|
32
|
+
ssh_user='ubuntu',
|
33
|
+
ssh_private_key=os.path.expanduser(private_key_path))
|
32
34
|
result = runner.run(_GET_INTERNAL_IP_CMD,
|
33
35
|
require_outputs=True,
|
34
36
|
stream_logs=False)
|
@@ -61,7 +63,7 @@ def _filter_instances(
|
|
61
63
|
if (include_instances is not None and
|
62
64
|
instance['id'] not in include_instances):
|
63
65
|
continue
|
64
|
-
if instance.get('
|
66
|
+
if instance.get('name') in possible_names:
|
65
67
|
filtered_instances[instance['id']] = instance
|
66
68
|
return filtered_instances
|
67
69
|
|
@@ -69,7 +71,7 @@ def _filter_instances(
|
|
69
71
|
def _get_head_instance_id(instances: Dict[str, Any]) -> Optional[str]:
|
70
72
|
head_instance_id = None
|
71
73
|
for inst_id, inst in instances.items():
|
72
|
-
if inst['
|
74
|
+
if inst['name'].endswith('-head'):
|
73
75
|
head_instance_id = inst_id
|
74
76
|
break
|
75
77
|
return head_instance_id
|
@@ -79,18 +81,7 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
79
81
|
config: common.ProvisionConfig) -> common.ProvisionRecord:
|
80
82
|
"""Runs instances for the given cluster."""
|
81
83
|
|
82
|
-
pending_status = [
|
83
|
-
'create',
|
84
|
-
'requesting',
|
85
|
-
'provisioning',
|
86
|
-
'customizing',
|
87
|
-
'starting',
|
88
|
-
'stopping',
|
89
|
-
'start',
|
90
|
-
'stop',
|
91
|
-
'reboot',
|
92
|
-
'rebooting',
|
93
|
-
]
|
84
|
+
pending_status = ['pending', 'provisioning']
|
94
85
|
while True:
|
95
86
|
instances = _filter_instances(cluster_name_on_cloud, pending_status)
|
96
87
|
if len(instances) > config.count:
|
@@ -127,7 +118,7 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
127
118
|
f'{instance_name}')
|
128
119
|
rename(instance_id, instance_name)
|
129
120
|
if (instance_id != head_instance_id and
|
130
|
-
instance['
|
121
|
+
instance['name'].endswith('-head')):
|
131
122
|
# Multiple head instances exist.
|
132
123
|
# This is a rare case when the instance name was manually modified
|
133
124
|
# on the cloud or some unexpected behavior happened.
|
@@ -167,7 +158,7 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
167
158
|
node_type = 'head' if head_instance_id is None else 'worker'
|
168
159
|
try:
|
169
160
|
instance_ids = utils.FluidstackClient().create_instance(
|
170
|
-
|
161
|
+
name=f'{cluster_name_on_cloud}-{node_type}',
|
171
162
|
instance_type=config.node_config['InstanceType'],
|
172
163
|
ssh_pub_key=config.node_config['AuthorizedKey'],
|
173
164
|
region=region)
|
@@ -184,9 +175,6 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
184
175
|
instances = _filter_instances(cluster_name_on_cloud,
|
185
176
|
pending_status + ['running'])
|
186
177
|
if len(instances) < config.count:
|
187
|
-
# Some of pending instances have been convert to a state that will
|
188
|
-
# not convert to `running` status. This can be due to resource
|
189
|
-
# availability issue.
|
190
178
|
all_instances = _filter_instances(
|
191
179
|
cluster_name_on_cloud,
|
192
180
|
status_filters=None,
|
@@ -253,15 +241,11 @@ def terminate_instances(
|
|
253
241
|
instances = _filter_instances(cluster_name_on_cloud, None)
|
254
242
|
for inst_id, inst in instances.items():
|
255
243
|
logger.debug(f'Terminating instance {inst_id}: {inst}')
|
256
|
-
if worker_only and inst['
|
244
|
+
if worker_only and inst['name'].endswith('-head'):
|
257
245
|
continue
|
258
246
|
try:
|
259
247
|
utils.FluidstackClient().delete(inst_id)
|
260
248
|
except Exception as e: # pylint: disable=broad-except
|
261
|
-
if (isinstance(e, utils.FluidstackAPIError) and
|
262
|
-
'Machine is already terminated' in str(e)):
|
263
|
-
logger.debug(f'Instance {inst_id} is already terminated.')
|
264
|
-
continue
|
265
249
|
with ux_utils.print_exception_no_traceback():
|
266
250
|
raise RuntimeError(
|
267
251
|
f'Failed to terminate instance {inst_id}: '
|
@@ -291,7 +275,7 @@ def get_cluster_info(
|
|
291
275
|
tags={},
|
292
276
|
)
|
293
277
|
]
|
294
|
-
if instance_info['
|
278
|
+
if instance_info['name'].endswith('-head'):
|
295
279
|
head_instance_id = instance_id
|
296
280
|
|
297
281
|
return common.ClusterInfo(instances=instances,
|
@@ -311,22 +295,10 @@ def query_instances(
|
|
311
295
|
instances = _filter_instances(cluster_name_on_cloud, None)
|
312
296
|
instances = _filter_instances(cluster_name_on_cloud, None)
|
313
297
|
status_map = {
|
314
|
-
'
|
315
|
-
'requesting': status_lib.ClusterStatus.INIT,
|
316
|
-
'create': status_lib.ClusterStatus.INIT,
|
317
|
-
'customizing': status_lib.ClusterStatus.INIT,
|
318
|
-
'stopping': status_lib.ClusterStatus.STOPPED,
|
319
|
-
'stop': status_lib.ClusterStatus.STOPPED,
|
320
|
-
'start': status_lib.ClusterStatus.INIT,
|
321
|
-
'reboot': status_lib.ClusterStatus.STOPPED,
|
322
|
-
'rebooting': status_lib.ClusterStatus.STOPPED,
|
298
|
+
'pending': status_lib.ClusterStatus.INIT,
|
323
299
|
'stopped': status_lib.ClusterStatus.STOPPED,
|
324
|
-
'starting': status_lib.ClusterStatus.INIT,
|
325
300
|
'running': status_lib.ClusterStatus.UP,
|
326
|
-
'
|
327
|
-
'timeout error': status_lib.ClusterStatus.INIT,
|
328
|
-
'out of stock': status_lib.ClusterStatus.INIT,
|
329
|
-
'terminating': None,
|
301
|
+
'unhealthy': status_lib.ClusterStatus.INIT,
|
330
302
|
'terminated': None,
|
331
303
|
}
|
332
304
|
statuses: Dict[str, Optional[status_lib.ClusterStatus]] = {}
|