skypilot-nightly 1.0.0.dev20250220__py3-none-any.whl → 1.0.0.dev20250222__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +4 -2
- sky/adaptors/nebius.py +85 -0
- sky/backends/backend_utils.py +8 -0
- sky/backends/cloud_vm_ray_backend.py +10 -2
- sky/client/sdk.py +8 -3
- sky/clouds/__init__.py +2 -0
- sky/clouds/nebius.py +294 -0
- sky/clouds/service_catalog/constants.py +1 -1
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/jobs/controller.py +17 -0
- sky/jobs/server/core.py +31 -3
- sky/provision/__init__.py +1 -0
- sky/provision/kubernetes/instance.py +5 -1
- sky/provision/kubernetes/utils.py +8 -7
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +310 -0
- sky/server/common.py +1 -1
- sky/setup_files/dependencies.py +9 -1
- sky/skylet/constants.py +3 -6
- sky/task.py +6 -0
- sky/templates/jobs-controller.yaml.j2 +3 -0
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/utils/controller_utils.py +66 -2
- {skypilot_nightly-1.0.0.dev20250220.dist-info → skypilot_nightly-1.0.0.dev20250222.dist-info}/METADATA +8 -4
- {skypilot_nightly-1.0.0.dev20250220.dist-info → skypilot_nightly-1.0.0.dev20250222.dist-info}/RECORD +31 -23
- {skypilot_nightly-1.0.0.dev20250220.dist-info → skypilot_nightly-1.0.0.dev20250222.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250220.dist-info → skypilot_nightly-1.0.0.dev20250222.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250220.dist-info → skypilot_nightly-1.0.0.dev20250222.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250220.dist-info → skypilot_nightly-1.0.0.dev20250222.dist-info}/top_level.txt +0 -0
sky/jobs/server/core.py
CHANGED
@@ -19,6 +19,7 @@ from sky import sky_logging
 from sky import task as task_lib
 from sky.backends import backend_utils
 from sky.clouds.service_catalog import common as service_catalog_common
+from sky.data import storage as storage_lib
 from sky.jobs import constants as managed_job_constants
 from sky.jobs import utils as managed_job_utils
 from sky.provision import common as provision_common
@@ -101,9 +102,35 @@ def launch(
 
     with rich_utils.safe_status(
             ux_utils.spinner_message('Initializing managed job')):
-        for task_ in dag.tasks:
-            controller_utils.maybe_translate_local_file_mounts_and_sync_up(
-                task_, task_type='jobs')
+
+        local_to_controller_file_mounts = {}
+
+        if storage_lib.get_cached_enabled_storage_clouds_or_refresh():
+            for task_ in dag.tasks:
+                controller_utils.maybe_translate_local_file_mounts_and_sync_up(
+                    task_, task_type='jobs')
+
+        else:
+            # We do not have any cloud storage available, so fall back to
+            # two-hop file_mount uploading.
+            # Note: we can't easily hack sync_storage_mounts() to upload
+            # directly to the controller, because the controller may not
+            # even be up yet.
+            for task_ in dag.tasks:
+                if task_.storage_mounts:
+                    # Technically, we could convert COPY storage_mounts that
+                    # have a local source and do not specify `store`, but we
+                    # will not do that for now. Only plain file_mounts are
+                    # supported.
+                    raise exceptions.NotSupportedError(
+                        'Cloud-based file_mounts are specified, but no cloud '
+                        'storage is available. Please specify local '
+                        'file_mounts only.')
+
+                # Merge file mounts from all tasks.
+                local_to_controller_file_mounts.update(
+                    controller_utils.translate_local_file_mounts_to_two_hop(
+                        task_))
 
     with tempfile.NamedTemporaryFile(prefix=f'managed-dag-{dag.name}-',
                                      mode='w') as f:
@@ -120,6 +147,7 @@ def launch(
         vars_to_fill = {
             'remote_user_yaml_path': remote_user_yaml_path,
             'user_yaml_path': f.name,
+            'local_to_controller_file_mounts': local_to_controller_file_mounts,
             'jobs_controller': controller_name,
             # Note: actual cluster name will be <task.name>-<managed job ID>
             'dag_name': dag.name,
sky/provision/__init__.py
CHANGED
@@ -20,6 +20,7 @@ from sky.provision import fluidstack
 from sky.provision import gcp
 from sky.provision import kubernetes
 from sky.provision import lambda_cloud
+from sky.provision import nebius
 from sky.provision import oci
 from sky.provision import runpod
 from sky.provision import vast
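
Registering the new provisioner is just this one import: sky.provision treats each cloud as a module exporting the same set of functions (see the new sky/provision/nebius/__init__.py later in this diff), so a dispatcher can route calls by provider name. An illustrative sketch of that duck-typed routing, assuming a module-per-provider layout; the actual SkyPilot dispatch code is not shown in this diff.

import importlib
from typing import Any


def route(provider_name: str, func_name: str, *args: Any, **kwargs: Any) -> Any:
    # Resolve e.g. 'nebius' -> sky.provision.nebius, then dispatch to the
    # module-level function every provider module is expected to export.
    module = importlib.import_module(f'sky.provision.{provider_name.lower()}')
    return getattr(module, func_name)(*args, **kwargs)


# e.g. route('nebius', 'open_ports', 'my-cluster', ports=['22'])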
sky/provision/kubernetes/instance.py
CHANGED
@@ -797,7 +797,11 @@ def _create_pods(region: str, cluster_name_on_cloud: str,
                 'value': 'present',
                 'effect': 'NoSchedule'
             }
-            pod_spec_copy['spec']['tolerations'] = [tpu_toleration]
+            # Preserve existing tolerations if any
+            existing_tolerations = pod_spec_copy['spec'].get('tolerations', [])
+            pod_spec_copy['spec']['tolerations'] = existing_tolerations + [
+                tpu_toleration
+            ]
 
         return _create_namespaced_pod_with_retries(namespace, pod_spec_copy,
                                                    context)
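
The fix is a merge rather than an overwrite, so tolerations already present on the pod spec (for example, from a user-supplied pod_config) survive when the TPU toleration is appended. A quick plain-dict check of the semantics; the 'google.com/tpu' key below is assumed for illustration, since the hunk only shows the toleration's value and effect.

pod_spec = {'spec': {'tolerations': [{'key': 'user-taint', 'operator': 'Exists'}]}}
tpu_toleration = {'key': 'google.com/tpu', 'operator': 'Equal',
                  'value': 'present', 'effect': 'NoSchedule'}

# Old behavior dropped the user toleration:
#   pod_spec['spec']['tolerations'] = [tpu_toleration]
# New behavior keeps it:
existing = pod_spec['spec'].get('tolerations', [])
pod_spec['spec']['tolerations'] = existing + [tpu_toleration]
assert len(pod_spec['spec']['tolerations']) == 2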
sky/provision/kubernetes/utils.py
CHANGED
@@ -1302,13 +1302,13 @@ class KubernetesInstanceType:
     - Accelerators
     The name format is "{n}CPU--{k}GB" where n is the number of vCPUs and
     k is the amount of memory in GB. Accelerators can be specified by
-    appending "--{a}{type}" where a is the number of accelerators and
-    type is the accelerator type.
+    appending "--{type}:{a}" where type is the accelerator type and a
+    is the number of accelerators.
     CPU and memory can be specified as floats. Accelerator count must be int.
     Examples:
     - 4CPU--16GB
     - 0.5CPU--1.5GB
-    - 4CPU--16GB--1V100
+    - 4CPU--16GB--V100:1
     """
 
     def __init__(self,
@@ -1333,13 +1333,14 @@ class KubernetesInstanceType:
         # valid logical instance type name.
         assert self.accelerator_type is not None, self.accelerator_count
         acc_name = self.accelerator_type.replace(' ', '_')
-        name += f'--{self.accelerator_count}{acc_name}'
+        name += f'--{acc_name}:{self.accelerator_count}'
         return name
 
     @staticmethod
     def is_valid_instance_type(name: str) -> bool:
         """Returns whether the given name is a valid instance type."""
-        pattern = re.compile(r'^(\d+(\.\d+)?CPU--\d+(\.\d+)?GB)(--\d+\S+)?$')
+        pattern = re.compile(
+            r'^(\d+(\.\d+)?CPU--\d+(\.\d+)?GB)(--[\w\d-]+:\d+)?$')
         return bool(pattern.match(name))
 
     @classmethod
@@ -1354,7 +1355,7 @@ class KubernetesInstanceType:
         accelerator_type | str: Type of accelerator
         """
         pattern = re.compile(
-            r'^(?P<cpus>\d+(\.\d+)?)CPU--(?P<memory>\d+(\.\d+)?)GB(?:--(?P<accelerator_count>\d+)(?P<accelerator_type>\S+))?$'  # pylint: disable=line-too-long
+            r'^(?P<cpus>\d+(\.\d+)?)CPU--(?P<memory>\d+(\.\d+)?)GB(?:--(?P<accelerator_type>[\w\d-]+):(?P<accelerator_count>\d+))?$'  # pylint: disable=line-too-long
         )
         match = pattern.match(name)
         if match:
@@ -1400,7 +1401,7 @@ class KubernetesInstanceType:
         # Round up accelerator_count if it is not an int.
         accelerator_count = math.ceil(accelerator_count)
         if accelerator_count > 0:
-            name += f'--{accelerator_count}{accelerator_type}'
+            name += f'--{accelerator_type}:{accelerator_count}'
         return cls(cpus=cpus,
                    memory=memory,
                    accelerator_count=accelerator_count,
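
The new name grammar is "{n}CPU--{k}GB[--{type}:{count}]". A small self-test against the parse regex from the hunk above; the regex is copied verbatim, only the surrounding assertions are new.

import re

pattern = re.compile(
    r'^(?P<cpus>\d+(\.\d+)?)CPU--(?P<memory>\d+(\.\d+)?)GB'
    r'(?:--(?P<accelerator_type>[\w\d-]+):(?P<accelerator_count>\d+))?$')

m = pattern.match('4CPU--16GB--V100:1')
assert m is not None
assert m.group('cpus') == '4'
assert m.group('memory') == '16'
assert m.group('accelerator_type') == 'V100'
assert m.group('accelerator_count') == '1'

# Plain CPU/memory names still parse, with the accelerator groups absent.
assert pattern.match('0.5CPU--1.5GB').group('accelerator_type') is None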
sky/provision/nebius/__init__.py
ADDED
@@ -0,0 +1,11 @@
+"""Nebius provisioner for SkyPilot."""
+
+from sky.provision.nebius.config import bootstrap_instances
+from sky.provision.nebius.instance import cleanup_ports
+from sky.provision.nebius.instance import get_cluster_info
+from sky.provision.nebius.instance import open_ports
+from sky.provision.nebius.instance import query_instances
+from sky.provision.nebius.instance import run_instances
+from sky.provision.nebius.instance import stop_instances
+from sky.provision.nebius.instance import terminate_instances
+from sky.provision.nebius.instance import wait_instances
sky/provision/nebius/config.py
ADDED
@@ -0,0 +1,11 @@
+"""Nebius configuration bootstrapping."""
+
+from sky.provision import common
+
+
+def bootstrap_instances(
+        region: str, cluster_name: str,
+        config: common.ProvisionConfig) -> common.ProvisionConfig:
+    """Bootstraps instances for the given cluster."""
+    del region, cluster_name  # unused
+    return config
sky/provision/nebius/instance.py
ADDED
@@ -0,0 +1,285 @@
+"""Nebius instance provisioning."""
+import time
+from typing import Any, Dict, List, Optional
+
+from sky import sky_logging
+from sky.provision import common
+from sky.provision.nebius import utils
+from sky.utils import common_utils
+from sky.utils import status_lib
+from sky.utils import ux_utils
+
+PENDING_STATUS = ['STARTING', 'DELETING', 'STOPPING']
+
+MAX_RETRIES_TO_LAUNCH = 120  # Maximum number of retries
+
+logger = sky_logging.init_logger(__name__)
+
+
+def _filter_instances(region: str,
+                      cluster_name_on_cloud: str,
+                      status_filters: Optional[List[str]],
+                      head_only: bool = False) -> Dict[str, Any]:
+    project_id = utils.get_project_by_region(region)
+    instances = utils.list_instances(project_id)
+    filtered_instances = {}
+    for instance_id, instance in instances.items():
+        if (status_filters is not None and
+                instance['status'] not in status_filters):
+            continue
+
+        if instance['name'] and instance['name'].startswith(
+                f'{cluster_name_on_cloud}-'):
+            if head_only and instance['name'].endswith('-worker'):
+                continue
+            else:
+                filtered_instances[instance_id] = instance
+    return filtered_instances
+
+
+def _get_head_instance_id(instances: Dict[str, Any]) -> Optional[str]:
+    head_instance_id = None
+    for inst_id, inst in instances.items():
+        if inst['name'].endswith('-head'):
+            head_instance_id = inst_id
+            break
+    return head_instance_id
+
+
+def _wait_until_no_pending(region: str, cluster_name_on_cloud: str) -> None:
+    retry_count = 0
+    while retry_count < MAX_RETRIES_TO_LAUNCH:
+        instances = _filter_instances(region, cluster_name_on_cloud,
+                                      PENDING_STATUS)
+        if not instances:
+            break
+        logger.info(f'Waiting for {len(instances)} instances to be ready '
+                    f'(Attempt {retry_count + 1}/{MAX_RETRIES_TO_LAUNCH}).')
+        time.sleep(utils.POLL_INTERVAL)
+        retry_count += 1
+
+    if retry_count == MAX_RETRIES_TO_LAUNCH:
+        raise TimeoutError(f'Exceeded maximum retries '
+                           f'({MAX_RETRIES_TO_LAUNCH * utils.POLL_INTERVAL}'
+                           f' seconds) while waiting for instances'
+                           f' to be ready.')
+
+
+def run_instances(region: str, cluster_name_on_cloud: str,
+                  config: common.ProvisionConfig) -> common.ProvisionRecord:
+    """Runs instances for the given cluster."""
+    _wait_until_no_pending(region, cluster_name_on_cloud)
+    running_instances = _filter_instances(region, cluster_name_on_cloud,
+                                          ['RUNNING'])
+    head_instance_id = _get_head_instance_id(running_instances)
+    to_start_count = config.count - len(running_instances)
+    if to_start_count < 0:
+        raise RuntimeError(
+            f'Cluster {cluster_name_on_cloud} already has '
+            f'{len(running_instances)} nodes, but {config.count} are required.')
+    if to_start_count == 0:
+        if head_instance_id is None:
+            raise RuntimeError(
+                f'Cluster {cluster_name_on_cloud} has no head node.')
+        logger.info(f'Cluster {cluster_name_on_cloud} already has '
+                    f'{len(running_instances)} nodes, no need to start more.')
+        return common.ProvisionRecord(provider_name='nebius',
+                                      cluster_name=cluster_name_on_cloud,
+                                      region=region,
+                                      zone=None,
+                                      head_instance_id=head_instance_id,
+                                      resumed_instance_ids=[],
+                                      created_instance_ids=[])
+
+    created_instance_ids = []
+    resumed_instance_ids = []
+    stopped_instances = _filter_instances(region, cluster_name_on_cloud,
+                                          ['STOPPED'])
+    if config.resume_stopped_nodes and len(stopped_instances) > to_start_count:
+
+        raise RuntimeError(
+            'The number of running/stopped/stopping instances combined '
+            f'({len(stopped_instances) + len(running_instances)}) in '
+            f'cluster "{cluster_name_on_cloud}" is greater than the '
+            f'number requested by the user ({config.count}). '
+            'This is likely a resource leak. '
+            'Use "sky down" to terminate the cluster.')
+
+    for stopped_instance_id, _ in stopped_instances.items():
+        if to_start_count > 0:
+            try:
+                utils.start(stopped_instance_id)
+                resumed_instance_ids.append(stopped_instance_id)
+                to_start_count -= 1
+                if stopped_instances[stopped_instance_id]['name'].endswith(
+                        '-head'):
+                    head_instance_id = stopped_instance_id
+            except Exception as e:  # pylint: disable=broad-except
+                logger.warning(f'Start instance error: {e}')
+                raise
+            time.sleep(utils.POLL_INTERVAL)  # to avoid fake STOPPED status
+            logger.info(f'Started instance {stopped_instance_id}.')
+
+    for _ in range(to_start_count):
+        node_type = 'head' if head_instance_id is None else 'worker'
+        try:
+            platform, preset = config.node_config['InstanceType'].split('_')
+            instance_id = utils.launch(
+                cluster_name_on_cloud=cluster_name_on_cloud,
+                node_type=node_type,
+                platform=platform,
+                preset=preset,
+                region=region,
+                image_family=config.node_config['ImageId'],
+                disk_size=config.node_config['DiskSize'],
+                user_data=config.node_config['UserData'])
+        except Exception as e:  # pylint: disable=broad-except
+            logger.warning(f'run_instances error: {e}')
+            raise
+        logger.info(f'Launched instance {instance_id}.')
+        created_instance_ids.append(instance_id)
+        if head_instance_id is None:
+            head_instance_id = instance_id
+    assert head_instance_id is not None, 'head_instance_id should not be None'
+    return common.ProvisionRecord(provider_name='nebius',
+                                  cluster_name=cluster_name_on_cloud,
+                                  region=region,
+                                  zone=None,
+                                  head_instance_id=head_instance_id,
+                                  resumed_instance_ids=resumed_instance_ids,
+                                  created_instance_ids=created_instance_ids)
+
+
+def wait_instances(region: str, cluster_name_on_cloud: str,
+                   state: Optional[status_lib.ClusterStatus]) -> None:
+    _wait_until_no_pending(region, cluster_name_on_cloud)
+    if state is not None:
+        if state == status_lib.ClusterStatus.UP:
+            stopped_instances = _filter_instances(region,
+                                                  cluster_name_on_cloud,
+                                                  ['STOPPED'])
+            if stopped_instances:
+                raise RuntimeError(
+                    f'Cluster {cluster_name_on_cloud} is in UP state, but '
+                    f'{len(stopped_instances)} instances are stopped.')
+        if state == status_lib.ClusterStatus.STOPPED:
+            running_instances = _filter_instances(region,
+                                                  cluster_name_on_cloud,
+                                                  ['RUNNING'])
+
+            if running_instances:
+                raise RuntimeError(
+                    f'Cluster {cluster_name_on_cloud} is in STOPPED state, but '
+                    f'{len(running_instances)} instances are running.')
+
+
+def stop_instances(
+    cluster_name_on_cloud: str,
+    provider_config: Optional[Dict[str, Any]] = None,
+    worker_only: bool = False,
+) -> None:
+    assert provider_config is not None
+    exist_instances = _filter_instances(provider_config['region'],
+                                        cluster_name_on_cloud, ['RUNNING'])
+    for instance in exist_instances:
+        if worker_only and instance.endswith('-head'):
+            continue
+        utils.stop(instance)
+
+
+def terminate_instances(
+    cluster_name_on_cloud: str,
+    provider_config: Optional[Dict[str, Any]] = None,
+    worker_only: bool = False,
+) -> None:
+    """See sky/provision/__init__.py"""
+
+    assert provider_config is not None
+    instances = _filter_instances(provider_config['region'],
+                                  cluster_name_on_cloud,
+                                  status_filters=None)
+    for inst_id, inst in instances.items():
+        logger.debug(f'Terminating instance {inst_id}: {inst}')
+        if worker_only and inst['name'].endswith('-head'):
+            continue
+        try:
+            utils.remove(inst_id)
+        except Exception as e:  # pylint: disable=broad-except
+            with ux_utils.print_exception_no_traceback():
+                raise RuntimeError(
+                    f'Failed to terminate instance {inst_id}: '
+                    f'{common_utils.format_exception(e, use_bracket=False)}'
+                ) from e
+    utils.delete_cluster(cluster_name_on_cloud, provider_config['region'])
+
+
+def get_cluster_info(
+        region: str,
+        cluster_name_on_cloud: str,
+        provider_config: Optional[Dict[str, Any]] = None) -> common.ClusterInfo:
+    _wait_until_no_pending(region, cluster_name_on_cloud)
+    running_instances = _filter_instances(region, cluster_name_on_cloud,
+                                          ['RUNNING'])
+    instances: Dict[str, List[common.InstanceInfo]] = {}
+    head_instance_id = None
+    for instance_id, instance_info in running_instances.items():
+        instances[instance_id] = [
+            common.InstanceInfo(
+                instance_id=instance_id,
+                internal_ip=instance_info['internal_ip'],
+                external_ip=instance_info['external_ip'],
+                tags={},
+            )
+        ]
+        if instance_info['name'].endswith('-head'):
+            head_instance_id = instance_id
+    assert head_instance_id is not None
+    return common.ClusterInfo(
+        instances=instances,
+        head_instance_id=head_instance_id,
+        provider_name='nebius',
+        provider_config=provider_config,
+    )
+
+
+def query_instances(
+    cluster_name_on_cloud: str,
+    provider_config: Optional[Dict[str, Any]] = None,
+    non_terminated_only: bool = True,
+) -> Dict[str, Optional[status_lib.ClusterStatus]]:
+    """See sky/provision/__init__.py"""
+    assert provider_config is not None, (cluster_name_on_cloud, provider_config)
+    instances = _filter_instances(provider_config['region'],
+                                  cluster_name_on_cloud, None)
+
+    status_map = {
+        'STARTING': status_lib.ClusterStatus.INIT,
+        'RUNNING': status_lib.ClusterStatus.UP,
+        'STOPPED': status_lib.ClusterStatus.STOPPED,
+        'STOPPING': status_lib.ClusterStatus.STOPPED,
+        'DELETING': status_lib.ClusterStatus.STOPPED,
+    }
+    statuses: Dict[str, Optional[status_lib.ClusterStatus]] = {}
+    for inst_id, inst in instances.items():
+        status = status_map[inst['status']]
+        if non_terminated_only and status is None:
+            continue
+        statuses[inst_id] = status
+    return statuses
+
+
+def open_ports(
+    cluster_name_on_cloud: str,
+    ports: List[str],
+    provider_config: Optional[Dict[str, Any]] = None,
+) -> None:
+    """See sky/provision/__init__.py"""
+    logger.debug(f'Skip opening ports {ports} for Nebius instances, as all '
+                 'ports are open by default.')
+    del cluster_name_on_cloud, provider_config, ports
+
+
+def cleanup_ports(
+    cluster_name_on_cloud: str,
+    ports: List[str],
+    provider_config: Optional[Dict[str, Any]] = None,
+) -> None:
+    del cluster_name_on_cloud, ports, provider_config  # Unused.
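
query_instances collapses Nebius VM states into SkyPilot cluster statuses via the status_map above. Note that STOPPING and DELETING are already reported as STOPPED, and that, as written, no state maps to None, so the non_terminated_only branch never filters anything: terminated instances simply stop appearing in the listing. A self-contained illustration of the collapsing; ClusterStatus here is a stand-in enum, not sky.utils.status_lib.

import enum


class ClusterStatus(enum.Enum):
    INIT = 'INIT'
    UP = 'UP'
    STOPPED = 'STOPPED'


STATUS_MAP = {
    'STARTING': ClusterStatus.INIT,
    'RUNNING': ClusterStatus.UP,
    'STOPPED': ClusterStatus.STOPPED,
    'STOPPING': ClusterStatus.STOPPED,
    'DELETING': ClusterStatus.STOPPED,
}

# A hypothetical listing keyed by instance ID, mirroring _filter_instances.
instances = {'computeinstance-xyz': {'status': 'STOPPING'}}
statuses = {inst_id: STATUS_MAP[inst['status']]
            for inst_id, inst in instances.items()}
assert statuses == {'computeinstance-xyz': ClusterStatus.STOPPED}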