skypilot-nightly 1.0.0.dev20250615__py3-none-any.whl → 1.0.0.dev20250617__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -4
- sky/backends/cloud_vm_ray_backend.py +43 -60
- sky/cli.py +55 -637
- sky/client/cli.py +55 -637
- sky/clouds/kubernetes.py +3 -0
- sky/clouds/scp.py +7 -26
- sky/clouds/utils/scp_utils.py +177 -124
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-36bc0962129f72df.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-cf490d1fa38f3740.js +16 -0
- sky/dashboard/out/_next/static/{R07f8gwfXT1U0zRznq4Lg → vA3PPpkBwpRTRNBHFYAw_}/_buildManifest.js +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/jobs/controller.py +98 -31
- sky/jobs/scheduler.py +37 -29
- sky/jobs/server/core.py +36 -3
- sky/jobs/state.py +69 -9
- sky/jobs/utils.py +11 -0
- sky/provision/__init__.py +1 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +528 -0
- sky/resources.py +164 -29
- sky/skylet/constants.py +39 -0
- sky/skylet/job_lib.py +8 -0
- sky/task.py +171 -21
- sky/templates/kubernetes-ray.yml.j2 +51 -4
- sky/templates/scp-ray.yml.j2 +3 -50
- sky/users/permission.py +19 -36
- sky/utils/command_runner.py +1 -1
- sky/utils/common_utils.py +16 -14
- sky/utils/context.py +1 -1
- sky/utils/controller_utils.py +12 -3
- sky/utils/dag_utils.py +17 -4
- sky/utils/kubernetes/deploy_remote_cluster.py +17 -8
- sky/utils/schemas.py +43 -5
- {skypilot_nightly-1.0.0.dev20250615.dist-info → skypilot_nightly-1.0.0.dev20250617.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250615.dist-info → skypilot_nightly-1.0.0.dev20250617.dist-info}/RECORD +54 -57
- sky/benchmark/__init__.py +0 -0
- sky/benchmark/benchmark_state.py +0 -295
- sky/benchmark/benchmark_utils.py +0 -641
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-59950b2f83b66e48.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-b3dbf38b51cb29be.js +0 -16
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- /sky/dashboard/out/_next/static/{R07f8gwfXT1U0zRznq4Lg → vA3PPpkBwpRTRNBHFYAw_}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250615.dist-info → skypilot_nightly-1.0.0.dev20250617.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250615.dist-info → skypilot_nightly-1.0.0.dev20250617.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250615.dist-info → skypilot_nightly-1.0.0.dev20250617.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250615.dist-info → skypilot_nightly-1.0.0.dev20250617.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,528 @@
|
|
1
|
+
"""SCP instance provisioning."""
|
2
|
+
|
3
|
+
import logging
|
4
|
+
import random
|
5
|
+
import string
|
6
|
+
import time
|
7
|
+
from typing import Any, Dict, List, Optional
|
8
|
+
|
9
|
+
from sky.clouds.utils import scp_utils
|
10
|
+
from sky.provision import common
|
11
|
+
from sky.utils import status_lib
|
12
|
+
|
13
|
+
logger = logging.getLogger(__name__)
|
14
|
+
|
15
|
+
|
16
|
+
def run_instances(region: str, cluster_name_on_cloud: str,
                  config: common.ProvisionConfig) -> common.ProvisionRecord:
    """Bring up the single SCP virtual server that backs a cluster.

    A running server is reused if present; otherwise a stopped server is
    restarted; only when neither exists is a new server created, together
    with a dedicated security group.

    Args:
        region: Region name; only echoed back in the returned record.
        cluster_name_on_cloud: Name used as ``virtualServerName`` on SCP.
        config: Provisioning request. ``node_config['zone_id']`` selects the
            zone, ``count`` is the wanted number of nodes and
            ``docker_config`` is used as the instance creation payload.

    Returns:
        A ``ProvisionRecord`` naming the head instance and listing which
        instance ids were resumed or newly created.

    Raises:
        RuntimeError: If more servers are running than requested, if no
            servers are needed but none is running, or if no server could
            be created in any VPC/subnet.
    """

    zone_id = config.node_config['zone_id']
    running_instances = _filter_instances(cluster_name_on_cloud, ['RUNNING'])
    head_instance_id = _get_head_instance_id(running_instances)

    to_start_count = config.count - len(running_instances)
    if to_start_count < 0:
        raise RuntimeError(
            f'Cluster {cluster_name_on_cloud} already has '
            f'{len(running_instances)} nodes, but {config.count} are required.')

    if to_start_count == 0:
        if head_instance_id is None:
            raise RuntimeError(
                f'Cluster {cluster_name_on_cloud} has no head node.')
        logger.info(f'Cluster {cluster_name_on_cloud} already has '
                    f'{len(running_instances)} nodes, no need to start more.')
        return common.ProvisionRecord(provider_name='scp',
                                      cluster_name=cluster_name_on_cloud,
                                      region=region,
                                      zone=None,
                                      head_instance_id=head_instance_id,
                                      resumed_instance_ids=[],
                                      created_instance_ids=[])

    stopped_instances = _filter_instances(cluster_name_on_cloud, ['STOPPED'])
    if to_start_count <= len(stopped_instances):
        # Enough stopped servers exist: restart the first one and wait for
        # it to report RUNNING instead of creating anything new.
        head_instance_id = _get_head_instance_id(stopped_instances)
        scp_utils.SCPClient().start_instance(head_instance_id)
        while True:
            instance_info = scp_utils.SCPClient().get_instance_info(
                head_instance_id)
            if instance_info['virtualServerState'] == 'RUNNING':
                break
            time.sleep(2)
        resumed_instance_ids = [head_instance_id]
        return common.ProvisionRecord(provider_name='scp',
                                      cluster_name=cluster_name_on_cloud,
                                      region=region,
                                      zone=None,
                                      head_instance_id=head_instance_id,
                                      resumed_instance_ids=resumed_instance_ids,
                                      created_instance_ids=[])

    # SCP does not support multi-node
    # NOTE(review): this aliases config.docker_config, so the assignments
    # below update that object in place -- confirm callers expect this.
    instance_config = config.docker_config
    instance_config['virtualServerName'] = cluster_name_on_cloud

    instance_id = None
    vpc_subnets = _get_or_create_vpc_subnets(zone_id)
    for vpc, subnets in vpc_subnets.items():
        sg_id = _create_security_group(zone_id, vpc)
        if sg_id is None:
            continue
        try:
            instance_config['securityGroupIds'] = [sg_id]
            for subnet in subnets:
                instance_config['nic']['subnetId'] = subnet
                instance_id = _create_instance(vpc, instance_config)
                if instance_id is not None:
                    break
        except Exception as e:  # pylint: disable=broad-except
            logger.error(f'run_instances error: {e}')

        if instance_id is not None:
            # Exactly one server is wanted: stop here so that no further
            # VPC gets its own security group and server.
            break

        # Nothing was created in this VPC, so the security group made for
        # it is unused. Remove it (best effort) before trying the next VPC.
        try:
            _delete_security_group(sg_id)
        except Exception as e:  # pylint: disable=broad-except
            logger.error(f'run_instances error: {e}')

    if instance_id is None:
        raise RuntimeError('instance creation error')

    if head_instance_id is None:
        head_instance_id = instance_id

    created_instance_ids = [instance_id]

    return common.ProvisionRecord(provider_name='scp',
                                  cluster_name=cluster_name_on_cloud,
                                  region=region,
                                  zone=None,
                                  head_instance_id=head_instance_id,
                                  resumed_instance_ids=[],
                                  created_instance_ids=created_instance_ids)
|
99
|
+
|
100
|
+
|
101
|
+
def _get_or_create_vpc_subnets(zone_id):
    """Return the usable VPC -> subnets mapping, creating one if needed.

    While `_get_vcp_subnets` reports nothing for the zone, a VPC, a subnet
    and an internet gateway are created in that order, each one being polled
    every 5 seconds until it reports its ready state. Any error is logged
    after a 10 second pause and the whole sequence is retried; there is no
    retry limit.

    Args:
        zone_id: Service zone to look in / create the network in.

    Returns:
        The mapping produced by `_get_vcp_subnets(zone_id)`.
    """

    def _poll_until(fetch, resource_id, state_key, ready_state):
        # Re-query the resource every 5 seconds until it reaches the state.
        while fetch(resource_id)[state_key] != ready_state:
            time.sleep(5)

    def _vpc_info(rid):
        return scp_utils.SCPClient().get_vpc_info(rid)

    def _subnet_info(rid):
        return scp_utils.SCPClient().get_subnet_info(rid)

    def _gateway_info(rid):
        return scp_utils.SCPClient().get_internet_gateway_info(rid)

    while not _get_vcp_subnets(zone_id):
        try:
            created_vpc = scp_utils.SCPClient().create_vpc(zone_id)
            time.sleep(5)
            vpc_id = created_vpc['resourceId']
            _poll_until(_vpc_info, vpc_id, 'vpcState', 'ACTIVE')

            created_subnet = scp_utils.SCPClient().create_subnet(
                vpc_id, zone_id)
            time.sleep(5)
            subnet_id = created_subnet['resourceId']
            _poll_until(_subnet_info, subnet_id, 'subnetState', 'ACTIVE')

            created_gateway = scp_utils.SCPClient().create_internet_gateway(
                vpc_id)
            time.sleep(5)
            gateway_id = created_gateway['resourceId']
            _poll_until(_gateway_info, gateway_id, 'internetGatewayState',
                        'ATTACHED')

            # The VPC state is checked once more after the gateway attaches.
            _poll_until(_vpc_info, vpc_id, 'vpcState', 'ACTIVE')
        except Exception as err:  # pylint: disable=broad-except
            time.sleep(10)
            logger.error(f'vpc creation error: {err}')
        else:
            break

    return _get_vcp_subnets(zone_id)
|
150
|
+
|
151
|
+
|
152
|
+
def _get_vcp_subnets(zone_id):
    """Map every usable VPC of a zone to the ids of its active subnets.

    A VPC is kept when its state is ``ACTIVE``, an internet gateway in state
    ``ATTACHED`` references it, and it has at least one ``ACTIVE`` subnet.

    Args:
        zone_id: Zone passed to the VPC listing call.

    Returns:
        A dict of ``vpcId`` -> list of ``subnetId``; empty if nothing
        qualifies.
    """
    active_vpc_ids = []
    for vpc_item in scp_utils.SCPClient().get_vpcs(zone_id):
        if vpc_item['vpcState'] == 'ACTIVE':
            active_vpc_ids.append(vpc_item['vpcId'])

    gateway_vpc_ids = []
    for gateway_item in scp_utils.SCPClient().get_internet_gateway():
        if gateway_item['internetGatewayState'] == 'ATTACHED':
            gateway_vpc_ids.append(gateway_item['vpcId'])

    all_subnets = scp_utils.SCPClient().get_subnets()

    usable = {}
    for vpc_id in active_vpc_ids:
        if vpc_id not in gateway_vpc_ids:
            # No attached internet gateway: the VPC is not considered.
            continue
        subnet_ids = []
        for subnet_item in all_subnets:
            if (subnet_item['subnetState'] == 'ACTIVE' and
                    subnet_item['vpcId'] == vpc_id):
                subnet_ids.append(subnet_item['subnetId'])
        if subnet_ids:
            usable[vpc_id] = subnet_ids

    return usable
|
180
|
+
|
181
|
+
|
182
|
+
def _filter_instances(cluster_name_on_cloud,
                      status_filter: Optional[List[str]]):
    """List the virtual servers that belong to one cluster.

    Args:
        cluster_name_on_cloud: Only servers whose ``virtualServerName``
            equals this value are returned.
        status_filter: If given, additionally keep only servers whose
            ``virtualServerState`` is in this list. ``None`` keeps servers
            in any state.

    Returns:
        A list of the matching instance records, in listing order.
    """
    instances = scp_utils.SCPClient().get_instances()
    # The name check must apply even without a status filter; otherwise
    # every server visible to the account would be attributed to this
    # cluster.
    return [
        instance for instance in instances
        if instance['virtualServerName'] == cluster_name_on_cloud and
        (status_filter is None or
         instance['virtualServerState'] in status_filter)
    ]
|
195
|
+
|
196
|
+
|
197
|
+
def _get_head_instance_id(instances):
|
198
|
+
head_instance_id = None
|
199
|
+
if len(instances) > 0:
|
200
|
+
head_instance_id = instances[0]['virtualServerId']
|
201
|
+
return head_instance_id
|
202
|
+
|
203
|
+
|
204
|
+
def _create_security_group(zone_id, vpc):
    """Create a randomly named security group and add its IN/OUT rules.

    The group is named ``sky`` plus 8 random lowercase letters. After
    creation the group listing is polled every 5 seconds until the new
    group reports ``ACTIVE``; then one ``IN`` and one ``OUT`` rule are added
    with ``None`` as the ports argument.

    Args:
        zone_id: Zone in which the group is created.
        vpc: Id of the VPC that owns the group.

    Returns:
        The new security group id, or ``None`` if any step raised (in which
        case the group, if already created, is deleted again and the error
        is logged).
    """
    random_suffix = ''.join(random.choices(string.ascii_lowercase, k=8))
    sg_name = 'sky' + random_suffix

    rollback = []
    try:
        created = scp_utils.SCPClient().create_security_group(
            zone_id, vpc, sg_name)
        sg_id = created['resourceId']
        rollback.append(lambda: _delete_security_group(sg_id))

        active = False
        while not active:
            listed = scp_utils.SCPClient().get_security_groups(vpc, sg_name)
            states = [
                group['securityGroupState']
                for group in listed
                if group['securityGroupId'] == sg_id
            ]
            active = bool(states) and states[0] == 'ACTIVE'
            if not active:
                time.sleep(5)

        for direction in ('IN', 'OUT'):
            scp_utils.SCPClient().add_security_group_rule(
                sg_id, direction, None)

        return sg_id
    except Exception as err:  # pylint: disable=broad-except
        _undo_functions(rollback)
        logger.error(f'security group creation error: {err}')
        return None
|
233
|
+
|
234
|
+
|
235
|
+
def _delete_security_group(sg_id):
    """Delete a security group and block until it is no longer listed.

    After the delete request, the full security group listing is fetched
    every 5 seconds until no entry with this id remains. There is no
    timeout.

    Args:
        sg_id: Id of the security group to delete.
    """
    scp_utils.SCPClient().delete_security_group(sg_id)

    still_listed = True
    while still_listed:
        time.sleep(5)
        listing = scp_utils.SCPClient().get_security_groups()
        matching_states = [
            group['securityGroupState']
            for group in listing
            if group['securityGroupId'] == sg_id
        ]
        still_listed = bool(matching_states)
|
247
|
+
|
248
|
+
|
249
|
+
def _undo_functions(undo_func_list):
|
250
|
+
while undo_func_list:
|
251
|
+
func = undo_func_list.pop()
|
252
|
+
func()
|
253
|
+
|
254
|
+
|
255
|
+
def _create_instance(vpc_id, instance_config):
    """Create a virtual server and open firewall rules for its IP.

    The server is created from ``instance_config`` and polled every 10
    seconds until it reports ``RUNNING``. Then one ``IN`` and one ``OUT``
    firewall rule are added for the server's ``ip`` on the firewall of
    ``vpc_id``.

    Args:
        vpc_id: VPC whose firewall receives the rules.
        instance_config: Payload passed to the instance creation call.

    Returns:
        The new instance id, or ``None`` if any step raised. On failure the
        error is logged and everything created so far (firewall rules, then
        the server) is removed again.
    """
    undo_func_stack = []
    try:
        instance = scp_utils.SCPClient().create_instance(instance_config)
        instance_id = instance['resourceId']
        # Register the cleanup as soon as the server exists, so a failure
        # while waiting for RUNNING does not leave it behind.
        undo_func_stack.append(lambda: _delete_instance(instance_id))
        while True:
            time.sleep(10)
            instance_info = scp_utils.SCPClient().get_instance_info(instance_id)
            if instance_info['virtualServerState'] == 'RUNNING':
                break
        firewall_id = _get_firewall_id(vpc_id)
        internal_ip = instance_info['ip']
        in_rule_id = _add_firewall_rule(firewall_id, internal_ip, 'IN', None)
        undo_func_stack.append(
            lambda: _delete_firewall_rule(firewall_id, in_rule_id))
        out_rule_id = _add_firewall_rule(firewall_id, internal_ip, 'OUT', None)
        undo_func_stack.append(
            lambda: _delete_firewall_rule(firewall_id, out_rule_id))
        return instance_id

    except Exception as e:  # pylint: disable=broad-except
        # Log first: the rollback itself may raise and would otherwise hide
        # the original cause.
        logger.error(f'instance creation error: {e}')
        _undo_functions(undo_func_stack)
        return None
|
280
|
+
|
281
|
+
|
282
|
+
def _delete_instance(instance_id):
    """Terminate a virtual server and block until it is no longer listed.

    After the terminate request, the instance listing is fetched every 10
    seconds until no entry with this id remains. There is no timeout.

    Args:
        instance_id: ``virtualServerId`` of the server to terminate.
    """
    scp_utils.SCPClient().terminate_instance(instance_id)

    still_listed = True
    while still_listed:
        time.sleep(10)
        listing = scp_utils.SCPClient().get_instances()
        matches = [
            server['virtualServerId']
            for server in listing
            if server['virtualServerId'] == instance_id
        ]
        still_listed = bool(matches)
|
294
|
+
|
295
|
+
|
296
|
+
def _get_firewall_id(vpc_id):
    """Return the id of the first usable firewall attached to a VPC.

    A firewall is usable when its ``firewallState`` is ``ACTIVE`` or
    ``DEPLOYING``.

    Args:
        vpc_id: VPC the firewall must belong to.

    Returns:
        The ``firewallId`` of the first match in listing order.

    Raises:
        IndexError: If no firewall matches.
    """
    usable_states = ['ACTIVE', 'DEPLOYING']
    candidates = []
    for fw in scp_utils.SCPClient().get_firewalls():
        if fw['vpcId'] == vpc_id:
            if fw['firewallState'] in usable_states:
                candidates.append(fw['firewallId'])
    return candidates[0]
|
305
|
+
|
306
|
+
|
307
|
+
def _add_firewall_rule(firewall_id, internal_ip, direction,
                       ports: Optional[List[str]]):
    """Add a firewall rule and wait until it becomes ``ACTIVE``.

    Args:
        firewall_id: Firewall that receives the rule.
        internal_ip: Instance IP the rule is created for.
        direction: Direction string forwarded to the client; this file
            passes ``'IN'`` or ``'OUT'``.
        ports: Ports forwarded to the client; ``None`` is passed by callers
            that do not restrict ports (exact meaning is defined by
            ``SCPClient.add_firewall_rule`` -- not visible here).

    Returns:
        The id of the created rule.

    Raises:
        RuntimeError: After 300 failed attempts.
    """
    attempts = 0
    max_attempts = 300

    while attempts < max_attempts:
        try:
            rule_info = scp_utils.SCPClient().add_firewall_rule(
                firewall_id, internal_ip, direction, ports)
            rule_id = rule_info['resourceId']
            while True:
                rule_info = scp_utils.SCPClient().get_firewall_rule_info(
                    firewall_id, rule_id)
                if rule_info['ruleState'] == 'ACTIVE':
                    return rule_id
                # Pause between polls instead of hammering the API in a
                # tight loop while the rule is being deployed.
                time.sleep(5)
        except Exception as e:  # pylint: disable=broad-except
            # NOTE(review): a failure while polling also lands here, and the
            # retry issues a new add request -- confirm this cannot leave a
            # duplicate rule behind.
            attempts += 1
            time.sleep(10)
            logger.error(f'add firewall rule error: {e}')
            continue
    raise RuntimeError('add firewall rule error')
|
328
|
+
|
329
|
+
|
330
|
+
def _delete_firewall_rule(firewall_id, rule_ids):
    """Delete firewall rules and wait until none of them is listed anymore.

    Args:
        firewall_id: Firewall that owns the rules.
        rule_ids: A single rule id or a list of rule ids. An empty list is
            a no-op.

    Raises:
        RuntimeError: If the rules are still present (or could not be
            checked) after 300 attempts, 5 seconds apart.
    """
    if not isinstance(rule_ids, list):
        rule_ids = [rule_ids]
    if not rule_ids:
        # Nothing to delete; avoid sending an empty delete request.
        return

    max_attempts = 300
    for _ in range(max_attempts):
        try:
            scp_utils.SCPClient().delete_firewall_rule(firewall_id, rule_ids)
        except Exception as e:  # pylint: disable=broad-except
            # A repeated delete may fail simply because an earlier request
            # is still being processed; the check below decides.
            logger.error(f'delete firewall rule error: {e}')
        try:
            if not _remaining_firewall_rule(firewall_id, rule_ids):
                return
        except Exception as e:  # pylint: disable=broad-except
            logger.error(f'delete firewall rule error: {e}')
        # Every unsuccessful round counts as an attempt and is followed by
        # a pause, so the loop is bounded and never spins.
        time.sleep(5)
    raise RuntimeError('delete firewall rule error')


def _remaining_firewall_rule(firewall_id, rule_ids):
    """Tell whether any of the given rule ids is still on the firewall.

    Args:
        firewall_id: Firewall whose rules are listed.
        rule_ids: Iterable of rule ids to look for.

    Returns:
        True if at least one id is still listed, False otherwise.
    """
    firewall_rules = scp_utils.SCPClient().get_firewall_rules(firewall_id)
    # The listing holds rule records keyed by 'ruleId' (as read elsewhere in
    # this module), so the ids must be extracted before the membership test.
    existing_ids = {rule['ruleId'] for rule in firewall_rules}
    return any(rule_id in existing_ids for rule_id in rule_ids)
|
355
|
+
|
356
|
+
|
357
|
+
def _get_firewall_rule_ids(instance_info, firewall_id,
                           ports: Optional[List[str]]):
    """Collect the ids of firewall rules that refer to an instance's IP.

    Only the first entry of each rule's source and destination address
    lists is compared.

    Args:
        instance_info: Instance record; its ``ip`` field is matched.
        firewall_id: Firewall whose rules are inspected.
        ports: If given, select rules from ``0.0.0.0/0`` to the instance IP
            whose comma-joined ``tcpServices`` equals the comma-joined
            ``ports``. If ``None``, select every rule between the instance
            IP and ``0.0.0.0/0`` in either direction.

    Returns:
        A list of matching ``ruleId`` values.
    """
    anywhere = '0.0.0.0/0'
    instance_ip = instance_info['ip']
    listed_rules = scp_utils.SCPClient().get_firewall_rules(firewall_id)
    matched_ids = []

    if ports is None:
        for rule in listed_rules:
            dst_ip = rule['destinationIpAddresses'][0]
            src_ip = rule['sourceIpAddresses'][0]
            # Inbound rule: anywhere -> instance.
            if instance_ip == dst_ip and anywhere == src_ip:
                matched_ids.append(rule['ruleId'])
            # Outbound rule: instance -> anywhere.
            if instance_ip == src_ip and anywhere == dst_ip:
                matched_ids.append(rule['ruleId'])
        return matched_ids

    wanted_ports = ','.join(ports)
    for rule in listed_rules:
        rule_ports = ','.join(rule['tcpServices'])
        if (instance_ip == rule['destinationIpAddresses'][0] and
                anywhere == rule['sourceIpAddresses'][0] and
                wanted_ports == rule_ports):
            matched_ids.append(rule['ruleId'])
    return matched_ids
|
381
|
+
|
382
|
+
|
383
|
+
def stop_instances(
    cluster_name_on_cloud: str,
    provider_config: Optional[Dict[str, Any]] = None,
    worker_only: bool = False,
) -> None:
    """Stop every virtual server named after the cluster.

    Each matching server is stopped in turn and polled (2 seconds apart)
    until it reports ``STOPPED``. There is no timeout.

    Args:
        cluster_name_on_cloud: ``virtualServerName`` to match.
        provider_config: Unused.
        worker_only: Unused.
    """
    del provider_config, worker_only

    for server in scp_utils.SCPClient().get_instances():
        if server['virtualServerName'] != cluster_name_on_cloud:
            continue
        server_id = server['virtualServerId']
        scp_utils.SCPClient().stop_instance(server_id)

        stopped = False
        while not stopped:
            info = scp_utils.SCPClient().get_instance_info(server_id)
            time.sleep(2)
            stopped = info['virtualServerState'] == 'STOPPED'
|
401
|
+
|
402
|
+
|
403
|
+
def terminate_instances(
    cluster_name_on_cloud: str,
    provider_config: Optional[Dict[str, Any]] = None,
    worker_only: bool = False,
) -> None:
    """Tear down every virtual server named after the cluster.

    For each matching server the firewall rules that refer to its IP are
    removed first, then the server, then its first security group. Any
    error for a server is logged and not re-raised, so the remaining steps
    for that server are skipped and the loop moves on.

    Args:
        cluster_name_on_cloud: ``virtualServerName`` to match.
        provider_config: Unused.
        worker_only: Unused.
    """
    del provider_config, worker_only

    for server in scp_utils.SCPClient().get_instances():
        if server['virtualServerName'] != cluster_name_on_cloud:
            continue
        try:
            server_id = server['virtualServerId']
            info = scp_utils.SCPClient().get_instance_info(server_id)
            owning_vpc = info['vpcId']
            security_group = info['securityGroupIds'][0]['securityGroupId']
            firewall = _get_firewall_id(owning_vpc)
            _delete_firewall_rule(
                firewall, _get_firewall_rule_ids(info, firewall, None))
            _delete_instance(server_id)
            _delete_security_group(security_group)
        except Exception as err:  # pylint: disable=broad-except
            logger.error(f'terminate_instances error: {err}')
|
427
|
+
|
428
|
+
|
429
|
+
def query_instances(
    cluster_name_on_cloud: str,
    provider_config: Optional[Dict[str, Any]] = None,
    non_terminated_only: bool = True,
) -> Dict[str, Optional[status_lib.ClusterStatus]]:
    """Report the cluster status of each instance.

    The instances are those returned by
    ``_filter_instances(cluster_name_on_cloud, None)``.

    Args:
        cluster_name_on_cloud: Cluster name forwarded to the instance
            lookup.
        provider_config: Must not be ``None`` (asserted); otherwise unused.
        non_terminated_only: If true, instances in ``TERMINATING`` or
            ``TERMINATED`` state are left out of the result.

    Returns:
        A dict of ``virtualServerId`` -> ``ClusterStatus`` (``None`` for
        terminating/terminated instances).

    Raises:
        KeyError: If an instance reports a state not in the mapping below.
    """

    assert provider_config is not None, (cluster_name_on_cloud, provider_config)
    servers = _filter_instances(cluster_name_on_cloud, None)

    init = status_lib.ClusterStatus.INIT
    up = status_lib.ClusterStatus.UP
    stopped = status_lib.ClusterStatus.STOPPED
    state_to_status = {
        'CREATING': init,
        'EDITING': init,
        'RUNNING': up,
        'STARTING': init,
        'RESTARTING': init,
        'STOPPING': stopped,
        'STOPPED': stopped,
        'TERMINATING': None,
        'TERMINATED': None,
    }

    result = {}
    for server in servers:
        cluster_status = state_to_status[server['virtualServerState']]
        if cluster_status is not None or not non_terminated_only:
            result[server['virtualServerId']] = cluster_status
    return result
|
457
|
+
|
458
|
+
|
459
|
+
def wait_instances(region: str, cluster_name_on_cloud: str, state: str) -> None:
    """Do nothing: all arguments are discarded and no waiting takes place."""
    del state
    del cluster_name_on_cloud
    del region
|
461
|
+
|
462
|
+
|
463
|
+
def get_cluster_info(
        region: str,
        cluster_name_on_cloud: str,
        provider_config: Optional[Dict[str, Any]] = None) -> common.ClusterInfo:
    """Describe the running instances of a cluster.

    Args:
        region: Unused.
        cluster_name_on_cloud: Cluster whose ``RUNNING`` instances are
            listed.
        provider_config: Passed through unchanged into the result.

    Returns:
        A ``ClusterInfo`` with one ``InstanceInfo`` per running instance
        (internal IP from the listing, external IP looked up through the
        client, empty tags); the head is the first running instance, or
        ``None`` if there is none.
    """
    del region

    running = _filter_instances(cluster_name_on_cloud, ['RUNNING'])
    head_id = _get_head_instance_id(running)

    info_by_id = {}
    for server in running:
        server_id = server['virtualServerId']
        private_ip = server['ip']
        public_ip = scp_utils.SCPClient().get_external_ip(
            server_id, private_ip)
        info_by_id[server_id] = [
            common.InstanceInfo(instance_id=server_id,
                                internal_ip=private_ip,
                                external_ip=public_ip,
                                tags={})
        ]

    return common.ClusterInfo(
        instances=info_by_id,
        head_instance_id=head_id,
        provider_name='scp',
        provider_config=provider_config,
    )
|
489
|
+
|
490
|
+
|
491
|
+
def open_ports(
    cluster_name_on_cloud: str,
    ports: List[str],
    provider_config: Optional[Dict[str, Any]] = None,
) -> None:
    """Open inbound ports for every virtual server named after the cluster.

    For each matching server an ``IN`` rule for ``ports`` is added to its
    first security group, and an ``IN`` firewall rule for the server's IP
    is added on the firewall of its VPC.

    Args:
        cluster_name_on_cloud: ``virtualServerName`` to match.
        ports: Ports forwarded to both rule-creation calls.
        provider_config: Unused.
    """

    del provider_config

    for server in scp_utils.SCPClient().get_instances():
        if server['virtualServerName'] != cluster_name_on_cloud:
            continue
        info = scp_utils.SCPClient().get_instance_info(
            server['virtualServerId'])

        # Security group first, then the VPC firewall.
        security_group = info['securityGroupIds'][0]['securityGroupId']
        scp_utils.SCPClient().add_security_group_rule(security_group, 'IN',
                                                      ports)

        owning_vpc = info['vpcId']
        private_ip = info['ip']
        firewall = _get_firewall_id(owning_vpc)
        _add_firewall_rule(firewall, private_ip, 'IN', ports)
|
510
|
+
|
511
|
+
|
512
|
+
def cleanup_ports(
    cluster_name_on_cloud: str,
    ports: List[str],
    provider_config: Optional[Dict[str, Any]] = None,
) -> None:
    """Remove the firewall rules that opened ``ports`` for the cluster.

    For each virtual server named after the cluster, the firewall rules
    matching its IP and ``ports`` are looked up and deleted. Only firewall
    rules are touched here; security-group rules are not modified.

    Args:
        cluster_name_on_cloud: ``virtualServerName`` to match.
        ports: Ports whose firewall rules should be removed.
        provider_config: Unused.
    """

    del provider_config

    for server in scp_utils.SCPClient().get_instances():
        if server['virtualServerName'] != cluster_name_on_cloud:
            continue
        info = scp_utils.SCPClient().get_instance_info(
            server['virtualServerId'])
        firewall = _get_firewall_id(info['vpcId'])
        stale_rules = _get_firewall_rule_ids(info, firewall, ports)
        _delete_firewall_rule(firewall, stale_rules)
|