skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +64 -32
- sky/adaptors/aws.py +23 -6
- sky/adaptors/azure.py +432 -15
- sky/adaptors/cloudflare.py +5 -5
- sky/adaptors/common.py +19 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/gcp.py +3 -2
- sky/adaptors/kubernetes.py +122 -88
- sky/adaptors/nebius.py +100 -0
- sky/adaptors/oci.py +39 -1
- sky/adaptors/vast.py +29 -0
- sky/admin_policy.py +101 -0
- sky/authentication.py +117 -98
- sky/backends/backend.py +52 -20
- sky/backends/backend_utils.py +669 -557
- sky/backends/cloud_vm_ray_backend.py +1099 -808
- sky/backends/local_docker_backend.py +14 -8
- sky/backends/wheel_utils.py +38 -20
- sky/benchmark/benchmark_utils.py +22 -23
- sky/check.py +76 -27
- sky/cli.py +1586 -1139
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5683 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1765 -0
- sky/cloud_stores.py +283 -19
- sky/clouds/__init__.py +7 -2
- sky/clouds/aws.py +303 -112
- sky/clouds/azure.py +185 -179
- sky/clouds/cloud.py +115 -37
- sky/clouds/cudo.py +29 -22
- sky/clouds/do.py +313 -0
- sky/clouds/fluidstack.py +44 -54
- sky/clouds/gcp.py +206 -65
- sky/clouds/ibm.py +26 -21
- sky/clouds/kubernetes.py +345 -91
- sky/clouds/lambda_cloud.py +40 -29
- sky/clouds/nebius.py +297 -0
- sky/clouds/oci.py +129 -90
- sky/clouds/paperspace.py +22 -18
- sky/clouds/runpod.py +53 -34
- sky/clouds/scp.py +28 -24
- sky/clouds/service_catalog/__init__.py +19 -13
- sky/clouds/service_catalog/aws_catalog.py +29 -12
- sky/clouds/service_catalog/azure_catalog.py +33 -6
- sky/clouds/service_catalog/common.py +95 -75
- sky/clouds/service_catalog/constants.py +3 -3
- sky/clouds/service_catalog/cudo_catalog.py +13 -3
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
- sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
- sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
- sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/gcp_catalog.py +16 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
- sky/clouds/service_catalog/lambda_catalog.py +8 -3
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/clouds/service_catalog/oci_catalog.py +31 -4
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vast_catalog.py +104 -0
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/utils/aws_utils.py +65 -0
- sky/clouds/utils/azure_utils.py +91 -0
- sky/clouds/utils/gcp_utils.py +5 -9
- sky/clouds/utils/oci_utils.py +47 -5
- sky/clouds/utils/scp_utils.py +4 -3
- sky/clouds/vast.py +280 -0
- sky/clouds/vsphere.py +22 -18
- sky/core.py +361 -107
- sky/dag.py +41 -28
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +211 -32
- sky/data/mounting_utils.py +182 -30
- sky/data/storage.py +2118 -270
- sky/data/storage_utils.py +126 -5
- sky/exceptions.py +179 -8
- sky/execution.py +158 -85
- sky/global_user_state.py +150 -34
- sky/jobs/__init__.py +12 -10
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +302 -0
- sky/jobs/constants.py +49 -11
- sky/jobs/controller.py +161 -99
- sky/jobs/dashboard/dashboard.py +171 -25
- sky/jobs/dashboard/templates/index.html +572 -60
- sky/jobs/recovery_strategy.py +157 -156
- sky/jobs/scheduler.py +307 -0
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/server/core.py +598 -0
- sky/jobs/server/dashboard_utils.py +69 -0
- sky/jobs/server/server.py +190 -0
- sky/jobs/state.py +627 -122
- sky/jobs/utils.py +615 -206
- sky/models.py +27 -0
- sky/optimizer.py +142 -83
- sky/provision/__init__.py +20 -5
- sky/provision/aws/config.py +124 -42
- sky/provision/aws/instance.py +130 -53
- sky/provision/azure/__init__.py +7 -0
- sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
- sky/provision/azure/config.py +220 -0
- sky/provision/azure/instance.py +1012 -37
- sky/provision/common.py +31 -3
- sky/provision/constants.py +25 -0
- sky/provision/cudo/__init__.py +2 -1
- sky/provision/cudo/cudo_utils.py +112 -0
- sky/provision/cudo/cudo_wrapper.py +37 -16
- sky/provision/cudo/instance.py +28 -12
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +301 -0
- sky/provision/docker_utils.py +82 -46
- sky/provision/fluidstack/fluidstack_utils.py +57 -125
- sky/provision/fluidstack/instance.py +15 -43
- sky/provision/gcp/config.py +19 -9
- sky/provision/gcp/constants.py +7 -1
- sky/provision/gcp/instance.py +55 -34
- sky/provision/gcp/instance_utils.py +339 -80
- sky/provision/gcp/mig_utils.py +210 -0
- sky/provision/instance_setup.py +172 -133
- sky/provision/kubernetes/__init__.py +1 -0
- sky/provision/kubernetes/config.py +104 -90
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +680 -325
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
- sky/provision/kubernetes/network.py +54 -20
- sky/provision/kubernetes/network_utils.py +70 -21
- sky/provision/kubernetes/utils.py +1370 -251
- sky/provision/lambda_cloud/__init__.py +11 -0
- sky/provision/lambda_cloud/config.py +10 -0
- sky/provision/lambda_cloud/instance.py +265 -0
- sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
- sky/provision/logging.py +1 -1
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +318 -0
- sky/provision/oci/__init__.py +15 -0
- sky/provision/oci/config.py +51 -0
- sky/provision/oci/instance.py +436 -0
- sky/provision/oci/query_utils.py +681 -0
- sky/provision/paperspace/constants.py +6 -0
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/paperspace/utils.py +2 -0
- sky/provision/provisioner.py +207 -130
- sky/provision/runpod/__init__.py +1 -0
- sky/provision/runpod/api/__init__.py +3 -0
- sky/provision/runpod/api/commands.py +119 -0
- sky/provision/runpod/api/pods.py +142 -0
- sky/provision/runpod/instance.py +64 -8
- sky/provision/runpod/utils.py +239 -23
- sky/provision/vast/__init__.py +10 -0
- sky/provision/vast/config.py +11 -0
- sky/provision/vast/instance.py +247 -0
- sky/provision/vast/utils.py +162 -0
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +8 -18
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +247 -102
- sky/serve/__init__.py +9 -9
- sky/serve/autoscalers.py +361 -299
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +12 -3
- sky/serve/controller.py +106 -36
- sky/serve/load_balancer.py +63 -12
- sky/serve/load_balancing_policies.py +84 -2
- sky/serve/replica_managers.py +42 -34
- sky/serve/serve_state.py +62 -32
- sky/serve/serve_utils.py +271 -160
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +271 -90
- sky/serve/server/server.py +112 -0
- sky/serve/service.py +52 -16
- sky/serve/service_spec.py +95 -32
- sky/server/__init__.py +1 -0
- sky/server/common.py +430 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +472 -0
- sky/server/requests/payloads.py +487 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1106 -0
- sky/server/stream_utils.py +141 -0
- sky/setup_files/MANIFEST.in +2 -5
- sky/setup_files/dependencies.py +159 -0
- sky/setup_files/setup.py +14 -125
- sky/sky_logging.py +59 -14
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +183 -50
- sky/skylet/events.py +22 -10
- sky/skylet/job_lib.py +403 -258
- sky/skylet/log_lib.py +111 -71
- sky/skylet/log_lib.pyi +6 -0
- sky/skylet/providers/command_runner.py +6 -8
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +11 -3
- sky/skylet/providers/scp/node_provider.py +8 -8
- sky/skylet/skylet.py +3 -1
- sky/skylet/subprocess_daemon.py +69 -17
- sky/skypilot_config.py +119 -57
- sky/task.py +205 -64
- sky/templates/aws-ray.yml.j2 +37 -7
- sky/templates/azure-ray.yml.j2 +27 -82
- sky/templates/cudo-ray.yml.j2 +7 -3
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/fluidstack-ray.yml.j2 +7 -4
- sky/templates/gcp-ray.yml.j2 +26 -6
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +46 -11
- sky/templates/kubernetes-ingress.yml.j2 +7 -0
- sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
- sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
- sky/templates/kubernetes-ray.yml.j2 +292 -25
- sky/templates/lambda-ray.yml.j2 +30 -40
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/templates/oci-ray.yml.j2 +18 -57
- sky/templates/paperspace-ray.yml.j2 +10 -6
- sky/templates/runpod-ray.yml.j2 +26 -4
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/sky-serve-controller.yaml.j2 +12 -1
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vast-ray.yml.j2 +70 -0
- sky/templates/vsphere-ray.yml.j2 +8 -3
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +10 -1
- sky/usage/usage_lib.py +130 -37
- sky/utils/accelerator_registry.py +35 -51
- sky/utils/admin_policy_utils.py +147 -0
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +81 -23
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +452 -89
- sky/utils/command_runner.pyi +77 -3
- sky/utils/common.py +54 -0
- sky/utils/common_utils.py +319 -108
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +48 -0
- sky/utils/controller_utils.py +548 -266
- sky/utils/dag_utils.py +93 -32
- sky/utils/db_utils.py +18 -4
- sky/utils/env_options.py +29 -7
- sky/utils/kubernetes/create_cluster.sh +8 -60
- sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
- sky/utils/kubernetes/gpu_labeler.py +4 -4
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/kubernetes/rsync_helper.sh +24 -0
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- sky/utils/log_utils.py +240 -33
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +94 -22
- sky/utils/rich_utils.py +247 -18
- sky/utils/schemas.py +284 -64
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +212 -46
- sky/utils/timeline.py +12 -7
- sky/utils/ux_utils.py +168 -15
- skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
- skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
- sky/clouds/cloud_registry.py +0 -31
- sky/jobs/core.py +0 -330
- sky/skylet/providers/azure/__init__.py +0 -2
- sky/skylet/providers/azure/azure-vm-template.json +0 -301
- sky/skylet/providers/azure/config.py +0 -170
- sky/skylet/providers/azure/node_provider.py +0 -466
- sky/skylet/providers/lambda_cloud/__init__.py +0 -2
- sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
- sky/skylet/providers/oci/__init__.py +0 -2
- sky/skylet/providers/oci/node_provider.py +0 -488
- sky/skylet/providers/oci/query_helper.py +0 -383
- sky/skylet/providers/oci/utils.py +0 -21
- sky/utils/cluster_yaml_utils.py +0 -24
- sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
- skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
- skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
sky/provision/aws/config.py
CHANGED
@@ -8,7 +8,6 @@ _default_ec2_resource() to avoid version mismatch issues.
|
|
8
8
|
# https://github.com/ray-project/ray/tree/ray-2.0.1/python/ray/autoscaler/_private/aws/config.py
|
9
9
|
# Git commit of the release 2.0.1: 03b6bc7b5a305877501110ec04710a9c57011479
|
10
10
|
import copy
|
11
|
-
import functools
|
12
11
|
import json
|
13
12
|
import logging
|
14
13
|
import time
|
@@ -16,10 +15,13 @@ from typing import Any, Dict, List, Optional, Set, Tuple
|
|
16
15
|
|
17
16
|
import colorama
|
18
17
|
|
18
|
+
from sky import exceptions
|
19
19
|
from sky import sky_logging
|
20
20
|
from sky.adaptors import aws
|
21
21
|
from sky.provision import common
|
22
22
|
from sky.provision.aws import utils
|
23
|
+
from sky.utils import annotations
|
24
|
+
from sky.utils import common_utils
|
23
25
|
|
24
26
|
logger = sky_logging.init_logger(__name__)
|
25
27
|
|
@@ -40,8 +42,9 @@ def _skypilot_log_error_and_exit_for_failover(error: str) -> None:
|
|
40
42
|
Mainly used for handling VPC/subnet errors before nodes are launched.
|
41
43
|
"""
|
42
44
|
# NOTE: keep. The backend looks for this to know no nodes are launched.
|
43
|
-
|
44
|
-
|
45
|
+
full_error = f'SKYPILOT_ERROR_NO_NODES_LAUNCHED: {error}'
|
46
|
+
logger.error(full_error)
|
47
|
+
raise RuntimeError(full_error)
|
45
48
|
|
46
49
|
|
47
50
|
def bootstrap_instances(
|
@@ -191,16 +194,56 @@ def _configure_iam_role(iam) -> Dict[str, Any]:
|
|
191
194
|
for policy_arn in attach_policy_arns:
|
192
195
|
role.attach_policy(PolicyArn=policy_arn)
|
193
196
|
|
197
|
+
# SkyPilot: 'PassRole' is required by the controllers (jobs and
|
198
|
+
# services) created with `aws.remote_identity: SERVICE_ACCOUNT` to
|
199
|
+
# create instances with the IAM role.
|
200
|
+
skypilot_pass_role_policy_doc = {
|
201
|
+
'Statement': [
|
202
|
+
{
|
203
|
+
'Effect': 'Allow',
|
204
|
+
'Action': [
|
205
|
+
'iam:GetRole',
|
206
|
+
'iam:PassRole',
|
207
|
+
],
|
208
|
+
'Resource': role.arn,
|
209
|
+
},
|
210
|
+
{
|
211
|
+
'Effect': 'Allow',
|
212
|
+
'Action': 'iam:GetInstanceProfile',
|
213
|
+
'Resource': profile.arn,
|
214
|
+
},
|
215
|
+
]
|
216
|
+
}
|
217
|
+
role.Policy('SkyPilotPassRolePolicy').put(
|
218
|
+
PolicyDocument=json.dumps(skypilot_pass_role_policy_doc))
|
219
|
+
|
194
220
|
profile.add_role(RoleName=role.name)
|
195
221
|
time.sleep(15) # wait for propagation
|
196
222
|
return {'Arn': profile.arn}
|
197
223
|
|
198
224
|
|
199
|
-
@
|
200
|
-
def _get_route_tables(ec2, vpc_id: Optional[str],
|
225
|
+
@annotations.lru_cache(scope='request', maxsize=128) # Keep bounded.
|
226
|
+
def _get_route_tables(ec2, vpc_id: Optional[str], region: str,
|
227
|
+
main: bool) -> List[Any]:
|
228
|
+
"""Get route tables associated with a VPC and region
|
229
|
+
|
230
|
+
Args:
|
231
|
+
ec2: ec2 resource object
|
232
|
+
vpc_id: vpc_id is optional, if not provided, all route tables in the
|
233
|
+
region will be returned
|
234
|
+
region: region is mandatory to allow the lru cache
|
235
|
+
to return the corect results
|
236
|
+
main: if True, only main route tables will be returned otherwise
|
237
|
+
only non-main route tables will be returned
|
238
|
+
|
239
|
+
Returns:
|
240
|
+
A list of route tables associated with the options VPC and region
|
241
|
+
"""
|
201
242
|
filters = [{'Name': 'association.main', 'Values': [str(main).lower()]}]
|
202
243
|
if vpc_id is not None:
|
203
244
|
filters.append({'Name': 'vpc-id', 'Values': [vpc_id]})
|
245
|
+
logger.debug(
|
246
|
+
f'Getting route tables with filters: {filters} in region: {region}')
|
204
247
|
return ec2.meta.client.describe_route_tables(Filters=filters).get(
|
205
248
|
'RouteTables', [])
|
206
249
|
|
@@ -213,7 +256,8 @@ def _is_subnet_public(ec2, subnet_id, vpc_id: Optional[str]) -> bool:
|
|
213
256
|
https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Internet_Gateway.html
|
214
257
|
"""
|
215
258
|
# Get the route tables associated with the subnet
|
216
|
-
|
259
|
+
region = ec2.meta.client.meta.region_name
|
260
|
+
all_route_tables = _get_route_tables(ec2, vpc_id, region, main=False)
|
217
261
|
route_tables = [
|
218
262
|
rt for rt in all_route_tables
|
219
263
|
# An RT can be associated with multiple subnets, i.e.,
|
@@ -235,14 +279,15 @@ def _is_subnet_public(ec2, subnet_id, vpc_id: Optional[str]) -> bool:
|
|
235
279
|
logger.debug(f'subnet {subnet_id} route tables: {route_tables}')
|
236
280
|
if _has_igw_route(route_tables):
|
237
281
|
return True
|
238
|
-
if
|
282
|
+
if route_tables:
|
239
283
|
return False
|
240
284
|
|
241
285
|
# Handle the case that a "main" route table is implicitly associated with
|
242
286
|
# subnets. Since the associations are implicit, the filter above won't find
|
243
287
|
# any. Check there exists a main route table with routes pointing to an IGW.
|
244
288
|
logger.debug('Checking main route table')
|
245
|
-
|
289
|
+
region = ec2.meta.client.meta.region_name
|
290
|
+
main_route_tables = _get_route_tables(ec2, vpc_id, region, main=True)
|
246
291
|
return _has_igw_route(main_route_tables)
|
247
292
|
|
248
293
|
|
@@ -338,10 +383,13 @@ def _usable_subnets(
|
|
338
383
|
raise exc
|
339
384
|
|
340
385
|
if not subnets:
|
386
|
+
vpc_msg = (f'Does a default VPC exist in region '
|
387
|
+
f'{ec2.meta.client.meta.region_name}? ') if (
|
388
|
+
vpc_id_of_sg is None) else ''
|
341
389
|
_skypilot_log_error_and_exit_for_failover(
|
342
|
-
'No usable subnets found
|
343
|
-
'manually creating an instance in your specified region to '
|
344
|
-
'populate the list of subnets and
|
390
|
+
f'No usable subnets found. {vpc_msg}'
|
391
|
+
'Try manually creating an instance in your specified region to '
|
392
|
+
'populate the list of subnets and try again. '
|
345
393
|
'Note that the subnet must map public IPs '
|
346
394
|
'on instance launch unless you set `use_internal_ips: true` in '
|
347
395
|
'the `provider` config.')
|
@@ -409,7 +457,7 @@ def _vpc_id_from_security_group_ids(ec2, sg_ids: List[str]) -> Any:
|
|
409
457
|
|
410
458
|
no_sg_msg = ('Failed to detect a security group with id equal to any of '
|
411
459
|
'the configured SecurityGroupIds.')
|
412
|
-
assert
|
460
|
+
assert vpc_ids, no_sg_msg
|
413
461
|
|
414
462
|
return vpc_ids[0]
|
415
463
|
|
@@ -450,6 +498,11 @@ def _get_subnet_and_vpc_id(ec2, security_group_ids: Optional[List[str]],
|
|
450
498
|
vpc_id_of_sg = None
|
451
499
|
|
452
500
|
all_subnets = list(ec2.subnets.all())
|
501
|
+
# If no VPC is specified, use the default VPC.
|
502
|
+
# We filter only for default VPCs to avoid using subnets that users may
|
503
|
+
# not want SkyPilot to use.
|
504
|
+
if vpc_id_of_sg is None:
|
505
|
+
all_subnets = [s for s in all_subnets if s.vpc.is_default]
|
453
506
|
subnets, vpc_id = _usable_subnets(
|
454
507
|
ec2,
|
455
508
|
user_specified_subnets=None,
|
@@ -500,47 +553,76 @@ def _configure_security_group(ec2, vpc_id: str, expected_sg_name: str,
|
|
500
553
|
|
501
554
|
def _get_or_create_vpc_security_group(ec2, vpc_id: str,
|
502
555
|
expected_sg_name: str) -> Any:
|
503
|
-
|
504
|
-
vpc_to_existing_sg = {
|
505
|
-
sg.vpc_id: sg for sg in _get_security_groups_from_vpc_ids(
|
506
|
-
ec2,
|
507
|
-
[vpc_id],
|
508
|
-
[expected_sg_name],
|
509
|
-
)
|
510
|
-
}
|
556
|
+
"""Find or create a security group in the specified VPC.
|
511
557
|
|
512
|
-
|
513
|
-
|
558
|
+
Args:
|
559
|
+
ec2: The initialized EC2 client object.
|
560
|
+
vpc_id: The ID of the VPC where the security group should be queried
|
561
|
+
or created.
|
562
|
+
expected_sg_name: The expected name of the security group.
|
514
563
|
|
515
|
-
|
516
|
-
|
517
|
-
Description='Auto-created security group for Ray workers',
|
518
|
-
GroupName=expected_sg_name,
|
519
|
-
VpcId=vpc_id,
|
520
|
-
)
|
521
|
-
security_group = _get_security_groups_from_vpc_ids(ec2, [vpc_id],
|
522
|
-
[expected_sg_name])
|
564
|
+
Returns:
|
565
|
+
The security group object containing the details of the security group.
|
523
566
|
|
524
|
-
|
525
|
-
|
567
|
+
Raises:
|
568
|
+
exceptions.NoClusterLaunchedError: If the security group creation fails
|
569
|
+
and is not due to an existing duplicate.
|
570
|
+
botocore.exceptions.ClientError: If the security group creation fails
|
571
|
+
due to AWS service issues.
|
572
|
+
"""
|
573
|
+
# Figure out which security groups with this name exist for each VPC...
|
574
|
+
security_group = _get_security_group_from_vpc_id(ec2, vpc_id,
|
575
|
+
expected_sg_name)
|
576
|
+
if security_group is not None:
|
577
|
+
return security_group
|
526
578
|
|
579
|
+
try:
|
580
|
+
# create a new security group
|
581
|
+
ec2.meta.client.create_security_group(
|
582
|
+
Description='Auto-created security group for Ray workers',
|
583
|
+
GroupName=expected_sg_name,
|
584
|
+
VpcId=vpc_id,
|
585
|
+
)
|
586
|
+
except ec2.meta.client.exceptions.ClientError as e:
|
587
|
+
if e.response['Error']['Code'] == 'InvalidGroup.Duplicate':
|
588
|
+
# The security group already exists, but we didn't see it
|
589
|
+
# because of eventual consistency.
|
590
|
+
logger.warning(f'{expected_sg_name} already exists when creating.')
|
591
|
+
security_group = _get_security_group_from_vpc_id(
|
592
|
+
ec2, vpc_id, expected_sg_name)
|
593
|
+
assert (security_group is not None and
|
594
|
+
security_group.group_name == expected_sg_name), (
|
595
|
+
f'Expected {expected_sg_name} but got {security_group}')
|
596
|
+
logger.info(
|
597
|
+
f'Found existing security group {colorama.Style.BRIGHT}'
|
598
|
+
f'{security_group.group_name}{colorama.Style.RESET_ALL} '
|
599
|
+
f'[id={security_group.id}]')
|
600
|
+
return security_group
|
601
|
+
message = ('Failed to create security group. Error: '
|
602
|
+
f'{common_utils.format_exception(e)}')
|
603
|
+
logger.warning(message)
|
604
|
+
raise exceptions.NoClusterLaunchedError(message) from e
|
605
|
+
|
606
|
+
security_group = _get_security_group_from_vpc_id(ec2, vpc_id,
|
607
|
+
expected_sg_name)
|
608
|
+
assert security_group is not None, 'Failed to create security group'
|
527
609
|
logger.info(f'Created new security group {colorama.Style.BRIGHT}'
|
528
610
|
f'{security_group.group_name}{colorama.Style.RESET_ALL} '
|
529
611
|
f'[id={security_group.id}]')
|
530
612
|
return security_group
|
531
613
|
|
532
614
|
|
533
|
-
def
|
534
|
-
|
535
|
-
|
536
|
-
unique_group_names = set(group_names)
|
537
|
-
|
615
|
+
def _get_security_group_from_vpc_id(ec2, vpc_id: str,
|
616
|
+
group_name: str) -> Optional[Any]:
|
617
|
+
"""Get security group by VPC ID and group name."""
|
538
618
|
existing_groups = list(
|
539
619
|
ec2.security_groups.filter(Filters=[{
|
540
620
|
'Name': 'vpc-id',
|
541
|
-
'Values':
|
621
|
+
'Values': [vpc_id]
|
542
622
|
}]))
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
623
|
+
|
624
|
+
for sg in existing_groups:
|
625
|
+
if sg.group_name == group_name:
|
626
|
+
return sg
|
627
|
+
|
628
|
+
return None
|
sky/provision/aws/instance.py
CHANGED
@@ -12,24 +12,21 @@ import time
|
|
12
12
|
from typing import Any, Callable, Dict, List, Optional, Set, TypeVar
|
13
13
|
|
14
14
|
from sky import sky_logging
|
15
|
-
from sky import status_lib
|
16
15
|
from sky.adaptors import aws
|
17
16
|
from sky.clouds import aws as aws_cloud
|
17
|
+
from sky.clouds.utils import aws_utils
|
18
18
|
from sky.provision import common
|
19
|
+
from sky.provision import constants
|
19
20
|
from sky.provision.aws import utils
|
20
21
|
from sky.utils import common_utils
|
21
22
|
from sky.utils import resources_utils
|
23
|
+
from sky.utils import status_lib
|
22
24
|
from sky.utils import ux_utils
|
23
25
|
|
24
26
|
logger = sky_logging.init_logger(__name__)
|
25
27
|
|
26
28
|
_T = TypeVar('_T')
|
27
29
|
|
28
|
-
# Tag uniquely identifying all nodes of a cluster
|
29
|
-
TAG_RAY_CLUSTER_NAME = 'ray-cluster-name'
|
30
|
-
TAG_SKYPILOT_CLUSTER_NAME = 'skypilot-cluster-name'
|
31
|
-
TAG_RAY_NODE_KIND = 'ray-node-type' # legacy tag for backward compatibility
|
32
|
-
TAG_SKYPILOT_HEAD_NODE = 'skypilot-head-node'
|
33
30
|
# Max retries for general AWS API calls.
|
34
31
|
BOTO_MAX_RETRIES = 12
|
35
32
|
# Max retries for creating an instance.
|
@@ -58,7 +55,7 @@ _RESUME_PER_INSTANCE_TIMEOUT = 120 # 2 minutes
|
|
58
55
|
# https://aws.amazon.com/ec2/pricing/on-demand/#Data_Transfer_within_the_same_AWS_Region
|
59
56
|
|
60
57
|
|
61
|
-
def _default_ec2_resource(region: str) -> Any:
|
58
|
+
def _default_ec2_resource(region: str, check_credentials: bool = True) -> Any:
|
62
59
|
if not hasattr(aws, 'version'):
|
63
60
|
# For backward compatibility, reload the module if the aws module was
|
64
61
|
# imported before and stale. Used for, e.g., a live jobs controller
|
@@ -98,12 +95,13 @@ def _default_ec2_resource(region: str) -> Any:
|
|
98
95
|
importlib.reload(aws)
|
99
96
|
return aws.resource('ec2',
|
100
97
|
region_name=region,
|
101
|
-
max_attempts=BOTO_MAX_RETRIES
|
98
|
+
max_attempts=BOTO_MAX_RETRIES,
|
99
|
+
check_credentials=check_credentials)
|
102
100
|
|
103
101
|
|
104
102
|
def _cluster_name_filter(cluster_name_on_cloud: str) -> List[Dict[str, Any]]:
|
105
103
|
return [{
|
106
|
-
'Name': f'tag:{TAG_RAY_CLUSTER_NAME}',
|
104
|
+
'Name': f'tag:{constants.TAG_RAY_CLUSTER_NAME}',
|
107
105
|
'Values': [cluster_name_on_cloud],
|
108
106
|
}]
|
109
107
|
|
@@ -181,8 +179,8 @@ def _create_instances(ec2_fail_fast, cluster_name: str,
|
|
181
179
|
count: int, associate_public_ip_address: bool) -> List:
|
182
180
|
tags = {
|
183
181
|
'Name': cluster_name,
|
184
|
-
TAG_RAY_CLUSTER_NAME: cluster_name,
|
185
|
-
TAG_SKYPILOT_CLUSTER_NAME: cluster_name,
|
182
|
+
constants.TAG_RAY_CLUSTER_NAME: cluster_name,
|
183
|
+
constants.TAG_SKYPILOT_CLUSTER_NAME: cluster_name,
|
186
184
|
**tags
|
187
185
|
}
|
188
186
|
conf = node_config.copy()
|
@@ -212,6 +210,8 @@ def _create_instances(ec2_fail_fast, cluster_name: str,
|
|
212
210
|
assert 'NetworkInterfaces' not in conf, conf
|
213
211
|
assert security_group_ids is not None, conf
|
214
212
|
|
213
|
+
logger.debug(f'Creating {count} instances with config: \n{conf}')
|
214
|
+
|
215
215
|
# NOTE: This ensures that we try ALL availability zones before
|
216
216
|
# throwing an error.
|
217
217
|
num_subnets = len(subnet_ids)
|
@@ -250,10 +250,8 @@ def _create_instances(ec2_fail_fast, cluster_name: str,
|
|
250
250
|
|
251
251
|
def _get_head_instance_id(instances: List) -> Optional[str]:
|
252
252
|
head_instance_id = None
|
253
|
-
head_node_markers = (
|
254
|
-
|
255
|
-
(TAG_RAY_NODE_KIND, 'head'), # backward compat with Ray
|
256
|
-
)
|
253
|
+
head_node_markers = tuple(constants.HEAD_NODE_TAGS.items())
|
254
|
+
|
257
255
|
for inst in instances:
|
258
256
|
for t in inst.tags:
|
259
257
|
if (t['Key'], t['Value']) in head_node_markers:
|
@@ -288,7 +286,7 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
288
286
|
'Name': 'instance-state-name',
|
289
287
|
'Values': ['pending', 'running', 'stopping', 'stopped'],
|
290
288
|
}, {
|
291
|
-
'Name': f'tag:{TAG_RAY_CLUSTER_NAME}',
|
289
|
+
'Name': f'tag:{constants.TAG_RAY_CLUSTER_NAME}',
|
292
290
|
'Values': [cluster_name_on_cloud],
|
293
291
|
}]
|
294
292
|
exist_instances = list(ec2.instances.filter(Filters=filters))
|
@@ -314,31 +312,27 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
314
312
|
raise RuntimeError(f'Impossible state "{state}".')
|
315
313
|
|
316
314
|
def _create_node_tag(target_instance, is_head: bool = True) -> str:
|
315
|
+
node_type_tags = (constants.HEAD_NODE_TAGS
|
316
|
+
if is_head else constants.WORKER_NODE_TAGS)
|
317
|
+
node_tag = [{'Key': k, 'Value': v} for k, v in node_type_tags.items()]
|
317
318
|
if is_head:
|
318
|
-
node_tag
|
319
|
-
'Key': TAG_SKYPILOT_HEAD_NODE,
|
320
|
-
'Value': '1'
|
321
|
-
}, {
|
322
|
-
'Key': TAG_RAY_NODE_KIND,
|
323
|
-
'Value': 'head'
|
324
|
-
}, {
|
319
|
+
node_tag.append({
|
325
320
|
'Key': 'Name',
|
326
321
|
'Value': f'sky-{cluster_name_on_cloud}-head'
|
327
|
-
}
|
322
|
+
})
|
328
323
|
else:
|
329
|
-
node_tag
|
330
|
-
'Key': TAG_SKYPILOT_HEAD_NODE,
|
331
|
-
'Value': '0'
|
332
|
-
}, {
|
333
|
-
'Key': TAG_RAY_NODE_KIND,
|
334
|
-
'Value': 'worker'
|
335
|
-
}, {
|
324
|
+
node_tag.append({
|
336
325
|
'Key': 'Name',
|
337
326
|
'Value': f'sky-{cluster_name_on_cloud}-worker'
|
338
|
-
}
|
327
|
+
})
|
328
|
+
# Remove AWS internal tags, as they are not allowed to be set by users.
|
329
|
+
target_instance_tags = [
|
330
|
+
tag for tag in target_instance.tags
|
331
|
+
if not tag['Key'].startswith('aws:')
|
332
|
+
]
|
339
333
|
ec2.meta.client.create_tags(
|
340
334
|
Resources=[target_instance.id],
|
341
|
-
Tags=
|
335
|
+
Tags=target_instance_tags + node_tag,
|
342
336
|
)
|
343
337
|
return target_instance.id
|
344
338
|
|
@@ -444,19 +438,87 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
444
438
|
head_instance_id = _create_node_tag(resumed_instances[0])
|
445
439
|
|
446
440
|
if to_start_count > 0:
|
441
|
+
target_reservation_names = (config.node_config.get(
|
442
|
+
'CapacityReservationSpecification',
|
443
|
+
{}).get('CapacityReservationTarget',
|
444
|
+
{}).get('CapacityReservationId', []))
|
445
|
+
created_instances = []
|
446
|
+
if target_reservation_names:
|
447
|
+
node_config = copy.deepcopy(config.node_config)
|
448
|
+
# Clear the capacity reservation specification settings in the
|
449
|
+
# original node config, as we will create instances with
|
450
|
+
# reservations with specific settings for each reservation.
|
451
|
+
node_config['CapacityReservationSpecification'] = {
|
452
|
+
'CapacityReservationTarget': {}
|
453
|
+
}
|
454
|
+
|
455
|
+
reservations = aws_utils.list_reservations_for_instance_type(
|
456
|
+
node_config['InstanceType'], region=region)
|
457
|
+
# Filter the reservations by the user-specified ones, because
|
458
|
+
# reservations contain 'open' reservations as well, which do not
|
459
|
+
# need to explicitly specify in the config for creating instances.
|
460
|
+
target_reservations = []
|
461
|
+
for r in reservations:
|
462
|
+
if (r.targeted and r.name in target_reservation_names):
|
463
|
+
target_reservations.append(r)
|
464
|
+
logger.debug(f'Reservations: {reservations}')
|
465
|
+
logger.debug(f'Target reservations: {target_reservations}')
|
466
|
+
|
467
|
+
target_reservations_list = sorted(
|
468
|
+
target_reservations,
|
469
|
+
key=lambda x: x.available_resources,
|
470
|
+
reverse=True)
|
471
|
+
for r in target_reservations_list:
|
472
|
+
if r.available_resources <= 0:
|
473
|
+
# We have sorted the reservations by the available
|
474
|
+
# resources, so if the reservation is not available, the
|
475
|
+
# following reservations are not available either.
|
476
|
+
break
|
477
|
+
reservation_count = min(r.available_resources, to_start_count)
|
478
|
+
logger.debug(f'Creating {reservation_count} instances '
|
479
|
+
f'with reservation {r.name}')
|
480
|
+
node_config['CapacityReservationSpecification'][
|
481
|
+
'CapacityReservationTarget'] = {
|
482
|
+
'CapacityReservationId': r.name
|
483
|
+
}
|
484
|
+
if r.type == aws_utils.ReservationType.BLOCK:
|
485
|
+
# Capacity block reservations needs to specify the market
|
486
|
+
# type during instance creation.
|
487
|
+
node_config['InstanceMarketOptions'] = {
|
488
|
+
'MarketType': aws_utils.ReservationType.BLOCK.value
|
489
|
+
}
|
490
|
+
created_reserved_instances = _create_instances(
|
491
|
+
ec2_fail_fast,
|
492
|
+
cluster_name_on_cloud,
|
493
|
+
node_config,
|
494
|
+
tags,
|
495
|
+
reservation_count,
|
496
|
+
associate_public_ip_address=(
|
497
|
+
not config.provider_config['use_internal_ips']))
|
498
|
+
created_instances.extend(created_reserved_instances)
|
499
|
+
to_start_count -= reservation_count
|
500
|
+
if to_start_count <= 0:
|
501
|
+
break
|
502
|
+
|
447
503
|
# TODO(suquark): If there are existing instances (already running or
|
448
504
|
# resumed), then we cannot guarantee that they will be in the same
|
449
505
|
# availability zone (when there are multiple zones specified).
|
450
506
|
# This is a known issue before.
|
451
507
|
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
config.node_config,
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
508
|
+
if to_start_count > 0:
|
509
|
+
# Remove the capacity reservation specification from the node config
|
510
|
+
# as we have already created the instances with the reservations.
|
511
|
+
config.node_config.get('CapacityReservationSpecification',
|
512
|
+
{}).pop('CapacityReservationTarget', None)
|
513
|
+
created_remaining_instances = _create_instances(
|
514
|
+
ec2_fail_fast,
|
515
|
+
cluster_name_on_cloud,
|
516
|
+
config.node_config,
|
517
|
+
tags,
|
518
|
+
to_start_count,
|
519
|
+
associate_public_ip_address=(
|
520
|
+
not config.provider_config['use_internal_ips']))
|
521
|
+
created_instances.extend(created_remaining_instances)
|
460
522
|
created_instances.sort(key=lambda x: x.id)
|
461
523
|
|
462
524
|
created_instance_ids = [n.id for n in created_instances]
|
@@ -563,7 +625,7 @@ def stop_instances(
|
|
563
625
|
]
|
564
626
|
if worker_only:
|
565
627
|
filters.append({
|
566
|
-
'Name': f'tag:{TAG_RAY_NODE_KIND}',
|
628
|
+
'Name': f'tag:{constants.TAG_RAY_NODE_KIND}',
|
567
629
|
'Values': ['worker'],
|
568
630
|
})
|
569
631
|
instances = _filter_instances(ec2,
|
@@ -601,7 +663,7 @@ def terminate_instances(
|
|
601
663
|
]
|
602
664
|
if worker_only:
|
603
665
|
filters.append({
|
604
|
-
'Name': f'tag:{TAG_RAY_NODE_KIND}',
|
666
|
+
'Name': f'tag:{constants.TAG_RAY_NODE_KIND}',
|
605
667
|
'Values': ['worker'],
|
606
668
|
})
|
607
669
|
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Instance
|
@@ -717,16 +779,31 @@ def open_ports(
|
|
717
779
|
|
718
780
|
existing_ports: Set[int] = set()
|
719
781
|
for existing_rule in sg.ip_permissions:
|
720
|
-
# Skip any non-tcp rules.
|
721
|
-
if existing_rule['IpProtocol']
|
782
|
+
# Skip any non-tcp rules or if all traffic (-1) is specified.
|
783
|
+
if existing_rule['IpProtocol'] not in ['tcp', '-1']:
|
722
784
|
continue
|
723
785
|
# Skip any rules that don't have a FromPort or ToPort.
|
724
|
-
if 'FromPort'
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
786
|
+
if 'FromPort' in existing_rule and 'ToPort' in existing_rule:
|
787
|
+
existing_ports.update(
|
788
|
+
range(existing_rule['FromPort'], existing_rule['ToPort'] + 1))
|
789
|
+
elif existing_rule['IpProtocol'] == '-1':
|
790
|
+
# For AWS, IpProtocol = -1 means all traffic
|
791
|
+
for group_pairs in existing_rule['UserIdGroupPairs']:
|
792
|
+
if group_pairs['GroupId'] != sg.id:
|
793
|
+
# We skip the port opening when the rule allows access from
|
794
|
+
# other security groups, as that is likely added by a user
|
795
|
+
# manually and satisfy their requirement.
|
796
|
+
# The security group created by SkyPilot allows all traffic
|
797
|
+
# from the same security group, which should not be skipped.
|
798
|
+
existing_ports.add(-1)
|
799
|
+
break
|
800
|
+
break
|
801
|
+
|
802
|
+
ports_to_open = []
|
803
|
+
# Do not need to open any ports when all traffic is already allowed.
|
804
|
+
if -1 not in existing_ports:
|
805
|
+
ports_to_open = resources_utils.port_set_to_ranges(
|
806
|
+
resources_utils.port_ranges_to_set(ports) - existing_ports)
|
730
807
|
|
731
808
|
ip_permissions = []
|
732
809
|
for port in ports_to_open:
|
@@ -799,7 +876,7 @@ def wait_instances(region: str, cluster_name_on_cloud: str,
|
|
799
876
|
|
800
877
|
filters = [
|
801
878
|
{
|
802
|
-
'Name': f'tag:{TAG_RAY_CLUSTER_NAME}',
|
879
|
+
'Name': f'tag:{constants.TAG_RAY_CLUSTER_NAME}',
|
803
880
|
'Values': [cluster_name_on_cloud],
|
804
881
|
},
|
805
882
|
]
|
@@ -850,7 +927,7 @@ def get_cluster_info(
|
|
850
927
|
'Values': ['running'],
|
851
928
|
},
|
852
929
|
{
|
853
|
-
'Name': f'tag:{TAG_RAY_CLUSTER_NAME}',
|
930
|
+
'Name': f'tag:{constants.TAG_RAY_CLUSTER_NAME}',
|
854
931
|
'Values': [cluster_name_on_cloud],
|
855
932
|
},
|
856
933
|
]
|
sky/provision/azure/__init__.py
CHANGED
@@ -1,4 +1,11 @@
|
|
1
1
|
"""Azure provisioner for SkyPilot."""
|
2
2
|
|
3
|
+
from sky.provision.azure.config import bootstrap_instances
|
3
4
|
from sky.provision.azure.instance import cleanup_ports
|
5
|
+
from sky.provision.azure.instance import get_cluster_info
|
4
6
|
from sky.provision.azure.instance import open_ports
|
7
|
+
from sky.provision.azure.instance import query_instances
|
8
|
+
from sky.provision.azure.instance import run_instances
|
9
|
+
from sky.provision.azure.instance import stop_instances
|
10
|
+
from sky.provision.azure.instance import terminate_instances
|
11
|
+
from sky.provision.azure.instance import wait_instances
|
@@ -5,7 +5,7 @@
|
|
5
5
|
"clusterId": {
|
6
6
|
"type": "string",
|
7
7
|
"metadata": {
|
8
|
-
"description": "Unique string appended to resource names to isolate resources from different
|
8
|
+
"description": "Unique string appended to resource names to isolate resources from different SkyPilot clusters."
|
9
9
|
}
|
10
10
|
},
|
11
11
|
"subnet": {
|
@@ -13,17 +13,29 @@
|
|
13
13
|
"metadata": {
|
14
14
|
"description": "Subnet parameters."
|
15
15
|
}
|
16
|
+
},
|
17
|
+
"location": {
|
18
|
+
"type": "string",
|
19
|
+
"metadata": {
|
20
|
+
"description": "Location of where the resources are allocated."
|
21
|
+
}
|
22
|
+
},
|
23
|
+
"nsgName": {
|
24
|
+
"type": "string",
|
25
|
+
"metadata": {
|
26
|
+
"description": "Name of the Network Security Group associated with the SkyPilot cluster."
|
27
|
+
}
|
16
28
|
}
|
17
29
|
},
|
18
30
|
"variables": {
|
19
31
|
"contributor": "[subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'b24988ac-6180-42a0-ab88-20f7382dd24c')]",
|
20
|
-
"location": "[
|
21
|
-
"msiName": "[concat('
|
22
|
-
"roleAssignmentName": "[concat('
|
23
|
-
"nsgName": "[
|
32
|
+
"location": "[parameters('location')]",
|
33
|
+
"msiName": "[concat('sky-', parameters('clusterId'), '-msi')]",
|
34
|
+
"roleAssignmentName": "[concat('sky-', parameters('clusterId'), '-ra')]",
|
35
|
+
"nsgName": "[parameters('nsgName')]",
|
24
36
|
"nsg": "[resourceId('Microsoft.Network/networkSecurityGroups', variables('nsgName'))]",
|
25
|
-
"vnetName": "[concat('
|
26
|
-
"subnetName": "[concat('
|
37
|
+
"vnetName": "[concat('sky-', parameters('clusterId'), '-vnet')]",
|
38
|
+
"subnetName": "[concat('sky-', parameters('clusterId'), '-subnet')]"
|
27
39
|
},
|
28
40
|
"resources": [
|
29
41
|
{
|