skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +64 -32
- sky/adaptors/aws.py +23 -6
- sky/adaptors/azure.py +432 -15
- sky/adaptors/cloudflare.py +5 -5
- sky/adaptors/common.py +19 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/gcp.py +3 -2
- sky/adaptors/kubernetes.py +122 -88
- sky/adaptors/nebius.py +100 -0
- sky/adaptors/oci.py +39 -1
- sky/adaptors/vast.py +29 -0
- sky/admin_policy.py +101 -0
- sky/authentication.py +117 -98
- sky/backends/backend.py +52 -20
- sky/backends/backend_utils.py +669 -557
- sky/backends/cloud_vm_ray_backend.py +1099 -808
- sky/backends/local_docker_backend.py +14 -8
- sky/backends/wheel_utils.py +38 -20
- sky/benchmark/benchmark_utils.py +22 -23
- sky/check.py +76 -27
- sky/cli.py +1586 -1139
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5683 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1765 -0
- sky/cloud_stores.py +283 -19
- sky/clouds/__init__.py +7 -2
- sky/clouds/aws.py +303 -112
- sky/clouds/azure.py +185 -179
- sky/clouds/cloud.py +115 -37
- sky/clouds/cudo.py +29 -22
- sky/clouds/do.py +313 -0
- sky/clouds/fluidstack.py +44 -54
- sky/clouds/gcp.py +206 -65
- sky/clouds/ibm.py +26 -21
- sky/clouds/kubernetes.py +345 -91
- sky/clouds/lambda_cloud.py +40 -29
- sky/clouds/nebius.py +297 -0
- sky/clouds/oci.py +129 -90
- sky/clouds/paperspace.py +22 -18
- sky/clouds/runpod.py +53 -34
- sky/clouds/scp.py +28 -24
- sky/clouds/service_catalog/__init__.py +19 -13
- sky/clouds/service_catalog/aws_catalog.py +29 -12
- sky/clouds/service_catalog/azure_catalog.py +33 -6
- sky/clouds/service_catalog/common.py +95 -75
- sky/clouds/service_catalog/constants.py +3 -3
- sky/clouds/service_catalog/cudo_catalog.py +13 -3
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
- sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
- sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
- sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/gcp_catalog.py +16 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
- sky/clouds/service_catalog/lambda_catalog.py +8 -3
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/clouds/service_catalog/oci_catalog.py +31 -4
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vast_catalog.py +104 -0
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/utils/aws_utils.py +65 -0
- sky/clouds/utils/azure_utils.py +91 -0
- sky/clouds/utils/gcp_utils.py +5 -9
- sky/clouds/utils/oci_utils.py +47 -5
- sky/clouds/utils/scp_utils.py +4 -3
- sky/clouds/vast.py +280 -0
- sky/clouds/vsphere.py +22 -18
- sky/core.py +361 -107
- sky/dag.py +41 -28
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +211 -32
- sky/data/mounting_utils.py +182 -30
- sky/data/storage.py +2118 -270
- sky/data/storage_utils.py +126 -5
- sky/exceptions.py +179 -8
- sky/execution.py +158 -85
- sky/global_user_state.py +150 -34
- sky/jobs/__init__.py +12 -10
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +302 -0
- sky/jobs/constants.py +49 -11
- sky/jobs/controller.py +161 -99
- sky/jobs/dashboard/dashboard.py +171 -25
- sky/jobs/dashboard/templates/index.html +572 -60
- sky/jobs/recovery_strategy.py +157 -156
- sky/jobs/scheduler.py +307 -0
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/server/core.py +598 -0
- sky/jobs/server/dashboard_utils.py +69 -0
- sky/jobs/server/server.py +190 -0
- sky/jobs/state.py +627 -122
- sky/jobs/utils.py +615 -206
- sky/models.py +27 -0
- sky/optimizer.py +142 -83
- sky/provision/__init__.py +20 -5
- sky/provision/aws/config.py +124 -42
- sky/provision/aws/instance.py +130 -53
- sky/provision/azure/__init__.py +7 -0
- sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
- sky/provision/azure/config.py +220 -0
- sky/provision/azure/instance.py +1012 -37
- sky/provision/common.py +31 -3
- sky/provision/constants.py +25 -0
- sky/provision/cudo/__init__.py +2 -1
- sky/provision/cudo/cudo_utils.py +112 -0
- sky/provision/cudo/cudo_wrapper.py +37 -16
- sky/provision/cudo/instance.py +28 -12
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +301 -0
- sky/provision/docker_utils.py +82 -46
- sky/provision/fluidstack/fluidstack_utils.py +57 -125
- sky/provision/fluidstack/instance.py +15 -43
- sky/provision/gcp/config.py +19 -9
- sky/provision/gcp/constants.py +7 -1
- sky/provision/gcp/instance.py +55 -34
- sky/provision/gcp/instance_utils.py +339 -80
- sky/provision/gcp/mig_utils.py +210 -0
- sky/provision/instance_setup.py +172 -133
- sky/provision/kubernetes/__init__.py +1 -0
- sky/provision/kubernetes/config.py +104 -90
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +680 -325
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
- sky/provision/kubernetes/network.py +54 -20
- sky/provision/kubernetes/network_utils.py +70 -21
- sky/provision/kubernetes/utils.py +1370 -251
- sky/provision/lambda_cloud/__init__.py +11 -0
- sky/provision/lambda_cloud/config.py +10 -0
- sky/provision/lambda_cloud/instance.py +265 -0
- sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
- sky/provision/logging.py +1 -1
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +318 -0
- sky/provision/oci/__init__.py +15 -0
- sky/provision/oci/config.py +51 -0
- sky/provision/oci/instance.py +436 -0
- sky/provision/oci/query_utils.py +681 -0
- sky/provision/paperspace/constants.py +6 -0
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/paperspace/utils.py +2 -0
- sky/provision/provisioner.py +207 -130
- sky/provision/runpod/__init__.py +1 -0
- sky/provision/runpod/api/__init__.py +3 -0
- sky/provision/runpod/api/commands.py +119 -0
- sky/provision/runpod/api/pods.py +142 -0
- sky/provision/runpod/instance.py +64 -8
- sky/provision/runpod/utils.py +239 -23
- sky/provision/vast/__init__.py +10 -0
- sky/provision/vast/config.py +11 -0
- sky/provision/vast/instance.py +247 -0
- sky/provision/vast/utils.py +162 -0
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +8 -18
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +247 -102
- sky/serve/__init__.py +9 -9
- sky/serve/autoscalers.py +361 -299
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +12 -3
- sky/serve/controller.py +106 -36
- sky/serve/load_balancer.py +63 -12
- sky/serve/load_balancing_policies.py +84 -2
- sky/serve/replica_managers.py +42 -34
- sky/serve/serve_state.py +62 -32
- sky/serve/serve_utils.py +271 -160
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +271 -90
- sky/serve/server/server.py +112 -0
- sky/serve/service.py +52 -16
- sky/serve/service_spec.py +95 -32
- sky/server/__init__.py +1 -0
- sky/server/common.py +430 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +472 -0
- sky/server/requests/payloads.py +487 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1106 -0
- sky/server/stream_utils.py +141 -0
- sky/setup_files/MANIFEST.in +2 -5
- sky/setup_files/dependencies.py +159 -0
- sky/setup_files/setup.py +14 -125
- sky/sky_logging.py +59 -14
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +183 -50
- sky/skylet/events.py +22 -10
- sky/skylet/job_lib.py +403 -258
- sky/skylet/log_lib.py +111 -71
- sky/skylet/log_lib.pyi +6 -0
- sky/skylet/providers/command_runner.py +6 -8
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +11 -3
- sky/skylet/providers/scp/node_provider.py +8 -8
- sky/skylet/skylet.py +3 -1
- sky/skylet/subprocess_daemon.py +69 -17
- sky/skypilot_config.py +119 -57
- sky/task.py +205 -64
- sky/templates/aws-ray.yml.j2 +37 -7
- sky/templates/azure-ray.yml.j2 +27 -82
- sky/templates/cudo-ray.yml.j2 +7 -3
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/fluidstack-ray.yml.j2 +7 -4
- sky/templates/gcp-ray.yml.j2 +26 -6
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +46 -11
- sky/templates/kubernetes-ingress.yml.j2 +7 -0
- sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
- sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
- sky/templates/kubernetes-ray.yml.j2 +292 -25
- sky/templates/lambda-ray.yml.j2 +30 -40
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/templates/oci-ray.yml.j2 +18 -57
- sky/templates/paperspace-ray.yml.j2 +10 -6
- sky/templates/runpod-ray.yml.j2 +26 -4
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/sky-serve-controller.yaml.j2 +12 -1
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vast-ray.yml.j2 +70 -0
- sky/templates/vsphere-ray.yml.j2 +8 -3
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +10 -1
- sky/usage/usage_lib.py +130 -37
- sky/utils/accelerator_registry.py +35 -51
- sky/utils/admin_policy_utils.py +147 -0
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +81 -23
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +452 -89
- sky/utils/command_runner.pyi +77 -3
- sky/utils/common.py +54 -0
- sky/utils/common_utils.py +319 -108
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +48 -0
- sky/utils/controller_utils.py +548 -266
- sky/utils/dag_utils.py +93 -32
- sky/utils/db_utils.py +18 -4
- sky/utils/env_options.py +29 -7
- sky/utils/kubernetes/create_cluster.sh +8 -60
- sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
- sky/utils/kubernetes/gpu_labeler.py +4 -4
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/kubernetes/rsync_helper.sh +24 -0
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- sky/utils/log_utils.py +240 -33
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +94 -22
- sky/utils/rich_utils.py +247 -18
- sky/utils/schemas.py +284 -64
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +212 -46
- sky/utils/timeline.py +12 -7
- sky/utils/ux_utils.py +168 -15
- skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
- skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
- sky/clouds/cloud_registry.py +0 -31
- sky/jobs/core.py +0 -330
- sky/skylet/providers/azure/__init__.py +0 -2
- sky/skylet/providers/azure/azure-vm-template.json +0 -301
- sky/skylet/providers/azure/config.py +0 -170
- sky/skylet/providers/azure/node_provider.py +0 -466
- sky/skylet/providers/lambda_cloud/__init__.py +0 -2
- sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
- sky/skylet/providers/oci/__init__.py +0 -2
- sky/skylet/providers/oci/node_provider.py +0 -488
- sky/skylet/providers/oci/query_helper.py +0 -383
- sky/skylet/providers/oci/utils.py +0 -21
- sky/utils/cluster_yaml_utils.py +0 -24
- sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
- skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
- skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,220 @@
|
|
1
|
+
"""Azure configuration bootstrapping.
|
2
|
+
|
3
|
+
Creates the resource group and deploys the configuration template to Azure for
|
4
|
+
a cluster to be launched.
|
5
|
+
"""
|
6
|
+
import hashlib
|
7
|
+
import json
|
8
|
+
from pathlib import Path
|
9
|
+
import random
|
10
|
+
import time
|
11
|
+
from typing import Any, Callable, Tuple
|
12
|
+
|
13
|
+
from sky import exceptions
|
14
|
+
from sky import sky_logging
|
15
|
+
from sky.adaptors import azure
|
16
|
+
from sky.provision import common
|
17
|
+
from sky.provision import constants
|
18
|
+
from sky.utils import common_utils
|
19
|
+
|
20
|
+
logger = sky_logging.init_logger(__name__)
|
21
|
+
|
22
|
+
# Number of leading hex characters of the resource-group hash that are appended
# to the cluster name to form the cluster ID.
UNIQUE_ID_LEN = 4
# Upper bound, in seconds, on how long resource group creation is retried while
# a resource group of the same name is still being deleted.
_RESOURCE_GROUP_WAIT_FOR_DELETION_TIMEOUT = 8 * 60  # 8 minutes
# Format of the cluster ID: on-cloud cluster name plus the unique suffix.
_CLUSTER_ID = '{cluster_name_on_cloud}-{unique_id}'
|
25
|
+
|
26
|
+
|
27
|
+
def get_azure_sdk_function(client: Any, function_name: str) -> Callable:
    """Retrieve a callable function from Azure SDK client object.

    Newer versions of the various client SDKs renamed function names to
    have a begin_ prefix. This function supports both the old and new
    versions of the SDK by first trying the old name and falling back to
    the prefixed new name.

    Args:
        client: The SDK client (or operations group) to look the function
            up on.
        function_name: The un-prefixed function name, e.g.
            ``'create_or_update'``.

    Returns:
        The attribute named ``function_name`` if present, otherwise the
        attribute named ``begin_<function_name>``.

    Raises:
        AttributeError: If the client exposes neither name.
    """
    func = getattr(client, function_name, None)
    if func is None:
        func = getattr(client, f'begin_{function_name}', None)
    if func is None:
        # Client objects are usually instances, which have no ``__name__``;
        # fall back to the class name so that building the error message
        # does not itself fail with an unrelated AttributeError.
        client_name = getattr(client, '__name__', type(client).__name__)
        raise AttributeError(
            f'{client_name!r} object has no {function_name} or '
            f'begin_{function_name} attribute')
    return func
|
42
|
+
|
43
|
+
|
44
|
+
def get_cluster_id_and_nsg_name(resource_group: str,
                                cluster_name_on_cloud: str) -> Tuple[str, str]:
    """Build the cluster ID and network security group name for a cluster.

    Args:
        resource_group: Name of the resource group; its MD5 hex digest
            supplies the unique suffix.
        cluster_name_on_cloud: The cluster name used on the cloud side.

    Returns:
        A ``(cluster_id, nsg_name)`` tuple, where ``cluster_id`` follows the
        ``_CLUSTER_ID`` format and ``nsg_name`` is ``sky-<cluster_id>-nsg``.
    """
    # The cluster name + resource group hash forms the unique ID for the
    # cluster, so that deployments have unique names during failover.
    digest = hashlib.md5(resource_group.encode('utf-8')).hexdigest()
    cluster_id = _CLUSTER_ID.format(
        cluster_name_on_cloud=cluster_name_on_cloud,
        unique_id=digest[:UNIQUE_ID_LEN])
    return cluster_id, f'sky-{cluster_id}-nsg'
|
55
|
+
|
56
|
+
|
57
|
+
@common.log_function_start_end
def bootstrap_instances(
        region: str, cluster_name_on_cloud: str,
        config: common.ProvisionConfig) -> common.ProvisionConfig:
    """See sky/provision/__init__.py

    Ensures the resource group exists (unless an external one is used),
    deploys the bundled ``azure-config-template.json`` ARM template if no
    usable deployment is found, and records the deployment outputs in the
    provider config.

    Args:
        region: Unused; the location is read from the provider config.
        cluster_name_on_cloud: The cluster name used on the cloud side.
        config: Provision config whose ``provider_config`` must contain
            ``resource_group``, ``location`` and
            ``use_external_resource_group``. ``subscription_id``, ``tags``
            and ``subnet_mask`` are optional.

    Returns:
        The same ``config`` object, with ``subscription_id``, ``msi``,
        ``nsg`` and ``subnet`` set in its ``provider_config``.

    Raises:
        AssertionError: If a required provider config field is missing.
        exceptions.NoClusterLaunchedError: If Azure authentication fails
            while creating the resource group.
        TimeoutError: If the resource group is still being deleted after
            ``_RESOURCE_GROUP_WAIT_FOR_DELETION_TIMEOUT`` seconds.
    """
    # TODO: use new azure sdk instead of ARM deployment.
    del region  # unused
    provider_config = config.provider_config
    subscription_id = provider_config.get('subscription_id')
    if subscription_id is None:
        subscription_id = azure.get_subscription_id()
    # Increase the timeout to fix the Azure get-access-token (used by ray azure
    # node_provider) timeout issue.
    # Tracked in https://github.com/Azure/azure-cli/issues/20404#issuecomment-1249575110 # pylint: disable=line-too-long
    # NOTE(review): no timeout is adjusted in this function; the comment above
    # looks like a leftover from an earlier implementation -- confirm.
    resource_client = azure.get_client('resource', subscription_id)
    provider_config['subscription_id'] = subscription_id
    logger.info(f'Using subscription id: {subscription_id}')

    assert (
        'resource_group'
        in provider_config), 'Provider config must include resource_group field'
    resource_group = provider_config['resource_group']

    assert ('location'
            in provider_config), 'Provider config must include location field'
    params = {'location': provider_config['location']}

    assert ('use_external_resource_group'
            in provider_config), ('Provider config must include '
                                  'use_external_resource_group field')
    use_external_resource_group = provider_config['use_external_resource_group']

    if 'tags' in provider_config:
        params['tags'] = provider_config['tags']

    # When resource group is user specified, it already exists in certain
    # region.
    if not use_external_resource_group:
        logger.info(f'Creating/Updating resource group: {resource_group}')
        rg_create_or_update = get_azure_sdk_function(
            client=resource_client.resource_groups,
            function_name='create_or_update')
        rg_creation_start = time.time()
        retry = 0
        # Keep retrying while a resource group of the same name is still being
        # deleted; any other failure propagates immediately.
        while (time.time() - rg_creation_start <
               _RESOURCE_GROUP_WAIT_FOR_DELETION_TIMEOUT):
            try:
                rg_create_or_update(resource_group_name=resource_group,
                                    parameters=params)
                break
            except azure.exceptions().ResourceExistsError as e:
                if 'ResourceGroupBeingDeleted' in str(e):
                    # Only log every fifth retry to avoid flooding the log.
                    if retry % 5 == 0:
                        logger.info(
                            f'Azure resource group {resource_group} of a '
                            'recent terminated cluster '
                            f'{cluster_name_on_cloud} is being deleted. It can'
                            ' only be provisioned after it is fully deleted. '
                            'Waiting...')
                    time.sleep(1)
                    retry += 1
                    continue
                raise
            except azure.exceptions().ClientAuthenticationError as e:
                message = (
                    'Failed to authenticate with Azure. Please check your '
                    'Azure credentials. Error: '
                    f'{common_utils.format_exception(e)}').replace('\n', ' ')
                logger.error(message)
                raise exceptions.NoClusterLaunchedError(message) from e
        else:
            # Reached only when the loop ran out of time without a `break`.
            message = (
                f'Timed out waiting for resource group {resource_group} to be '
                'deleted.')
            logger.error(message)
            raise TimeoutError(message)

    # load the template file
    current_path = Path(__file__).parent
    template_path = current_path.joinpath('azure-config-template.json')
    with open(template_path, 'r', encoding='utf-8') as template_fp:
        template = json.load(template_fp)

    logger.info(f'Using cluster name: {cluster_name_on_cloud}')

    cluster_id, nsg_name = get_cluster_id_and_nsg_name(
        resource_group=resource_group,
        cluster_name_on_cloud=cluster_name_on_cloud)
    subnet_mask = provider_config.get('subnet_mask')
    if subnet_mask is None:
        # choose a random subnet, skipping most common value of 0
        # A private generator seeded with the cluster ID picks the same subnet
        # on every call for a given cluster, without reseeding the
        # process-wide `random` state.
        subnet_rng = random.Random(cluster_id)
        subnet_mask = f'10.{subnet_rng.randint(1, 254)}.0.0/16'
    logger.info(f'Using subnet mask: {subnet_mask}')

    parameters = {
        'properties': {
            'mode': azure.deployment_mode().incremental,
            'template': template,
            'parameters': {
                'subnet': {
                    'value': subnet_mask
                },
                'clusterId': {
                    'value': cluster_id
                },
                'nsgName': {
                    'value': nsg_name
                },
                'location': {
                    'value': params['location']
                }
            },
        }
    }

    # Skip creating or updating the deployment if the deployment already exists
    # and the cluster name is the same.
    get_deployment = get_azure_sdk_function(client=resource_client.deployments,
                                            function_name='get')
    deployment_exists = False
    if use_external_resource_group:
        deployment_name = (
            constants.EXTERNAL_RG_BOOTSTRAP_DEPLOYMENT_NAME.format(
                cluster_name_on_cloud=cluster_name_on_cloud))
        deployment_list = [deployment_name]
    else:
        deployment_name = constants.DEPLOYMENT_NAME
        deployment_list = [
            constants.DEPLOYMENT_NAME, constants.LEGACY_DEPLOYMENT_NAME
        ]

    for deploy_name in deployment_list:
        try:
            deployment = get_deployment(resource_group_name=resource_group,
                                        deployment_name=deploy_name)
        except azure.exceptions().ResourceNotFoundError:
            deployment_exists = False
            continue

        outputs = deployment.properties.outputs
        # A deployment without outputs cannot supply the resource IDs needed
        # below, so it does not count as existing and creation is not skipped.
        if outputs is not None:
            logger.info(f'Deployment {deploy_name!r} already exists. '
                        'Skipping deployment creation.')
            deployment_exists = True
            break

    if not deployment_exists:
        logger.info(f'Creating/Updating deployment: {deployment_name}')
        create_or_update = get_azure_sdk_function(
            client=resource_client.deployments,
            function_name='create_or_update')
        # TODO (skypilot): this takes a long time (> 40 seconds) to run.
        outputs = create_or_update(
            resource_group_name=resource_group,
            deployment_name=deployment_name,
            parameters=parameters,
        ).result().properties.outputs

    # append output resource ids to be used with vm creation
    provider_config['msi'] = outputs['msi']['value']
    provider_config['nsg'] = outputs['nsg']['value']
    provider_config['subnet'] = outputs['subnet']['value']

    return config
|