skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. sky/__init__.py +64 -32
  2. sky/adaptors/aws.py +23 -6
  3. sky/adaptors/azure.py +432 -15
  4. sky/adaptors/cloudflare.py +5 -5
  5. sky/adaptors/common.py +19 -9
  6. sky/adaptors/do.py +20 -0
  7. sky/adaptors/gcp.py +3 -2
  8. sky/adaptors/kubernetes.py +122 -88
  9. sky/adaptors/nebius.py +100 -0
  10. sky/adaptors/oci.py +39 -1
  11. sky/adaptors/vast.py +29 -0
  12. sky/admin_policy.py +101 -0
  13. sky/authentication.py +117 -98
  14. sky/backends/backend.py +52 -20
  15. sky/backends/backend_utils.py +669 -557
  16. sky/backends/cloud_vm_ray_backend.py +1099 -808
  17. sky/backends/local_docker_backend.py +14 -8
  18. sky/backends/wheel_utils.py +38 -20
  19. sky/benchmark/benchmark_utils.py +22 -23
  20. sky/check.py +76 -27
  21. sky/cli.py +1586 -1139
  22. sky/client/__init__.py +1 -0
  23. sky/client/cli.py +5683 -0
  24. sky/client/common.py +345 -0
  25. sky/client/sdk.py +1765 -0
  26. sky/cloud_stores.py +283 -19
  27. sky/clouds/__init__.py +7 -2
  28. sky/clouds/aws.py +303 -112
  29. sky/clouds/azure.py +185 -179
  30. sky/clouds/cloud.py +115 -37
  31. sky/clouds/cudo.py +29 -22
  32. sky/clouds/do.py +313 -0
  33. sky/clouds/fluidstack.py +44 -54
  34. sky/clouds/gcp.py +206 -65
  35. sky/clouds/ibm.py +26 -21
  36. sky/clouds/kubernetes.py +345 -91
  37. sky/clouds/lambda_cloud.py +40 -29
  38. sky/clouds/nebius.py +297 -0
  39. sky/clouds/oci.py +129 -90
  40. sky/clouds/paperspace.py +22 -18
  41. sky/clouds/runpod.py +53 -34
  42. sky/clouds/scp.py +28 -24
  43. sky/clouds/service_catalog/__init__.py +19 -13
  44. sky/clouds/service_catalog/aws_catalog.py +29 -12
  45. sky/clouds/service_catalog/azure_catalog.py +33 -6
  46. sky/clouds/service_catalog/common.py +95 -75
  47. sky/clouds/service_catalog/constants.py +3 -3
  48. sky/clouds/service_catalog/cudo_catalog.py +13 -3
  49. sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
  50. sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
  51. sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
  52. sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
  53. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
  54. sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
  55. sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
  56. sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
  57. sky/clouds/service_catalog/do_catalog.py +111 -0
  58. sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
  59. sky/clouds/service_catalog/gcp_catalog.py +16 -2
  60. sky/clouds/service_catalog/ibm_catalog.py +2 -2
  61. sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
  62. sky/clouds/service_catalog/lambda_catalog.py +8 -3
  63. sky/clouds/service_catalog/nebius_catalog.py +116 -0
  64. sky/clouds/service_catalog/oci_catalog.py +31 -4
  65. sky/clouds/service_catalog/paperspace_catalog.py +2 -2
  66. sky/clouds/service_catalog/runpod_catalog.py +2 -2
  67. sky/clouds/service_catalog/scp_catalog.py +2 -2
  68. sky/clouds/service_catalog/vast_catalog.py +104 -0
  69. sky/clouds/service_catalog/vsphere_catalog.py +2 -2
  70. sky/clouds/utils/aws_utils.py +65 -0
  71. sky/clouds/utils/azure_utils.py +91 -0
  72. sky/clouds/utils/gcp_utils.py +5 -9
  73. sky/clouds/utils/oci_utils.py +47 -5
  74. sky/clouds/utils/scp_utils.py +4 -3
  75. sky/clouds/vast.py +280 -0
  76. sky/clouds/vsphere.py +22 -18
  77. sky/core.py +361 -107
  78. sky/dag.py +41 -28
  79. sky/data/data_transfer.py +37 -0
  80. sky/data/data_utils.py +211 -32
  81. sky/data/mounting_utils.py +182 -30
  82. sky/data/storage.py +2118 -270
  83. sky/data/storage_utils.py +126 -5
  84. sky/exceptions.py +179 -8
  85. sky/execution.py +158 -85
  86. sky/global_user_state.py +150 -34
  87. sky/jobs/__init__.py +12 -10
  88. sky/jobs/client/__init__.py +0 -0
  89. sky/jobs/client/sdk.py +302 -0
  90. sky/jobs/constants.py +49 -11
  91. sky/jobs/controller.py +161 -99
  92. sky/jobs/dashboard/dashboard.py +171 -25
  93. sky/jobs/dashboard/templates/index.html +572 -60
  94. sky/jobs/recovery_strategy.py +157 -156
  95. sky/jobs/scheduler.py +307 -0
  96. sky/jobs/server/__init__.py +1 -0
  97. sky/jobs/server/core.py +598 -0
  98. sky/jobs/server/dashboard_utils.py +69 -0
  99. sky/jobs/server/server.py +190 -0
  100. sky/jobs/state.py +627 -122
  101. sky/jobs/utils.py +615 -206
  102. sky/models.py +27 -0
  103. sky/optimizer.py +142 -83
  104. sky/provision/__init__.py +20 -5
  105. sky/provision/aws/config.py +124 -42
  106. sky/provision/aws/instance.py +130 -53
  107. sky/provision/azure/__init__.py +7 -0
  108. sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
  109. sky/provision/azure/config.py +220 -0
  110. sky/provision/azure/instance.py +1012 -37
  111. sky/provision/common.py +31 -3
  112. sky/provision/constants.py +25 -0
  113. sky/provision/cudo/__init__.py +2 -1
  114. sky/provision/cudo/cudo_utils.py +112 -0
  115. sky/provision/cudo/cudo_wrapper.py +37 -16
  116. sky/provision/cudo/instance.py +28 -12
  117. sky/provision/do/__init__.py +11 -0
  118. sky/provision/do/config.py +14 -0
  119. sky/provision/do/constants.py +10 -0
  120. sky/provision/do/instance.py +287 -0
  121. sky/provision/do/utils.py +301 -0
  122. sky/provision/docker_utils.py +82 -46
  123. sky/provision/fluidstack/fluidstack_utils.py +57 -125
  124. sky/provision/fluidstack/instance.py +15 -43
  125. sky/provision/gcp/config.py +19 -9
  126. sky/provision/gcp/constants.py +7 -1
  127. sky/provision/gcp/instance.py +55 -34
  128. sky/provision/gcp/instance_utils.py +339 -80
  129. sky/provision/gcp/mig_utils.py +210 -0
  130. sky/provision/instance_setup.py +172 -133
  131. sky/provision/kubernetes/__init__.py +1 -0
  132. sky/provision/kubernetes/config.py +104 -90
  133. sky/provision/kubernetes/constants.py +8 -0
  134. sky/provision/kubernetes/instance.py +680 -325
  135. sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
  136. sky/provision/kubernetes/network.py +54 -20
  137. sky/provision/kubernetes/network_utils.py +70 -21
  138. sky/provision/kubernetes/utils.py +1370 -251
  139. sky/provision/lambda_cloud/__init__.py +11 -0
  140. sky/provision/lambda_cloud/config.py +10 -0
  141. sky/provision/lambda_cloud/instance.py +265 -0
  142. sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
  143. sky/provision/logging.py +1 -1
  144. sky/provision/nebius/__init__.py +11 -0
  145. sky/provision/nebius/config.py +11 -0
  146. sky/provision/nebius/instance.py +285 -0
  147. sky/provision/nebius/utils.py +318 -0
  148. sky/provision/oci/__init__.py +15 -0
  149. sky/provision/oci/config.py +51 -0
  150. sky/provision/oci/instance.py +436 -0
  151. sky/provision/oci/query_utils.py +681 -0
  152. sky/provision/paperspace/constants.py +6 -0
  153. sky/provision/paperspace/instance.py +4 -3
  154. sky/provision/paperspace/utils.py +2 -0
  155. sky/provision/provisioner.py +207 -130
  156. sky/provision/runpod/__init__.py +1 -0
  157. sky/provision/runpod/api/__init__.py +3 -0
  158. sky/provision/runpod/api/commands.py +119 -0
  159. sky/provision/runpod/api/pods.py +142 -0
  160. sky/provision/runpod/instance.py +64 -8
  161. sky/provision/runpod/utils.py +239 -23
  162. sky/provision/vast/__init__.py +10 -0
  163. sky/provision/vast/config.py +11 -0
  164. sky/provision/vast/instance.py +247 -0
  165. sky/provision/vast/utils.py +162 -0
  166. sky/provision/vsphere/common/vim_utils.py +1 -1
  167. sky/provision/vsphere/instance.py +8 -18
  168. sky/provision/vsphere/vsphere_utils.py +1 -1
  169. sky/resources.py +247 -102
  170. sky/serve/__init__.py +9 -9
  171. sky/serve/autoscalers.py +361 -299
  172. sky/serve/client/__init__.py +0 -0
  173. sky/serve/client/sdk.py +366 -0
  174. sky/serve/constants.py +12 -3
  175. sky/serve/controller.py +106 -36
  176. sky/serve/load_balancer.py +63 -12
  177. sky/serve/load_balancing_policies.py +84 -2
  178. sky/serve/replica_managers.py +42 -34
  179. sky/serve/serve_state.py +62 -32
  180. sky/serve/serve_utils.py +271 -160
  181. sky/serve/server/__init__.py +0 -0
  182. sky/serve/{core.py → server/core.py} +271 -90
  183. sky/serve/server/server.py +112 -0
  184. sky/serve/service.py +52 -16
  185. sky/serve/service_spec.py +95 -32
  186. sky/server/__init__.py +1 -0
  187. sky/server/common.py +430 -0
  188. sky/server/constants.py +21 -0
  189. sky/server/html/log.html +174 -0
  190. sky/server/requests/__init__.py +0 -0
  191. sky/server/requests/executor.py +472 -0
  192. sky/server/requests/payloads.py +487 -0
  193. sky/server/requests/queues/__init__.py +0 -0
  194. sky/server/requests/queues/mp_queue.py +76 -0
  195. sky/server/requests/requests.py +567 -0
  196. sky/server/requests/serializers/__init__.py +0 -0
  197. sky/server/requests/serializers/decoders.py +192 -0
  198. sky/server/requests/serializers/encoders.py +166 -0
  199. sky/server/server.py +1106 -0
  200. sky/server/stream_utils.py +141 -0
  201. sky/setup_files/MANIFEST.in +2 -5
  202. sky/setup_files/dependencies.py +159 -0
  203. sky/setup_files/setup.py +14 -125
  204. sky/sky_logging.py +59 -14
  205. sky/skylet/autostop_lib.py +2 -2
  206. sky/skylet/constants.py +183 -50
  207. sky/skylet/events.py +22 -10
  208. sky/skylet/job_lib.py +403 -258
  209. sky/skylet/log_lib.py +111 -71
  210. sky/skylet/log_lib.pyi +6 -0
  211. sky/skylet/providers/command_runner.py +6 -8
  212. sky/skylet/providers/ibm/node_provider.py +2 -2
  213. sky/skylet/providers/scp/config.py +11 -3
  214. sky/skylet/providers/scp/node_provider.py +8 -8
  215. sky/skylet/skylet.py +3 -1
  216. sky/skylet/subprocess_daemon.py +69 -17
  217. sky/skypilot_config.py +119 -57
  218. sky/task.py +205 -64
  219. sky/templates/aws-ray.yml.j2 +37 -7
  220. sky/templates/azure-ray.yml.j2 +27 -82
  221. sky/templates/cudo-ray.yml.j2 +7 -3
  222. sky/templates/do-ray.yml.j2 +98 -0
  223. sky/templates/fluidstack-ray.yml.j2 +7 -4
  224. sky/templates/gcp-ray.yml.j2 +26 -6
  225. sky/templates/ibm-ray.yml.j2 +3 -2
  226. sky/templates/jobs-controller.yaml.j2 +46 -11
  227. sky/templates/kubernetes-ingress.yml.j2 +7 -0
  228. sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
  229. sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
  230. sky/templates/kubernetes-ray.yml.j2 +292 -25
  231. sky/templates/lambda-ray.yml.j2 +30 -40
  232. sky/templates/nebius-ray.yml.j2 +79 -0
  233. sky/templates/oci-ray.yml.j2 +18 -57
  234. sky/templates/paperspace-ray.yml.j2 +10 -6
  235. sky/templates/runpod-ray.yml.j2 +26 -4
  236. sky/templates/scp-ray.yml.j2 +3 -2
  237. sky/templates/sky-serve-controller.yaml.j2 +12 -1
  238. sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
  239. sky/templates/vast-ray.yml.j2 +70 -0
  240. sky/templates/vsphere-ray.yml.j2 +8 -3
  241. sky/templates/websocket_proxy.py +64 -0
  242. sky/usage/constants.py +10 -1
  243. sky/usage/usage_lib.py +130 -37
  244. sky/utils/accelerator_registry.py +35 -51
  245. sky/utils/admin_policy_utils.py +147 -0
  246. sky/utils/annotations.py +51 -0
  247. sky/utils/cli_utils/status_utils.py +81 -23
  248. sky/utils/cluster_utils.py +356 -0
  249. sky/utils/command_runner.py +452 -89
  250. sky/utils/command_runner.pyi +77 -3
  251. sky/utils/common.py +54 -0
  252. sky/utils/common_utils.py +319 -108
  253. sky/utils/config_utils.py +204 -0
  254. sky/utils/control_master_utils.py +48 -0
  255. sky/utils/controller_utils.py +548 -266
  256. sky/utils/dag_utils.py +93 -32
  257. sky/utils/db_utils.py +18 -4
  258. sky/utils/env_options.py +29 -7
  259. sky/utils/kubernetes/create_cluster.sh +8 -60
  260. sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
  261. sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
  262. sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
  263. sky/utils/kubernetes/gpu_labeler.py +4 -4
  264. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
  265. sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
  266. sky/utils/kubernetes/rsync_helper.sh +24 -0
  267. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
  268. sky/utils/log_utils.py +240 -33
  269. sky/utils/message_utils.py +81 -0
  270. sky/utils/registry.py +127 -0
  271. sky/utils/resources_utils.py +94 -22
  272. sky/utils/rich_utils.py +247 -18
  273. sky/utils/schemas.py +284 -64
  274. sky/{status_lib.py → utils/status_lib.py} +12 -7
  275. sky/utils/subprocess_utils.py +212 -46
  276. sky/utils/timeline.py +12 -7
  277. sky/utils/ux_utils.py +168 -15
  278. skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
  279. skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
  280. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
  281. sky/clouds/cloud_registry.py +0 -31
  282. sky/jobs/core.py +0 -330
  283. sky/skylet/providers/azure/__init__.py +0 -2
  284. sky/skylet/providers/azure/azure-vm-template.json +0 -301
  285. sky/skylet/providers/azure/config.py +0 -170
  286. sky/skylet/providers/azure/node_provider.py +0 -466
  287. sky/skylet/providers/lambda_cloud/__init__.py +0 -2
  288. sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
  289. sky/skylet/providers/oci/__init__.py +0 -2
  290. sky/skylet/providers/oci/node_provider.py +0 -488
  291. sky/skylet/providers/oci/query_helper.py +0 -383
  292. sky/skylet/providers/oci/utils.py +0 -21
  293. sky/utils/cluster_yaml_utils.py +0 -24
  294. sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
  295. skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
  296. skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
  297. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
  298. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
  299. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,220 @@
1
+ """Azure configuration bootstrapping.
2
+
3
+ Creates the resource group and deploys the configuration template to Azure for
4
+ a cluster to be launched.
5
+ """
6
+ import hashlib
7
+ import json
8
+ from pathlib import Path
9
+ import random
10
+ import time
11
+ from typing import Any, Callable, Tuple
12
+
13
+ from sky import exceptions
14
+ from sky import sky_logging
15
+ from sky.adaptors import azure
16
+ from sky.provision import common
17
+ from sky.provision import constants
18
+ from sky.utils import common_utils
19
+
20
logger = sky_logging.init_logger(__name__)

# Number of leading hex characters kept from the MD5 digest of the resource
# group name when building the cluster ID (see get_cluster_id_and_nsg_name).
UNIQUE_ID_LEN = 4
# Upper bound, in seconds, on how long bootstrap_instances keeps retrying
# resource group creation while a previous group of the same name is still
# being deleted.
_RESOURCE_GROUP_WAIT_FOR_DELETION_TIMEOUT = 480  # 8 minutes
# Format template for the cluster ID: '<cluster_name_on_cloud>-<unique_id>'.
_CLUSTER_ID = '{cluster_name_on_cloud}-{unique_id}'
25
+
26
+
27
def get_azure_sdk_function(client: Any, function_name: str) -> Callable:
    """Retrieve a callable function from Azure SDK client object.

    Newer versions of the various client SDKs renamed function names to
    have a begin_ prefix. This function supports both the old and new
    versions of the SDK by first trying the old name and falling back to
    the prefixed new name.

    Args:
        client: The SDK client (or operations group) object to look up the
            function on.
        function_name: The un-prefixed function name, e.g. 'create_or_update'.

    Returns:
        The attribute named `function_name` if present, otherwise the
        attribute named `begin_<function_name>`.

    Raises:
        AttributeError: If the client has neither attribute.
    """
    func = getattr(client, function_name,
                   getattr(client, f'begin_{function_name}', None))
    if func is None:
        # `client` is an instance, so the class name must come from its type;
        # reading `client.__name__` directly would itself raise an unrelated
        # AttributeError and hide which function was actually missing.
        raise AttributeError(
            f'{type(client).__name__!r} object has no {function_name} or '
            f'begin_{function_name} attribute')
    return func
42
+
43
+
44
def get_cluster_id_and_nsg_name(resource_group: str,
                                cluster_name_on_cloud: str) -> Tuple[str, str]:
    """Derive the cluster ID and network security group name for a cluster.

    Args:
        resource_group: Name of the Azure resource group; its MD5 hex digest
            is truncated to UNIQUE_ID_LEN characters to form the unique
            suffix.
        cluster_name_on_cloud: The cluster name as used on the cloud.

    Returns:
        A `(cluster_id, nsg_name)` tuple, where `nsg_name` is
        `sky-<cluster_id>-nsg`.
    """
    digest = hashlib.md5(resource_group.encode('utf-8')).hexdigest()
    # The cluster name is combined with a hash of the resource group so that
    # deployments keep unique names during failover.
    cluster_id = _CLUSTER_ID.format(
        cluster_name_on_cloud=cluster_name_on_cloud,
        unique_id=digest[:UNIQUE_ID_LEN])
    return cluster_id, f'sky-{cluster_id}-nsg'
55
+
56
+
57
@common.log_function_start_end
def bootstrap_instances(
        region: str, cluster_name_on_cloud: str,
        config: common.ProvisionConfig) -> common.ProvisionConfig:
    """See sky/provision/__init__.py

    Ensures the resource group exists (unless an external one is used) and
    that the ARM bootstrap deployment from azure-config-template.json has been
    applied, then records the deployment outputs in the provider config.

    Args:
        region: Unused; the location is read from the provider config.
        cluster_name_on_cloud: The cluster name as used on the cloud.
        config: Provision config whose `provider_config` must contain
            'resource_group', 'location' and 'use_external_resource_group';
            'subscription_id', 'tags' and 'subnet_mask' are optional.

    Returns:
        The same `config` object, with `provider_config` updated in place
        with 'subscription_id', 'msi', 'nsg' and 'subnet'.

    Raises:
        AssertionError: If a required provider config field is missing.
        exceptions.NoClusterLaunchedError: If Azure authentication fails
            while creating the resource group.
        TimeoutError: If a resource group of the same name is still being
            deleted after _RESOURCE_GROUP_WAIT_FOR_DELETION_TIMEOUT seconds.
    """
    # TODO: use new azure sdk instead of ARM deployment.
    del region  # unused
    provider_config = config.provider_config
    subscription_id = provider_config.get('subscription_id')
    if subscription_id is None:
        subscription_id = azure.get_subscription_id()
    # Increase the timeout to fix the Azure get-access-token (used by ray azure
    # node_provider) timeout issue.
    # Tracked in https://github.com/Azure/azure-cli/issues/20404#issuecomment-1249575110 # pylint: disable=line-too-long
    # NOTE(review): no timeout is set in this function; presumably it is
    # handled inside azure.get_client -- confirm or drop this comment.
    resource_client = azure.get_client('resource', subscription_id)
    provider_config['subscription_id'] = subscription_id
    logger.info(f'Using subscription id: {subscription_id}')

    assert (
        'resource_group'
        in provider_config), 'Provider config must include resource_group field'
    resource_group = provider_config['resource_group']

    assert ('location'
            in provider_config), 'Provider config must include location field'
    params = {'location': provider_config['location']}

    assert ('use_external_resource_group'
            in provider_config), ('Provider config must include '
                                  'use_external_resource_group field')
    use_external_resource_group = provider_config['use_external_resource_group']

    if 'tags' in provider_config:
        params['tags'] = provider_config['tags']

    # When resource group is user specified, it already exists in certain
    # region.
    if not use_external_resource_group:
        logger.info(f'Creating/Updating resource group: {resource_group}')
        rg_create_or_update = get_azure_sdk_function(
            client=resource_client.resource_groups,
            function_name='create_or_update')
        rg_creation_start = time.time()
        retry = 0
        # Retry once per second while a previous resource group of the same
        # name is still being deleted; the `else` branch runs only when the
        # loop condition expires without a successful `break`.
        while (time.time() - rg_creation_start <
               _RESOURCE_GROUP_WAIT_FOR_DELETION_TIMEOUT):
            try:
                rg_create_or_update(resource_group_name=resource_group,
                                    parameters=params)
                break
            except azure.exceptions().ResourceExistsError as e:
                if 'ResourceGroupBeingDeleted' in str(e):
                    # Only log every fifth attempt to avoid flooding output.
                    if retry % 5 == 0:
                        logger.info(
                            f'Azure resource group {resource_group} of a '
                            'recent terminated cluster '
                            f'{cluster_name_on_cloud} is being deleted. It can'
                            ' only be provisioned after it is fully deleted. '
                            'Waiting...')
                    time.sleep(1)
                    retry += 1
                    continue
                raise
            except azure.exceptions().ClientAuthenticationError as e:
                message = (
                    'Failed to authenticate with Azure. Please check your '
                    'Azure credentials. Error: '
                    f'{common_utils.format_exception(e)}').replace('\n', ' ')
                logger.error(message)
                raise exceptions.NoClusterLaunchedError(message) from e
        else:
            message = (
                f'Timed out waiting for resource group {resource_group} to be '
                'deleted.')
            logger.error(message)
            raise TimeoutError(message)

    # load the template file
    current_path = Path(__file__).parent
    template_path = current_path.joinpath('azure-config-template.json')
    with open(template_path, 'r', encoding='utf-8') as template_fp:
        template = json.load(template_fp)

    logger.info(f'Using cluster name: {cluster_name_on_cloud}')

    cluster_id, nsg_name = get_cluster_id_and_nsg_name(
        resource_group=resource_group,
        cluster_name_on_cloud=cluster_name_on_cloud)
    subnet_mask = provider_config.get('subnet_mask')
    if subnet_mask is None:
        # choose a random subnet, skipping most common value of 0
        # A private generator seeded with the cluster ID yields the same
        # value as seeding the module-level generator, without resetting the
        # process-wide random state for every other user of `random`.
        subnet_rng = random.Random(cluster_id)
        subnet_mask = f'10.{subnet_rng.randint(1, 254)}.0.0/16'
    logger.info(f'Using subnet mask: {subnet_mask}')

    parameters = {
        'properties': {
            'mode': azure.deployment_mode().incremental,
            'template': template,
            'parameters': {
                'subnet': {
                    'value': subnet_mask
                },
                'clusterId': {
                    'value': cluster_id
                },
                'nsgName': {
                    'value': nsg_name
                },
                'location': {
                    'value': params['location']
                }
            },
        }
    }

    # Skip creating or updating the deployment if the deployment already exists
    # and the cluster name is the same.
    get_deployment = get_azure_sdk_function(client=resource_client.deployments,
                                            function_name='get')
    deployment_exists = False
    if use_external_resource_group:
        deployment_name = (
            constants.EXTERNAL_RG_BOOTSTRAP_DEPLOYMENT_NAME.format(
                cluster_name_on_cloud=cluster_name_on_cloud))
        deployment_list = [deployment_name]
    else:
        deployment_name = constants.DEPLOYMENT_NAME
        deployment_list = [
            constants.DEPLOYMENT_NAME, constants.LEGACY_DEPLOYMENT_NAME
        ]

    outputs = None
    for deploy_name in deployment_list:
        try:
            deployment = get_deployment(resource_group_name=resource_group,
                                        deployment_name=deploy_name)
        except azure.exceptions().ResourceNotFoundError:
            continue

        outputs = deployment.properties.outputs
        if outputs is not None:
            # Only announce the skip when it really happens: a deployment
            # without outputs is not reused and creation proceeds below.
            logger.info(f'Deployment {deploy_name!r} already exists. '
                        'Skipping deployment creation.')
            deployment_exists = True
            break

    if not deployment_exists:
        logger.info(f'Creating/Updating deployment: {deployment_name}')
        create_or_update = get_azure_sdk_function(
            client=resource_client.deployments,
            function_name='create_or_update')
        # TODO (skypilot): this takes a long time (> 40 seconds) to run.
        outputs = create_or_update(
            resource_group_name=resource_group,
            deployment_name=deployment_name,
            parameters=parameters,
        ).result().properties.outputs

    # append output resource ids to be used with vm creation
    provider_config['msi'] = outputs['msi']['value']
    provider_config['nsg'] = outputs['nsg']['value']
    provider_config['subnet'] = outputs['subnet']['value']

    return config