skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. sky/__init__.py +64 -32
  2. sky/adaptors/aws.py +23 -6
  3. sky/adaptors/azure.py +432 -15
  4. sky/adaptors/cloudflare.py +5 -5
  5. sky/adaptors/common.py +19 -9
  6. sky/adaptors/do.py +20 -0
  7. sky/adaptors/gcp.py +3 -2
  8. sky/adaptors/kubernetes.py +122 -88
  9. sky/adaptors/nebius.py +100 -0
  10. sky/adaptors/oci.py +39 -1
  11. sky/adaptors/vast.py +29 -0
  12. sky/admin_policy.py +101 -0
  13. sky/authentication.py +117 -98
  14. sky/backends/backend.py +52 -20
  15. sky/backends/backend_utils.py +669 -557
  16. sky/backends/cloud_vm_ray_backend.py +1099 -808
  17. sky/backends/local_docker_backend.py +14 -8
  18. sky/backends/wheel_utils.py +38 -20
  19. sky/benchmark/benchmark_utils.py +22 -23
  20. sky/check.py +76 -27
  21. sky/cli.py +1586 -1139
  22. sky/client/__init__.py +1 -0
  23. sky/client/cli.py +5683 -0
  24. sky/client/common.py +345 -0
  25. sky/client/sdk.py +1765 -0
  26. sky/cloud_stores.py +283 -19
  27. sky/clouds/__init__.py +7 -2
  28. sky/clouds/aws.py +303 -112
  29. sky/clouds/azure.py +185 -179
  30. sky/clouds/cloud.py +115 -37
  31. sky/clouds/cudo.py +29 -22
  32. sky/clouds/do.py +313 -0
  33. sky/clouds/fluidstack.py +44 -54
  34. sky/clouds/gcp.py +206 -65
  35. sky/clouds/ibm.py +26 -21
  36. sky/clouds/kubernetes.py +345 -91
  37. sky/clouds/lambda_cloud.py +40 -29
  38. sky/clouds/nebius.py +297 -0
  39. sky/clouds/oci.py +129 -90
  40. sky/clouds/paperspace.py +22 -18
  41. sky/clouds/runpod.py +53 -34
  42. sky/clouds/scp.py +28 -24
  43. sky/clouds/service_catalog/__init__.py +19 -13
  44. sky/clouds/service_catalog/aws_catalog.py +29 -12
  45. sky/clouds/service_catalog/azure_catalog.py +33 -6
  46. sky/clouds/service_catalog/common.py +95 -75
  47. sky/clouds/service_catalog/constants.py +3 -3
  48. sky/clouds/service_catalog/cudo_catalog.py +13 -3
  49. sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
  50. sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
  51. sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
  52. sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
  53. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
  54. sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
  55. sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
  56. sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
  57. sky/clouds/service_catalog/do_catalog.py +111 -0
  58. sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
  59. sky/clouds/service_catalog/gcp_catalog.py +16 -2
  60. sky/clouds/service_catalog/ibm_catalog.py +2 -2
  61. sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
  62. sky/clouds/service_catalog/lambda_catalog.py +8 -3
  63. sky/clouds/service_catalog/nebius_catalog.py +116 -0
  64. sky/clouds/service_catalog/oci_catalog.py +31 -4
  65. sky/clouds/service_catalog/paperspace_catalog.py +2 -2
  66. sky/clouds/service_catalog/runpod_catalog.py +2 -2
  67. sky/clouds/service_catalog/scp_catalog.py +2 -2
  68. sky/clouds/service_catalog/vast_catalog.py +104 -0
  69. sky/clouds/service_catalog/vsphere_catalog.py +2 -2
  70. sky/clouds/utils/aws_utils.py +65 -0
  71. sky/clouds/utils/azure_utils.py +91 -0
  72. sky/clouds/utils/gcp_utils.py +5 -9
  73. sky/clouds/utils/oci_utils.py +47 -5
  74. sky/clouds/utils/scp_utils.py +4 -3
  75. sky/clouds/vast.py +280 -0
  76. sky/clouds/vsphere.py +22 -18
  77. sky/core.py +361 -107
  78. sky/dag.py +41 -28
  79. sky/data/data_transfer.py +37 -0
  80. sky/data/data_utils.py +211 -32
  81. sky/data/mounting_utils.py +182 -30
  82. sky/data/storage.py +2118 -270
  83. sky/data/storage_utils.py +126 -5
  84. sky/exceptions.py +179 -8
  85. sky/execution.py +158 -85
  86. sky/global_user_state.py +150 -34
  87. sky/jobs/__init__.py +12 -10
  88. sky/jobs/client/__init__.py +0 -0
  89. sky/jobs/client/sdk.py +302 -0
  90. sky/jobs/constants.py +49 -11
  91. sky/jobs/controller.py +161 -99
  92. sky/jobs/dashboard/dashboard.py +171 -25
  93. sky/jobs/dashboard/templates/index.html +572 -60
  94. sky/jobs/recovery_strategy.py +157 -156
  95. sky/jobs/scheduler.py +307 -0
  96. sky/jobs/server/__init__.py +1 -0
  97. sky/jobs/server/core.py +598 -0
  98. sky/jobs/server/dashboard_utils.py +69 -0
  99. sky/jobs/server/server.py +190 -0
  100. sky/jobs/state.py +627 -122
  101. sky/jobs/utils.py +615 -206
  102. sky/models.py +27 -0
  103. sky/optimizer.py +142 -83
  104. sky/provision/__init__.py +20 -5
  105. sky/provision/aws/config.py +124 -42
  106. sky/provision/aws/instance.py +130 -53
  107. sky/provision/azure/__init__.py +7 -0
  108. sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
  109. sky/provision/azure/config.py +220 -0
  110. sky/provision/azure/instance.py +1012 -37
  111. sky/provision/common.py +31 -3
  112. sky/provision/constants.py +25 -0
  113. sky/provision/cudo/__init__.py +2 -1
  114. sky/provision/cudo/cudo_utils.py +112 -0
  115. sky/provision/cudo/cudo_wrapper.py +37 -16
  116. sky/provision/cudo/instance.py +28 -12
  117. sky/provision/do/__init__.py +11 -0
  118. sky/provision/do/config.py +14 -0
  119. sky/provision/do/constants.py +10 -0
  120. sky/provision/do/instance.py +287 -0
  121. sky/provision/do/utils.py +301 -0
  122. sky/provision/docker_utils.py +82 -46
  123. sky/provision/fluidstack/fluidstack_utils.py +57 -125
  124. sky/provision/fluidstack/instance.py +15 -43
  125. sky/provision/gcp/config.py +19 -9
  126. sky/provision/gcp/constants.py +7 -1
  127. sky/provision/gcp/instance.py +55 -34
  128. sky/provision/gcp/instance_utils.py +339 -80
  129. sky/provision/gcp/mig_utils.py +210 -0
  130. sky/provision/instance_setup.py +172 -133
  131. sky/provision/kubernetes/__init__.py +1 -0
  132. sky/provision/kubernetes/config.py +104 -90
  133. sky/provision/kubernetes/constants.py +8 -0
  134. sky/provision/kubernetes/instance.py +680 -325
  135. sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
  136. sky/provision/kubernetes/network.py +54 -20
  137. sky/provision/kubernetes/network_utils.py +70 -21
  138. sky/provision/kubernetes/utils.py +1370 -251
  139. sky/provision/lambda_cloud/__init__.py +11 -0
  140. sky/provision/lambda_cloud/config.py +10 -0
  141. sky/provision/lambda_cloud/instance.py +265 -0
  142. sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
  143. sky/provision/logging.py +1 -1
  144. sky/provision/nebius/__init__.py +11 -0
  145. sky/provision/nebius/config.py +11 -0
  146. sky/provision/nebius/instance.py +285 -0
  147. sky/provision/nebius/utils.py +318 -0
  148. sky/provision/oci/__init__.py +15 -0
  149. sky/provision/oci/config.py +51 -0
  150. sky/provision/oci/instance.py +436 -0
  151. sky/provision/oci/query_utils.py +681 -0
  152. sky/provision/paperspace/constants.py +6 -0
  153. sky/provision/paperspace/instance.py +4 -3
  154. sky/provision/paperspace/utils.py +2 -0
  155. sky/provision/provisioner.py +207 -130
  156. sky/provision/runpod/__init__.py +1 -0
  157. sky/provision/runpod/api/__init__.py +3 -0
  158. sky/provision/runpod/api/commands.py +119 -0
  159. sky/provision/runpod/api/pods.py +142 -0
  160. sky/provision/runpod/instance.py +64 -8
  161. sky/provision/runpod/utils.py +239 -23
  162. sky/provision/vast/__init__.py +10 -0
  163. sky/provision/vast/config.py +11 -0
  164. sky/provision/vast/instance.py +247 -0
  165. sky/provision/vast/utils.py +162 -0
  166. sky/provision/vsphere/common/vim_utils.py +1 -1
  167. sky/provision/vsphere/instance.py +8 -18
  168. sky/provision/vsphere/vsphere_utils.py +1 -1
  169. sky/resources.py +247 -102
  170. sky/serve/__init__.py +9 -9
  171. sky/serve/autoscalers.py +361 -299
  172. sky/serve/client/__init__.py +0 -0
  173. sky/serve/client/sdk.py +366 -0
  174. sky/serve/constants.py +12 -3
  175. sky/serve/controller.py +106 -36
  176. sky/serve/load_balancer.py +63 -12
  177. sky/serve/load_balancing_policies.py +84 -2
  178. sky/serve/replica_managers.py +42 -34
  179. sky/serve/serve_state.py +62 -32
  180. sky/serve/serve_utils.py +271 -160
  181. sky/serve/server/__init__.py +0 -0
  182. sky/serve/{core.py → server/core.py} +271 -90
  183. sky/serve/server/server.py +112 -0
  184. sky/serve/service.py +52 -16
  185. sky/serve/service_spec.py +95 -32
  186. sky/server/__init__.py +1 -0
  187. sky/server/common.py +430 -0
  188. sky/server/constants.py +21 -0
  189. sky/server/html/log.html +174 -0
  190. sky/server/requests/__init__.py +0 -0
  191. sky/server/requests/executor.py +472 -0
  192. sky/server/requests/payloads.py +487 -0
  193. sky/server/requests/queues/__init__.py +0 -0
  194. sky/server/requests/queues/mp_queue.py +76 -0
  195. sky/server/requests/requests.py +567 -0
  196. sky/server/requests/serializers/__init__.py +0 -0
  197. sky/server/requests/serializers/decoders.py +192 -0
  198. sky/server/requests/serializers/encoders.py +166 -0
  199. sky/server/server.py +1106 -0
  200. sky/server/stream_utils.py +141 -0
  201. sky/setup_files/MANIFEST.in +2 -5
  202. sky/setup_files/dependencies.py +159 -0
  203. sky/setup_files/setup.py +14 -125
  204. sky/sky_logging.py +59 -14
  205. sky/skylet/autostop_lib.py +2 -2
  206. sky/skylet/constants.py +183 -50
  207. sky/skylet/events.py +22 -10
  208. sky/skylet/job_lib.py +403 -258
  209. sky/skylet/log_lib.py +111 -71
  210. sky/skylet/log_lib.pyi +6 -0
  211. sky/skylet/providers/command_runner.py +6 -8
  212. sky/skylet/providers/ibm/node_provider.py +2 -2
  213. sky/skylet/providers/scp/config.py +11 -3
  214. sky/skylet/providers/scp/node_provider.py +8 -8
  215. sky/skylet/skylet.py +3 -1
  216. sky/skylet/subprocess_daemon.py +69 -17
  217. sky/skypilot_config.py +119 -57
  218. sky/task.py +205 -64
  219. sky/templates/aws-ray.yml.j2 +37 -7
  220. sky/templates/azure-ray.yml.j2 +27 -82
  221. sky/templates/cudo-ray.yml.j2 +7 -3
  222. sky/templates/do-ray.yml.j2 +98 -0
  223. sky/templates/fluidstack-ray.yml.j2 +7 -4
  224. sky/templates/gcp-ray.yml.j2 +26 -6
  225. sky/templates/ibm-ray.yml.j2 +3 -2
  226. sky/templates/jobs-controller.yaml.j2 +46 -11
  227. sky/templates/kubernetes-ingress.yml.j2 +7 -0
  228. sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
  229. sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
  230. sky/templates/kubernetes-ray.yml.j2 +292 -25
  231. sky/templates/lambda-ray.yml.j2 +30 -40
  232. sky/templates/nebius-ray.yml.j2 +79 -0
  233. sky/templates/oci-ray.yml.j2 +18 -57
  234. sky/templates/paperspace-ray.yml.j2 +10 -6
  235. sky/templates/runpod-ray.yml.j2 +26 -4
  236. sky/templates/scp-ray.yml.j2 +3 -2
  237. sky/templates/sky-serve-controller.yaml.j2 +12 -1
  238. sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
  239. sky/templates/vast-ray.yml.j2 +70 -0
  240. sky/templates/vsphere-ray.yml.j2 +8 -3
  241. sky/templates/websocket_proxy.py +64 -0
  242. sky/usage/constants.py +10 -1
  243. sky/usage/usage_lib.py +130 -37
  244. sky/utils/accelerator_registry.py +35 -51
  245. sky/utils/admin_policy_utils.py +147 -0
  246. sky/utils/annotations.py +51 -0
  247. sky/utils/cli_utils/status_utils.py +81 -23
  248. sky/utils/cluster_utils.py +356 -0
  249. sky/utils/command_runner.py +452 -89
  250. sky/utils/command_runner.pyi +77 -3
  251. sky/utils/common.py +54 -0
  252. sky/utils/common_utils.py +319 -108
  253. sky/utils/config_utils.py +204 -0
  254. sky/utils/control_master_utils.py +48 -0
  255. sky/utils/controller_utils.py +548 -266
  256. sky/utils/dag_utils.py +93 -32
  257. sky/utils/db_utils.py +18 -4
  258. sky/utils/env_options.py +29 -7
  259. sky/utils/kubernetes/create_cluster.sh +8 -60
  260. sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
  261. sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
  262. sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
  263. sky/utils/kubernetes/gpu_labeler.py +4 -4
  264. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
  265. sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
  266. sky/utils/kubernetes/rsync_helper.sh +24 -0
  267. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
  268. sky/utils/log_utils.py +240 -33
  269. sky/utils/message_utils.py +81 -0
  270. sky/utils/registry.py +127 -0
  271. sky/utils/resources_utils.py +94 -22
  272. sky/utils/rich_utils.py +247 -18
  273. sky/utils/schemas.py +284 -64
  274. sky/{status_lib.py → utils/status_lib.py} +12 -7
  275. sky/utils/subprocess_utils.py +212 -46
  276. sky/utils/timeline.py +12 -7
  277. sky/utils/ux_utils.py +168 -15
  278. skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
  279. skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
  280. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
  281. sky/clouds/cloud_registry.py +0 -31
  282. sky/jobs/core.py +0 -330
  283. sky/skylet/providers/azure/__init__.py +0 -2
  284. sky/skylet/providers/azure/azure-vm-template.json +0 -301
  285. sky/skylet/providers/azure/config.py +0 -170
  286. sky/skylet/providers/azure/node_provider.py +0 -466
  287. sky/skylet/providers/lambda_cloud/__init__.py +0 -2
  288. sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
  289. sky/skylet/providers/oci/__init__.py +0 -2
  290. sky/skylet/providers/oci/node_provider.py +0 -488
  291. sky/skylet/providers/oci/query_helper.py +0 -383
  292. sky/skylet/providers/oci/utils.py +0 -21
  293. sky/utils/cluster_yaml_utils.py +0 -24
  294. sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
  295. skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
  296. skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
  297. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
  298. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
  299. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
@@ -65,7 +65,7 @@ class Metadata:
65
65
  if value is None:
66
66
  if instance_id in metadata:
67
67
  metadata.pop(instance_id) # del entry
68
- if len(metadata) == 0:
68
+ if not metadata:
69
69
  if os.path.exists(self.path):
70
70
  os.remove(self.path)
71
71
  return
@@ -84,7 +84,7 @@ class Metadata:
84
84
  for instance_id in list(metadata.keys()):
85
85
  if instance_id not in instance_ids:
86
86
  del metadata[instance_id]
87
- if len(metadata) == 0:
87
+ if not metadata:
88
88
  os.remove(self.path)
89
89
  return
90
90
  with open(self.path, 'w', encoding='utf-8') as f:
@@ -223,6 +223,7 @@ class SCPClient:
223
223
  def create_security_group(self, zone_id, vpc, sg_name):
224
224
  url = f'{API_ENDPOINT}/security-group/v3/security-groups'
225
225
  request_body = {
226
+ 'loggable': False,
226
227
  'securityGroupName': sg_name,
227
228
  'serviceZoneId': zone_id,
228
229
  'vpcId': vpc,
@@ -409,7 +410,7 @@ class SCPClient:
409
410
  parameter.append('vpcId=' + vpc_id)
410
411
  if sg_name is not None:
411
412
  parameter.append('securityGroupName=' + sg_name)
412
- if len(parameter) > 0:
413
+ if parameter:
413
414
  url = url + '?' + '&'.join(parameter)
414
415
  return self._get(url)
415
416
 
sky/clouds/vast.py ADDED
@@ -0,0 +1,280 @@
1
+ """ Vast Cloud. """
2
+
3
+ import typing
4
+ from typing import Dict, Iterator, List, Optional, Tuple, Union
5
+
6
+ from sky import clouds
7
+ from sky.clouds import service_catalog
8
+ from sky.utils import registry
9
+ from sky.utils import resources_utils
10
+
11
+ if typing.TYPE_CHECKING:
12
+ from sky import resources as resources_lib
13
+
14
+
15
+ @registry.CLOUD_REGISTRY.register
16
+ class Vast(clouds.Cloud):
17
+ """ Vast GPU Cloud
18
+
19
+ _REPR | The string representation for the Vast GPU cloud object.
20
+ """
21
+ _REPR = 'Vast'
22
+ _CLOUD_UNSUPPORTED_FEATURES = {
23
+ clouds.CloudImplementationFeatures.MULTI_NODE:
24
+ ('Multi-node not supported yet, as the interconnection among nodes '
25
+ 'are non-trivial on Vast.'),
26
+ clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER:
27
+ ('Customizing disk tier is not supported yet on Vast.'),
28
+ clouds.CloudImplementationFeatures.OPEN_PORTS:
29
+ ('Opening ports is currently not supported on Vast.'),
30
+ clouds.CloudImplementationFeatures.STORAGE_MOUNTING:
31
+ ('Mounting object stores is not supported on Vast.'),
32
+ }
33
+ #
34
+ # Vast doesn't have a max cluster name limit. This number
35
+ # is reasonably large and exists to play nicely with the
36
+ # other providers
37
+ #
38
+ _MAX_CLUSTER_NAME_LEN_LIMIT = 120
39
+ _regions: List[clouds.Region] = []
40
+
41
+ PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
42
+ STATUS_VERSION = clouds.StatusVersion.SKYPILOT
43
+
44
+ @classmethod
45
+ def _unsupported_features_for_resources(
46
+ cls, resources: 'resources_lib.Resources'
47
+ ) -> Dict[clouds.CloudImplementationFeatures, str]:
48
+ """The features not supported based on the resources provided.
49
+
50
+ This method is used by check_features_are_supported() to check if the
51
+ cloud implementation supports all the requested features.
52
+
53
+ Returns:
54
+ A dict of {feature: reason} for the features not supported by the
55
+ cloud implementation.
56
+ """
57
+ del resources # unused
58
+ return cls._CLOUD_UNSUPPORTED_FEATURES
59
+
60
+ @classmethod
61
+ def _max_cluster_name_length(cls) -> Optional[int]:
62
+ return cls._MAX_CLUSTER_NAME_LEN_LIMIT
63
+
64
+ @classmethod
65
+ def regions_with_offering(cls, instance_type: str,
66
+ accelerators: Optional[Dict[str, int]],
67
+ use_spot: bool, region: Optional[str],
68
+ zone: Optional[str]) -> List[clouds.Region]:
69
+ assert zone is None, 'Vast does not support zones.'
70
+ del accelerators, zone # unused
71
+ regions = service_catalog.get_region_zones_for_instance_type(
72
+ instance_type, use_spot, 'vast')
73
+
74
+ if region is not None:
75
+ regions = [r for r in regions if r.name == region]
76
+ return regions
77
+
78
+ @classmethod
79
+ def get_vcpus_mem_from_instance_type(
80
+ cls,
81
+ instance_type: str,
82
+ ) -> Tuple[Optional[float], Optional[float]]:
83
+ return service_catalog.get_vcpus_mem_from_instance_type(instance_type,
84
+ clouds='vast')
85
+
86
+ @classmethod
87
+ def zones_provision_loop(
88
+ cls,
89
+ *,
90
+ region: str,
91
+ num_nodes: int,
92
+ instance_type: str,
93
+ accelerators: Optional[Dict[str, int]] = None,
94
+ use_spot: bool = False,
95
+ ) -> Iterator[None]:
96
+ del num_nodes # unused
97
+ regions = cls.regions_with_offering(instance_type,
98
+ accelerators,
99
+ use_spot,
100
+ region=region,
101
+ zone=None)
102
+ for r in regions:
103
+ assert r.zones is None, r
104
+ yield r.zones
105
+
106
+ def instance_type_to_hourly_cost(self,
107
+ instance_type: str,
108
+ use_spot: bool,
109
+ region: Optional[str] = None,
110
+ zone: Optional[str] = None) -> float:
111
+ return service_catalog.get_hourly_cost(instance_type,
112
+ use_spot=use_spot,
113
+ region=region,
114
+ zone=zone,
115
+ clouds='vast')
116
+
117
+ def accelerators_to_hourly_cost(self,
118
+ accelerators: Dict[str, int],
119
+ use_spot: bool,
120
+ region: Optional[str] = None,
121
+ zone: Optional[str] = None) -> float:
122
+ """Returns the hourly cost of the accelerators, in dollars/hour."""
123
+ del accelerators, use_spot, region, zone # unused
124
+ return 0.0 # Vast includes accelerators in the hourly cost.
125
+
126
+ def get_egress_cost(self, num_gigabytes: float) -> float:
127
+ return 0.0
128
+
129
+ @classmethod
130
+ def get_default_instance_type(
131
+ cls,
132
+ cpus: Optional[str] = None,
133
+ memory: Optional[str] = None,
134
+ disk_tier: Optional[resources_utils.DiskTier] = None
135
+ ) -> Optional[str]:
136
+ """Returns the default instance type for Vast."""
137
+ return service_catalog.get_default_instance_type(cpus=cpus,
138
+ memory=memory,
139
+ disk_tier=disk_tier,
140
+ clouds='vast')
141
+
142
+ @classmethod
143
+ def get_accelerators_from_instance_type(
144
+ cls, instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
145
+ return service_catalog.get_accelerators_from_instance_type(
146
+ instance_type, clouds='vast')
147
+
148
+ @classmethod
149
+ def get_zone_shell_cmd(cls) -> Optional[str]:
150
+ return None
151
+
152
+ def make_deploy_resources_variables(
153
+ self,
154
+ resources: 'resources_lib.Resources',
155
+ cluster_name: resources_utils.ClusterName,
156
+ region: 'clouds.Region',
157
+ zones: Optional[List['clouds.Zone']],
158
+ num_nodes: int,
159
+ dryrun: bool = False) -> Dict[str, Optional[str]]:
160
+ del zones, dryrun, cluster_name, num_nodes # unused
161
+
162
+ r = resources
163
+ acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
164
+ custom_resources = resources_utils.make_ray_custom_resources_str(
165
+ acc_dict)
166
+
167
+ if r.image_id is None:
168
+ image_id = 'vastai/base:0.0.2'
169
+ elif r.extract_docker_image() is not None:
170
+ image_id = r.extract_docker_image()
171
+ else:
172
+ image_id = r.image_id[r.region]
173
+
174
+ return {
175
+ 'instance_type': resources.instance_type,
176
+ 'custom_resources': custom_resources,
177
+ 'region': region.name,
178
+ 'image_id': image_id,
179
+ }
180
+
181
+ def _get_feasible_launchable_resources(
182
+ self, resources: 'resources_lib.Resources'
183
+ ) -> 'resources_utils.FeasibleResources':
184
+ """Returns a list of feasible resources for the given resources."""
185
+ if resources.instance_type is not None:
186
+ assert resources.is_launchable(), resources
187
+ resources = resources.copy(accelerators=None)
188
+ return resources_utils.FeasibleResources([resources], [], None)
189
+
190
+ def _make(instance_list):
191
+ resource_list = []
192
+ for instance_type in instance_list:
193
+ r = resources.copy(
194
+ cloud=Vast(),
195
+ instance_type=instance_type,
196
+ accelerators=None,
197
+ cpus=None,
198
+ )
199
+ resource_list.append(r)
200
+ return resource_list
201
+
202
+ # Currently, handle a filter on accelerators only.
203
+ accelerators = resources.accelerators
204
+ if accelerators is None:
205
+ # Return a default instance type
206
+ default_instance_type = Vast.get_default_instance_type(
207
+ cpus=resources.cpus,
208
+ memory=resources.memory,
209
+ disk_tier=resources.disk_tier)
210
+ if default_instance_type is None:
211
+ # TODO: Add hints to all return values in this method to help
212
+ # users understand why the resources are not launchable.
213
+ return resources_utils.FeasibleResources([], [], None)
214
+ else:
215
+ return resources_utils.FeasibleResources(
216
+ _make([default_instance_type]), [], None)
217
+
218
+ assert len(accelerators) == 1, resources
219
+ acc, acc_count = list(accelerators.items())[0]
220
+ (instance_list, fuzzy_candidate_list
221
+ ) = service_catalog.get_instance_type_for_accelerator(
222
+ acc,
223
+ acc_count,
224
+ use_spot=resources.use_spot,
225
+ cpus=resources.cpus,
226
+ region=resources.region,
227
+ zone=resources.zone,
228
+ memory=resources.memory,
229
+ clouds='vast')
230
+ if instance_list is None:
231
+ return resources_utils.FeasibleResources([], fuzzy_candidate_list,
232
+ None)
233
+ return resources_utils.FeasibleResources(_make(instance_list),
234
+ fuzzy_candidate_list, None)
235
+
236
+ @classmethod
237
+ def check_credentials(cls) -> Tuple[bool, Optional[str]]:
238
+ """ Verify that the user has valid credentials for Vast. """
239
+ try:
240
+ import vastai_sdk as _vast # pylint: disable=import-outside-toplevel
241
+ vast = _vast.VastAI()
242
+
243
+ # We only support file pased credential passing
244
+ if vast.creds_source != 'FILE':
245
+ return False, (
246
+ 'error \n' # First line is indented by 4 spaces
247
+ ' Credentials can be set up by running: \n'
248
+ ' $ pip install vastai\n'
249
+ ' $ echo [key] > ~/.vast_api_key\n'
250
+ ' For more information, see https://skypilot.readthedocs.io/en/latest/getting-started/installation.html#vast' # pylint: disable=line-too-long
251
+ )
252
+
253
+ return True, None
254
+
255
+ except ImportError:
256
+ return False, ('Failed to import vast. '
257
+ 'To install, run: pip install skypilot[vast]')
258
+
259
+ def get_credential_file_mounts(self) -> Dict[str, str]:
260
+ return {
261
+ '~/.config/vastai/vast_api_key': '~/.config/vastai/vast_api_key'
262
+ }
263
+
264
+ @classmethod
265
+ def get_user_identities(cls) -> Optional[List[List[str]]]:
266
+ # NOTE: used for very advanced SkyPilot functionality
267
+ # Can implement later if desired
268
+ return None
269
+
270
+ def instance_type_exists(self, instance_type: str) -> bool:
271
+ return service_catalog.instance_type_exists(instance_type, 'vast')
272
+
273
+ def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
274
+ return service_catalog.validate_region_zone(region, zone, clouds='vast')
275
+
276
+ @classmethod
277
+ def get_image_size(cls, image_id: str, region: Optional[str]) -> float:
278
+ # TODO: use 0.0 for now to allow all images. We should change this to
279
+ # return the docker image size.
280
+ return 0.0
sky/clouds/vsphere.py CHANGED
@@ -1,8 +1,7 @@
1
1
  """Vsphere cloud implementation."""
2
- import json
3
2
  import subprocess
4
3
  import typing
5
- from typing import Dict, Iterator, List, Optional, Tuple
4
+ from typing import Dict, Iterator, List, Optional, Tuple, Union
6
5
 
7
6
  import requests
8
7
 
@@ -12,6 +11,7 @@ from sky.provision.vsphere import vsphere_utils
12
11
  from sky.provision.vsphere.vsphere_utils import get_vsphere_credentials
13
12
  from sky.provision.vsphere.vsphere_utils import initialize_vsphere_data
14
13
  from sky.utils import common_utils
14
+ from sky.utils import registry
15
15
  from sky.utils import resources_utils
16
16
 
17
17
  if typing.TYPE_CHECKING:
@@ -25,7 +25,7 @@ _CREDENTIAL_FILES = [
25
25
  ]
26
26
 
27
27
 
28
- @clouds.CLOUD_REGISTRY.register
28
+ @registry.CLOUD_REGISTRY.register
29
29
  class Vsphere(clouds.Cloud):
30
30
  """Vsphere cloud"""
31
31
 
@@ -152,7 +152,7 @@ class Vsphere(clouds.Cloud):
152
152
  def get_accelerators_from_instance_type(
153
153
  cls,
154
154
  instance_type: str,
155
- ) -> Optional[Dict[str, int]]:
155
+ ) -> Optional[Dict[str, Union[int, float]]]:
156
156
  return service_catalog.get_accelerators_from_instance_type(
157
157
  instance_type, clouds=_CLOUD_VSPHERE)
158
158
 
@@ -171,21 +171,20 @@ class Vsphere(clouds.Cloud):
171
171
  def make_deploy_resources_variables(
172
172
  self,
173
173
  resources: 'resources_lib.Resources',
174
- cluster_name_on_cloud: str,
174
+ cluster_name: resources_utils.ClusterName,
175
175
  region: 'clouds.Region',
176
176
  zones: Optional[List['clouds.Zone']],
177
+ num_nodes: int,
177
178
  dryrun: bool = False,
178
179
  ) -> Dict[str, Optional[str]]:
179
180
  # TODO get image id here.
180
- del cluster_name_on_cloud, dryrun # unused
181
+ del cluster_name, dryrun # unused
181
182
  assert zones is not None, (region, zones)
182
183
  zone_names = [zone.name for zone in zones]
183
184
  r = resources
184
185
  acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
185
- if acc_dict is not None:
186
- custom_resources = json.dumps(acc_dict, separators=(',', ':'))
187
- else:
188
- custom_resources = None
186
+ custom_resources = resources_utils.make_ray_custom_resources_str(
187
+ acc_dict)
189
188
 
190
189
  return {
191
190
  'instance_type': resources.instance_type,
@@ -197,11 +196,13 @@ class Vsphere(clouds.Cloud):
197
196
  def _get_feasible_launchable_resources(
198
197
  self, resources: 'resources_lib.Resources'):
199
198
  if resources.use_spot:
200
- return ([], [])
199
+ # TODO: Add hints to all return values in this method to help
200
+ # users understand why the resources are not launchable.
201
+ return resources_utils.FeasibleResources([], [], None)
201
202
  if resources.instance_type is not None:
202
203
  assert resources.is_launchable(), resources
203
204
  resources = resources.copy(accelerators=None)
204
- return ([resources], [])
205
+ return resources_utils.FeasibleResources([resources], [], None)
205
206
 
206
207
  def _make(instance_list):
207
208
  resource_list = []
@@ -226,9 +227,10 @@ class Vsphere(clouds.Cloud):
226
227
  disk_tier=resources.disk_tier,
227
228
  )
228
229
  if default_instance_type is None:
229
- return ([], [])
230
+ return resources_utils.FeasibleResources([], [], None)
230
231
  else:
231
- return (_make([default_instance_type]), [])
232
+ return resources_utils.FeasibleResources(
233
+ _make([default_instance_type]), [], None)
232
234
 
233
235
  assert len(accelerators) == 1, resources
234
236
  acc, acc_count = list(accelerators.items())[0]
@@ -246,8 +248,10 @@ class Vsphere(clouds.Cloud):
246
248
  clouds=_CLOUD_VSPHERE,
247
249
  )
248
250
  if instance_list is None:
249
- return ([], fuzzy_candidate_list)
250
- return (_make(instance_list), fuzzy_candidate_list)
251
+ return resources_utils.FeasibleResources([], fuzzy_candidate_list,
252
+ None)
253
+ return resources_utils.FeasibleResources(_make(instance_list),
254
+ fuzzy_candidate_list, None)
251
255
 
252
256
  @classmethod
253
257
  def check_credentials(cls) -> Tuple[bool, Optional[str]]:
@@ -263,7 +267,7 @@ class Vsphere(clouds.Cloud):
263
267
  'Run the following commands:'
264
268
  f'\n{cls._INDENT_PREFIX} $ pip install skypilot[vSphere]'
265
269
  f'\n{cls._INDENT_PREFIX}Credentials may also need to be set. '
266
- 'For more details. See https://skypilot.readthedocs.io/en/latest/getting-started/installation.html#vmware-vsphere' # pylint: disable=line-too-long
270
+ 'For more details. See https://docs.skypilot.co/en/latest/getting-started/installation.html#vmware-vsphere' # pylint: disable=line-too-long
267
271
  f'{common_utils.format_exception(e, use_bracket=True)}')
268
272
 
269
273
  required_keys = ['name', 'username', 'password', 'clusters']
@@ -303,7 +307,7 @@ class Vsphere(clouds.Cloud):
303
307
  }
304
308
 
305
309
  @classmethod
306
- def get_current_user_identity(cls) -> Optional[List[str]]:
310
+ def get_user_identities(cls) -> Optional[List[List[str]]]:
307
311
  # NOTE: used for very advanced SkyPilot functionality
308
312
  # Can implement later if desired
309
313
  return None