skypilot-nightly 1.0.0.dev20250603__py3-none-any.whl → 1.0.0.dev20250605__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +3 -3
- sky/adaptors/kubernetes.py +8 -0
- sky/admin_policy.py +5 -0
- sky/backends/backend_utils.py +1 -0
- sky/backends/cloud_vm_ray_backend.py +8 -4
- sky/{clouds/service_catalog → catalog}/__init__.py +6 -17
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +3 -3
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +2 -2
- sky/{clouds/service_catalog → catalog}/common.py +2 -2
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +2 -2
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +2 -2
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/ssh_catalog.py +3 -3
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +1 -1
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +1 -1
- sky/cli.py +16 -13
- sky/client/cli.py +16 -13
- sky/client/sdk.py +30 -12
- sky/clouds/aws.py +41 -40
- sky/clouds/azure.py +31 -34
- sky/clouds/cloud.py +8 -8
- sky/clouds/cudo.py +26 -26
- sky/clouds/do.py +24 -24
- sky/clouds/fluidstack.py +27 -29
- sky/clouds/gcp.py +42 -42
- sky/clouds/ibm.py +26 -26
- sky/clouds/kubernetes.py +24 -12
- sky/clouds/lambda_cloud.py +28 -30
- sky/clouds/nebius.py +26 -28
- sky/clouds/oci.py +32 -32
- sky/clouds/paperspace.py +24 -26
- sky/clouds/runpod.py +26 -28
- sky/clouds/scp.py +37 -36
- sky/clouds/utils/gcp_utils.py +3 -2
- sky/clouds/vast.py +27 -27
- sky/clouds/vsphere.py +12 -15
- sky/core.py +2 -2
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/614-635a84e87800f99e.js +66 -0
- sky/dashboard/out/_next/static/chunks/{856-f1b1f7f47edde2e8.js → 856-3a32da4b84176f6d.js} +1 -1
- sky/dashboard/out/_next/static/chunks/937.3759f538f11a0953.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-1a1eeb949dab8897.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/users-262aab38b9baaf3a.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-384ea5fa0cea8f28.js +1 -0
- sky/dashboard/out/_next/static/chunks/{webpack-f27c9a32aa3d9c6d.js → webpack-65d465f948974c0d.js} +1 -1
- sky/dashboard/out/_next/static/css/667d941a2888ce6e.css +3 -0
- sky/dashboard/out/_next/static/qjhIe-yC6nHcLKBqpzO1M/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage_utils.py +5 -2
- sky/execution.py +44 -46
- sky/global_user_state.py +119 -86
- sky/jobs/client/sdk.py +4 -1
- sky/jobs/server/core.py +6 -2
- sky/models.py +1 -0
- sky/optimizer.py +1 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/kubernetes/utils.py +35 -22
- sky/provision/vast/utils.py +1 -1
- sky/provision/vsphere/common/vim_utils.py +1 -2
- sky/provision/vsphere/instance.py +1 -1
- sky/provision/vsphere/vsphere_utils.py +7 -11
- sky/resources.py +24 -3
- sky/serve/server/core.py +1 -1
- sky/server/constants.py +3 -1
- sky/server/requests/executor.py +4 -1
- sky/server/requests/payloads.py +25 -0
- sky/server/requests/serializers/decoders.py +1 -1
- sky/server/server.py +33 -12
- sky/server/stream_utils.py +2 -38
- sky/setup_files/MANIFEST.in +1 -0
- sky/setup_files/dependencies.py +2 -0
- sky/skylet/constants.py +10 -4
- sky/skypilot_config.py +92 -39
- sky/templates/websocket_proxy.py +11 -1
- sky/usage/usage_lib.py +4 -3
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +178 -0
- sky/users/rbac.py +86 -0
- sky/users/server.py +66 -0
- sky/utils/accelerator_registry.py +3 -3
- sky/utils/kubernetes/deploy_remote_cluster.py +2 -1
- sky/utils/schemas.py +20 -10
- sky/workspaces/core.py +2 -2
- {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/METADATA +3 -1
- {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/RECORD +134 -130
- sky/clouds/service_catalog/constants.py +0 -8
- sky/dashboard/out/_next/static/chunks/614-3d29f98e0634b179.js +0 -66
- sky/dashboard/out/_next/static/chunks/937.f97f83652028e944.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/config-35383adcb0edb5e2.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/users-07b523ccb19317ad.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/workspaces-f54921ec9eb20965.js +0 -1
- sky/dashboard/out/_next/static/css/63d3995d8b528eb1.css +0 -3
- sky/dashboard/out/_next/static/zTAFq_Iv6_yxQj3fXvJWR/_buildManifest.js +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +0 -0
- /sky/dashboard/out/_next/static/chunks/{121-8f55ee3fa6301784.js → 121-865d2bf8a3b84c6a.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{236-fef38aa6e5639300.js → 236-4c0dc6f63ccc6319.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{37-947904ccc5687bac.js → 37-beedd583fea84cc8.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{682-2be9b0f169727f2f.js → 682-6647f0417d5662f0.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{843-a097338acb89b7d7.js → 843-c296541442d4af88.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{969-d7b6fb7f602bfcb3.js → 969-c7abda31c10440ac.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/{_app-67925f5e6382e22f.js → _app-cb81dc4d27f4d009.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-158b70da336d8607.js → [job]-65d04d5d77cbb6b6.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-62c9982dc3675725.js → [cluster]-beabbcd7606c1a23.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/jobs/{[job]-a62a3c65dc9bc57c.js → [job]-86c47edc500f15f9.js} +0 -0
- /sky/dashboard/out/_next/static/{zTAFq_Iv6_yxQj3fXvJWR → qjhIe-yC6nHcLKBqpzO1M}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/top_level.txt +0 -0
sky/clouds/fluidstack.py
CHANGED
@@ -3,9 +3,9 @@ import os
|
|
3
3
|
import typing
|
4
4
|
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
5
5
|
|
6
|
+
from sky import catalog
|
6
7
|
from sky import clouds
|
7
8
|
from sky.adaptors import common as adaptors_common
|
8
|
-
from sky.clouds import service_catalog
|
9
9
|
from sky.provision.fluidstack import fluidstack_utils
|
10
10
|
from sky.utils import registry
|
11
11
|
from sky.utils import resources_utils
|
@@ -96,7 +96,7 @@ class Fluidstack(clouds.Cloud):
|
|
96
96
|
del accelerators, zone # unused
|
97
97
|
if use_spot:
|
98
98
|
return []
|
99
|
-
regions =
|
99
|
+
regions = catalog.get_region_zones_for_instance_type(
|
100
100
|
instance_type, use_spot, 'fluidstack')
|
101
101
|
|
102
102
|
if region is not None:
|
@@ -128,11 +128,11 @@ class Fluidstack(clouds.Cloud):
|
|
128
128
|
use_spot: bool,
|
129
129
|
region: Optional[str] = None,
|
130
130
|
zone: Optional[str] = None) -> float:
|
131
|
-
return
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
131
|
+
return catalog.get_hourly_cost(instance_type,
|
132
|
+
use_spot=use_spot,
|
133
|
+
region=region,
|
134
|
+
zone=zone,
|
135
|
+
clouds='fluidstack')
|
136
136
|
|
137
137
|
def accelerators_to_hourly_cost(self,
|
138
138
|
accelerators: Dict[str, int],
|
@@ -155,26 +155,26 @@ class Fluidstack(clouds.Cloud):
|
|
155
155
|
cpus: Optional[str] = None,
|
156
156
|
memory: Optional[str] = None,
|
157
157
|
disk_tier: Optional[DiskTier] = None) -> Optional[str]:
|
158
|
-
return
|
159
|
-
|
160
|
-
|
161
|
-
|
158
|
+
return catalog.get_default_instance_type(cpus=cpus,
|
159
|
+
memory=memory,
|
160
|
+
disk_tier=disk_tier,
|
161
|
+
clouds='fluidstack')
|
162
162
|
|
163
163
|
@classmethod
|
164
164
|
def get_accelerators_from_instance_type(
|
165
165
|
cls,
|
166
166
|
instance_type: str,
|
167
167
|
) -> Optional[Dict[str, Union[int, float]]]:
|
168
|
-
return
|
169
|
-
|
168
|
+
return catalog.get_accelerators_from_instance_type(instance_type,
|
169
|
+
clouds='fluidstack')
|
170
170
|
|
171
171
|
@classmethod
|
172
172
|
def get_vcpus_mem_from_instance_type(
|
173
173
|
cls,
|
174
174
|
instance_type: str,
|
175
175
|
) -> Tuple[Optional[float], Optional[float]]:
|
176
|
-
return
|
177
|
-
|
176
|
+
return catalog.get_vcpus_mem_from_instance_type(instance_type,
|
177
|
+
clouds='fluidstack')
|
178
178
|
|
179
179
|
@classmethod
|
180
180
|
def get_zone_shell_cmd(cls) -> Optional[str]:
|
@@ -247,16 +247,16 @@ class Fluidstack(clouds.Cloud):
|
|
247
247
|
|
248
248
|
assert len(accelerators) == 1, resources
|
249
249
|
acc, acc_count = list(accelerators.items())[0]
|
250
|
-
(instance_list,
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
250
|
+
(instance_list,
|
251
|
+
fuzzy_candidate_list) = catalog.get_instance_type_for_accelerator(
|
252
|
+
acc,
|
253
|
+
acc_count,
|
254
|
+
use_spot=resources.use_spot,
|
255
|
+
cpus=resources.cpus,
|
256
|
+
memory=resources.memory,
|
257
|
+
region=resources.region,
|
258
|
+
zone=resources.zone,
|
259
|
+
clouds='fluidstack')
|
260
260
|
if instance_list is None:
|
261
261
|
return resources_utils.FeasibleResources([], fuzzy_candidate_list,
|
262
262
|
None)
|
@@ -306,12 +306,10 @@ class Fluidstack(clouds.Cloud):
|
|
306
306
|
return None
|
307
307
|
|
308
308
|
def instance_type_exists(self, instance_type: str) -> bool:
|
309
|
-
return
|
309
|
+
return catalog.instance_type_exists(instance_type, 'fluidstack')
|
310
310
|
|
311
311
|
def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
|
312
|
-
return
|
313
|
-
zone,
|
314
|
-
clouds='fluidstack')
|
312
|
+
return catalog.validate_region_zone(region, zone, clouds='fluidstack')
|
315
313
|
|
316
314
|
@classmethod
|
317
315
|
def query_status(
|
sky/clouds/gcp.py
CHANGED
@@ -10,12 +10,12 @@ from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union
|
|
10
10
|
|
11
11
|
import colorama
|
12
12
|
|
13
|
+
from sky import catalog
|
13
14
|
from sky import clouds
|
14
15
|
from sky import exceptions
|
15
16
|
from sky import sky_logging
|
16
17
|
from sky import skypilot_config
|
17
18
|
from sky.adaptors import gcp
|
18
|
-
from sky.clouds import service_catalog
|
19
19
|
from sky.clouds.utils import gcp_utils
|
20
20
|
from sky.provision.gcp import constants
|
21
21
|
from sky.provision.gcp import volume_utils
|
@@ -111,7 +111,7 @@ _IMAGE_NOT_FOUND_UX_MESSAGE = (
|
|
111
111
|
|
112
112
|
# Image ID tags
|
113
113
|
_DEFAULT_CPU_IMAGE_ID = 'skypilot:custom-cpu-ubuntu-2204'
|
114
|
-
# For GPU-related package version, see sky/clouds/
|
114
|
+
# For GPU-related package version, see sky/catalog/images/provisioners/cuda.sh
|
115
115
|
_DEFAULT_GPU_IMAGE_ID = 'skypilot:custom-gpu-ubuntu-2204'
|
116
116
|
_DEFAULT_GPU_K80_IMAGE_ID = 'skypilot:k80-debian-10'
|
117
117
|
# Use COS image with GPU Direct support.
|
@@ -258,20 +258,21 @@ class GCP(clouds.Cloud):
|
|
258
258
|
use_spot: bool, region: Optional[str],
|
259
259
|
zone: Optional[str]) -> List[clouds.Region]:
|
260
260
|
if accelerators is None:
|
261
|
-
regions =
|
262
|
-
|
261
|
+
regions = catalog.get_region_zones_for_instance_type(instance_type,
|
262
|
+
use_spot,
|
263
|
+
clouds='gcp')
|
263
264
|
else:
|
264
265
|
assert len(accelerators) == 1, accelerators
|
265
266
|
acc = list(accelerators.keys())[0]
|
266
267
|
acc_count = list(accelerators.values())[0]
|
267
|
-
acc_regions =
|
268
|
+
acc_regions = catalog.get_region_zones_for_accelerators(
|
268
269
|
acc, acc_count, use_spot, clouds='gcp')
|
269
270
|
if instance_type is None:
|
270
271
|
regions = acc_regions
|
271
272
|
elif instance_type == 'TPU-VM':
|
272
273
|
regions = acc_regions
|
273
274
|
else:
|
274
|
-
vm_regions =
|
275
|
+
vm_regions = catalog.get_region_zones_for_instance_type(
|
275
276
|
instance_type, use_spot, clouds='gcp')
|
276
277
|
# Find the intersection between `acc_regions` and `vm_regions`.
|
277
278
|
regions = []
|
@@ -341,11 +342,11 @@ class GCP(clouds.Cloud):
|
|
341
342
|
use_spot: bool,
|
342
343
|
region: Optional[str] = None,
|
343
344
|
zone: Optional[str] = None) -> float:
|
344
|
-
return
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
345
|
+
return catalog.get_hourly_cost(instance_type,
|
346
|
+
use_spot=use_spot,
|
347
|
+
region=region,
|
348
|
+
zone=zone,
|
349
|
+
clouds='gcp')
|
349
350
|
|
350
351
|
def accelerators_to_hourly_cost(self,
|
351
352
|
accelerators: Dict[str, int],
|
@@ -354,12 +355,12 @@ class GCP(clouds.Cloud):
|
|
354
355
|
zone: Optional[str] = None) -> float:
|
355
356
|
assert len(accelerators) == 1, accelerators
|
356
357
|
acc, acc_count = list(accelerators.items())[0]
|
357
|
-
return
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
358
|
+
return catalog.get_accelerator_hourly_cost(acc,
|
359
|
+
acc_count,
|
360
|
+
use_spot=use_spot,
|
361
|
+
region=region,
|
362
|
+
zone=zone,
|
363
|
+
clouds='gcp')
|
363
364
|
|
364
365
|
def get_egress_cost(self, num_gigabytes: float):
|
365
366
|
# In general, query this from the cloud:
|
@@ -439,10 +440,10 @@ class GCP(clouds.Cloud):
|
|
439
440
|
memory: Optional[str] = None,
|
440
441
|
disk_tier: Optional[resources_utils.DiskTier] = None
|
441
442
|
) -> Optional[str]:
|
442
|
-
return
|
443
|
-
|
444
|
-
|
445
|
-
|
443
|
+
return catalog.get_default_instance_type(cpus=cpus,
|
444
|
+
memory=memory,
|
445
|
+
disk_tier=disk_tier,
|
446
|
+
clouds='gcp')
|
446
447
|
|
447
448
|
@classmethod
|
448
449
|
def failover_disk_tier(
|
@@ -563,8 +564,7 @@ class GCP(clouds.Cloud):
|
|
563
564
|
assert region_name in resources.image_id, resources.image_id
|
564
565
|
image_id = resources.image_id[region_name]
|
565
566
|
if image_id.startswith('skypilot:'):
|
566
|
-
image_id =
|
567
|
-
clouds='gcp')
|
567
|
+
image_id = catalog.get_image_id_from_tag(image_id, clouds='gcp')
|
568
568
|
|
569
569
|
assert image_id is not None, (image_id, r)
|
570
570
|
resources_vars['image_id'] = image_id
|
@@ -689,16 +689,16 @@ class GCP(clouds.Cloud):
|
|
689
689
|
|
690
690
|
# For TPU VMs, the instance type is fixed to 'TPU-VM'. However, we still
|
691
691
|
# need to call the below function to get the fuzzy candidate list.
|
692
|
-
(instance_list,
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
692
|
+
(instance_list,
|
693
|
+
fuzzy_candidate_list) = catalog.get_instance_type_for_accelerator(
|
694
|
+
acc,
|
695
|
+
acc_count,
|
696
|
+
cpus=resources.cpus if not use_tpu_vm else None,
|
697
|
+
memory=resources.memory if not use_tpu_vm else None,
|
698
|
+
use_spot=resources.use_spot,
|
699
|
+
region=resources.region,
|
700
|
+
zone=resources.zone,
|
701
|
+
clouds='gcp')
|
702
702
|
|
703
703
|
if instance_list is None:
|
704
704
|
return resources_utils.FeasibleResources([], fuzzy_candidate_list,
|
@@ -765,16 +765,16 @@ class GCP(clouds.Cloud):
|
|
765
765
|
# GCP handles accelerators separately from regular instance types.
|
766
766
|
# This method supports automatically inferring the GPU type for
|
767
767
|
# the instance type that come with GPUs pre-attached.
|
768
|
-
return
|
769
|
-
|
768
|
+
return catalog.get_accelerators_from_instance_type(instance_type,
|
769
|
+
clouds='gcp')
|
770
770
|
|
771
771
|
@classmethod
|
772
772
|
def get_vcpus_mem_from_instance_type(
|
773
773
|
cls,
|
774
774
|
instance_type: str,
|
775
775
|
) -> Tuple[Optional[float], Optional[float]]:
|
776
|
-
return
|
777
|
-
|
776
|
+
return catalog.get_vcpus_mem_from_instance_type(instance_type,
|
777
|
+
clouds='gcp')
|
778
778
|
|
779
779
|
@classmethod
|
780
780
|
def _find_application_key_path(cls) -> str:
|
@@ -1058,7 +1058,7 @@ class GCP(clouds.Cloud):
|
|
1058
1058
|
return user_identity[0].replace('\n', '')
|
1059
1059
|
|
1060
1060
|
def instance_type_exists(self, instance_type):
|
1061
|
-
return
|
1061
|
+
return catalog.instance_type_exists(instance_type, 'gcp')
|
1062
1062
|
|
1063
1063
|
def need_cleanup_after_preemption_or_failure(
|
1064
1064
|
self, resources: 'resources.Resources') -> bool:
|
@@ -1093,9 +1093,9 @@ class GCP(clouds.Cloud):
|
|
1093
1093
|
def _check_instance_type_accelerators_combination(
|
1094
1094
|
resources: 'resources.Resources') -> None:
|
1095
1095
|
resources = resources.assert_launchable()
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1096
|
+
catalog.check_accelerator_attachable_to_host(resources.instance_type,
|
1097
|
+
resources.accelerators,
|
1098
|
+
resources.zone, 'gcp')
|
1099
1099
|
|
1100
1100
|
@classmethod
|
1101
1101
|
def check_disk_tier(
|
@@ -1322,7 +1322,7 @@ class GCP(clouds.Cloud):
|
|
1322
1322
|
region = resources.region
|
1323
1323
|
|
1324
1324
|
# pylint: disable=import-outside-toplevel
|
1325
|
-
from sky.
|
1325
|
+
from sky.catalog import gcp_catalog
|
1326
1326
|
|
1327
1327
|
quota_code = gcp_catalog.get_quota_code(accelerator, use_spot)
|
1328
1328
|
|
sky/clouds/ibm.py
CHANGED
@@ -5,11 +5,11 @@ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
|
5
5
|
|
6
6
|
import colorama
|
7
7
|
|
8
|
+
from sky import catalog
|
8
9
|
from sky import clouds
|
9
10
|
from sky import sky_logging
|
10
11
|
from sky.adaptors import ibm
|
11
12
|
from sky.adaptors.ibm import CREDENTIAL_FILE
|
12
|
-
from sky.clouds import service_catalog
|
13
13
|
from sky.utils import registry
|
14
14
|
from sky.utils import resources_utils
|
15
15
|
from sky.utils import status_lib
|
@@ -71,7 +71,7 @@ class IBM(clouds.Cloud):
|
|
71
71
|
del accelerators # unused
|
72
72
|
if use_spot:
|
73
73
|
return []
|
74
|
-
regions =
|
74
|
+
regions = catalog.get_region_zones_for_instance_type(
|
75
75
|
instance_type, use_spot, 'ibm')
|
76
76
|
|
77
77
|
if region is not None:
|
@@ -131,11 +131,11 @@ class IBM(clouds.Cloud):
|
|
131
131
|
zone: Optional[str] = None) -> float:
|
132
132
|
# Currently doesn't support spot instances, hence use_spot set to False.
|
133
133
|
del use_spot
|
134
|
-
return
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
134
|
+
return catalog.get_hourly_cost(instance_type,
|
135
|
+
use_spot=False,
|
136
|
+
region=region,
|
137
|
+
zone=zone,
|
138
|
+
clouds='ibm')
|
139
139
|
|
140
140
|
def accelerators_to_hourly_cost(self,
|
141
141
|
accelerators: Dict[str, int],
|
@@ -243,8 +243,8 @@ class IBM(clouds.Cloud):
|
|
243
243
|
cls,
|
244
244
|
instance_type: str,
|
245
245
|
) -> Tuple[Optional[float], Optional[float]]:
|
246
|
-
return
|
247
|
-
|
246
|
+
return catalog.get_vcpus_mem_from_instance_type(instance_type,
|
247
|
+
clouds='ibm')
|
248
248
|
|
249
249
|
@classmethod
|
250
250
|
def get_accelerators_from_instance_type(
|
@@ -252,8 +252,8 @@ class IBM(clouds.Cloud):
|
|
252
252
|
instance_type: str,
|
253
253
|
) -> Optional[Dict[str, Union[int, float]]]:
|
254
254
|
"""Returns {acc: acc_count} held by 'instance_type', if any."""
|
255
|
-
return
|
256
|
-
|
255
|
+
return catalog.get_accelerators_from_instance_type(instance_type,
|
256
|
+
clouds='ibm')
|
257
257
|
|
258
258
|
@classmethod
|
259
259
|
def get_default_instance_type(
|
@@ -262,10 +262,10 @@ class IBM(clouds.Cloud):
|
|
262
262
|
memory: Optional[str] = None,
|
263
263
|
disk_tier: Optional['resources_utils.DiskTier'] = None
|
264
264
|
) -> Optional[str]:
|
265
|
-
return
|
266
|
-
|
267
|
-
|
268
|
-
|
265
|
+
return catalog.get_default_instance_type(cpus=cpus,
|
266
|
+
memory=memory,
|
267
|
+
disk_tier=disk_tier,
|
268
|
+
clouds='ibm')
|
269
269
|
|
270
270
|
def _get_feasible_launchable_resources(
|
271
271
|
self, resources: 'resources_lib.Resources'
|
@@ -309,15 +309,15 @@ class IBM(clouds.Cloud):
|
|
309
309
|
|
310
310
|
assert len(accelerators) == 1, resources
|
311
311
|
acc, acc_count = list(accelerators.items())[0]
|
312
|
-
(instance_list,
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
312
|
+
(instance_list,
|
313
|
+
fuzzy_candidate_list) = catalog.get_instance_type_for_accelerator(
|
314
|
+
acc,
|
315
|
+
acc_count,
|
316
|
+
cpus=resources.cpus,
|
317
|
+
memory=resources.memory,
|
318
|
+
region=resources.region,
|
319
|
+
zone=resources.zone,
|
320
|
+
clouds='ibm')
|
321
321
|
if instance_list is None:
|
322
322
|
return resources_utils.FeasibleResources([], fuzzy_candidate_list,
|
323
323
|
None)
|
@@ -462,11 +462,11 @@ class IBM(clouds.Cloud):
|
|
462
462
|
|
463
463
|
def instance_type_exists(self, instance_type):
|
464
464
|
"""Returns whether the instance type exists for this cloud."""
|
465
|
-
return
|
465
|
+
return catalog.instance_type_exists(instance_type, clouds='ibm')
|
466
466
|
|
467
467
|
def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
|
468
468
|
"""Validates the region and zone."""
|
469
|
-
return
|
469
|
+
return catalog.validate_region_zone(region, zone, clouds='ibm')
|
470
470
|
|
471
471
|
@classmethod
|
472
472
|
def query_status(cls, name: str, tag_filters: Dict[str, str],
|
sky/clouds/kubernetes.py
CHANGED
@@ -1,17 +1,18 @@
|
|
1
1
|
"""Kubernetes."""
|
2
|
-
import os
|
3
2
|
import re
|
3
|
+
import subprocess
|
4
|
+
import tempfile
|
4
5
|
import typing
|
5
6
|
from typing import Dict, Iterator, List, Optional, Set, Tuple, Union
|
6
7
|
|
7
8
|
import colorama
|
8
9
|
|
10
|
+
from sky import catalog
|
9
11
|
from sky import clouds
|
10
12
|
from sky import exceptions
|
11
13
|
from sky import sky_logging
|
12
14
|
from sky import skypilot_config
|
13
15
|
from sky.adaptors import kubernetes
|
14
|
-
from sky.clouds import service_catalog
|
15
16
|
from sky.provision import instance_setup
|
16
17
|
from sky.provision.kubernetes import network_utils
|
17
18
|
from sky.provision.kubernetes import utils as kubernetes_utils
|
@@ -28,10 +29,6 @@ if typing.TYPE_CHECKING:
|
|
28
29
|
|
29
30
|
logger = sky_logging.init_logger(__name__)
|
30
31
|
|
31
|
-
# Check if KUBECONFIG is set, and use it if it is.
|
32
|
-
DEFAULT_KUBECONFIG_PATH = '~/.kube/config'
|
33
|
-
CREDENTIAL_PATH = os.environ.get('KUBECONFIG', DEFAULT_KUBECONFIG_PATH)
|
34
|
-
|
35
32
|
# Namespace for SkyPilot resources shared across multiple tenants on the
|
36
33
|
# same cluster (even if they might be running in different namespaces).
|
37
34
|
# E.g., FUSE device manager daemonset is run in this namespace.
|
@@ -471,14 +468,14 @@ class Kubernetes(clouds.Cloud):
|
|
471
468
|
# Select image based on whether we are using GPUs or not.
|
472
469
|
image_id = self.IMAGE_GPU if acc_count > 0 else self.IMAGE_CPU
|
473
470
|
# Get the container image ID from the service catalog.
|
474
|
-
image_id =
|
475
|
-
|
471
|
+
image_id = catalog.get_image_id_from_tag(image_id,
|
472
|
+
clouds='kubernetes')
|
476
473
|
return image_id
|
477
474
|
|
478
475
|
image_id = _get_image_id(resources)
|
479
476
|
# TODO(romilb): Create a lightweight image for SSH jump host
|
480
|
-
ssh_jump_image =
|
481
|
-
|
477
|
+
ssh_jump_image = catalog.get_image_id_from_tag(self.IMAGE_CPU,
|
478
|
+
clouds='kubernetes')
|
482
479
|
|
483
480
|
# Set environment variables for the pod. Note that SkyPilot env vars
|
484
481
|
# are set separately when the task is run. These env vars are
|
@@ -788,6 +785,7 @@ class Kubernetes(clouds.Cloud):
|
|
788
785
|
"""Checks if the user has access credentials to
|
789
786
|
Kubernetes."""
|
790
787
|
# Check for port forward dependencies
|
788
|
+
logger.info(f'Checking compute credentials for {cls.canonical_name()}')
|
791
789
|
reasons = kubernetes_utils.check_port_forward_mode_dependencies(False)
|
792
790
|
if reasons is not None:
|
793
791
|
formatted = '\n'.join(
|
@@ -850,10 +848,24 @@ class Kubernetes(clouds.Cloud):
|
|
850
848
|
return ''.join(message_parts)
|
851
849
|
|
852
850
|
def get_credential_file_mounts(self) -> Dict[str, str]:
|
853
|
-
|
851
|
+
credential_paths = kubernetes_utils.get_kubeconfig_paths()
|
852
|
+
if credential_paths:
|
853
|
+
# For single kubeconfig path, keep the original path.
|
854
|
+
kubeconfig_file = credential_paths[0]
|
855
|
+
if len(credential_paths) > 1:
|
856
|
+
# For multiple kubeconfig paths, merge them into a single file.
|
857
|
+
# TODO(aylei): GC merged kubeconfig files.
|
858
|
+
kubeconfig_file = tempfile.NamedTemporaryFile(
|
859
|
+
prefix='merged-kubeconfig-', suffix='.yaml',
|
860
|
+
delete=False).name
|
861
|
+
subprocess.run(
|
862
|
+
'kubectl config view --flatten '
|
863
|
+
f'> {kubeconfig_file}',
|
864
|
+
shell=True,
|
865
|
+
check=True)
|
854
866
|
# Upload kubeconfig to the default path to avoid having to set
|
855
867
|
# KUBECONFIG in the environment.
|
856
|
-
return {DEFAULT_KUBECONFIG_PATH:
|
868
|
+
return {kubernetes.DEFAULT_KUBECONFIG_PATH: kubeconfig_file}
|
857
869
|
else:
|
858
870
|
return {}
|
859
871
|
|
sky/clouds/lambda_cloud.py
CHANGED
@@ -2,9 +2,9 @@
|
|
2
2
|
import typing
|
3
3
|
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
4
4
|
|
5
|
+
from sky import catalog
|
5
6
|
from sky import clouds
|
6
7
|
from sky.adaptors import common as adaptors_common
|
7
|
-
from sky.clouds import service_catalog
|
8
8
|
from sky.provision.lambda_cloud import lambda_utils
|
9
9
|
from sky.utils import registry
|
10
10
|
from sky.utils import resources_utils
|
@@ -73,7 +73,7 @@ class Lambda(clouds.Cloud):
|
|
73
73
|
del accelerators, zone # unused
|
74
74
|
if use_spot:
|
75
75
|
return []
|
76
|
-
regions =
|
76
|
+
regions = catalog.get_region_zones_for_instance_type(
|
77
77
|
instance_type, use_spot, 'lambda')
|
78
78
|
|
79
79
|
if region is not None:
|
@@ -105,11 +105,11 @@ class Lambda(clouds.Cloud):
|
|
105
105
|
use_spot: bool,
|
106
106
|
region: Optional[str] = None,
|
107
107
|
zone: Optional[str] = None) -> float:
|
108
|
-
return
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
108
|
+
return catalog.get_hourly_cost(instance_type,
|
109
|
+
use_spot=use_spot,
|
110
|
+
region=region,
|
111
|
+
zone=zone,
|
112
|
+
clouds='lambda')
|
113
113
|
|
114
114
|
def accelerators_to_hourly_cost(self,
|
115
115
|
accelerators: Dict[str, int],
|
@@ -133,26 +133,26 @@ class Lambda(clouds.Cloud):
|
|
133
133
|
memory: Optional[str] = None,
|
134
134
|
disk_tier: Optional['resources_utils.DiskTier'] = None
|
135
135
|
) -> Optional[str]:
|
136
|
-
return
|
137
|
-
|
138
|
-
|
139
|
-
|
136
|
+
return catalog.get_default_instance_type(cpus=cpus,
|
137
|
+
memory=memory,
|
138
|
+
disk_tier=disk_tier,
|
139
|
+
clouds='lambda')
|
140
140
|
|
141
141
|
@classmethod
|
142
142
|
def get_accelerators_from_instance_type(
|
143
143
|
cls,
|
144
144
|
instance_type: str,
|
145
145
|
) -> Optional[Dict[str, Union[int, float]]]:
|
146
|
-
return
|
147
|
-
|
146
|
+
return catalog.get_accelerators_from_instance_type(instance_type,
|
147
|
+
clouds='lambda')
|
148
148
|
|
149
149
|
@classmethod
|
150
150
|
def get_vcpus_mem_from_instance_type(
|
151
151
|
cls,
|
152
152
|
instance_type: str,
|
153
153
|
) -> Tuple[Optional[float], Optional[float]]:
|
154
|
-
return
|
155
|
-
|
154
|
+
return catalog.get_vcpus_mem_from_instance_type(instance_type,
|
155
|
+
clouds='lambda')
|
156
156
|
|
157
157
|
@classmethod
|
158
158
|
def get_zone_shell_cmd(cls) -> Optional[str]:
|
@@ -230,16 +230,16 @@ class Lambda(clouds.Cloud):
|
|
230
230
|
|
231
231
|
assert len(accelerators) == 1, resources
|
232
232
|
acc, acc_count = list(accelerators.items())[0]
|
233
|
-
(instance_list,
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
233
|
+
(instance_list,
|
234
|
+
fuzzy_candidate_list) = catalog.get_instance_type_for_accelerator(
|
235
|
+
acc,
|
236
|
+
acc_count,
|
237
|
+
use_spot=resources.use_spot,
|
238
|
+
cpus=resources.cpus,
|
239
|
+
memory=resources.memory,
|
240
|
+
region=resources.region,
|
241
|
+
zone=resources.zone,
|
242
|
+
clouds='lambda')
|
243
243
|
if instance_list is None:
|
244
244
|
return resources_utils.FeasibleResources([], fuzzy_candidate_list,
|
245
245
|
None)
|
@@ -278,16 +278,14 @@ class Lambda(clouds.Cloud):
|
|
278
278
|
return None
|
279
279
|
|
280
280
|
def instance_type_exists(self, instance_type: str) -> bool:
|
281
|
-
return
|
281
|
+
return catalog.instance_type_exists(instance_type, 'lambda')
|
282
282
|
|
283
283
|
def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
|
284
|
-
return
|
285
|
-
zone,
|
286
|
-
clouds='lambda')
|
284
|
+
return catalog.validate_region_zone(region, zone, clouds='lambda')
|
287
285
|
|
288
286
|
@classmethod
|
289
287
|
def regions(cls) -> List['clouds.Region']:
|
290
|
-
return
|
288
|
+
return catalog.regions(clouds='lambda')
|
291
289
|
|
292
290
|
@classmethod
|
293
291
|
def query_status(cls, name: str, tag_filters: Dict[str, str],
|