skypilot-nightly 1.0.0.dev20251009__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +6 -2
- sky/adaptors/aws.py +25 -7
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/kubernetes.py +64 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +20 -0
- sky/authentication.py +59 -149
- sky/backends/backend_utils.py +104 -63
- sky/backends/cloud_vm_ray_backend.py +84 -39
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/kubernetes_catalog.py +24 -28
- sky/catalog/runpod_catalog.py +5 -1
- sky/catalog/shadeform_catalog.py +165 -0
- sky/check.py +25 -13
- sky/client/cli/command.py +335 -86
- sky/client/cli/flags.py +4 -2
- sky/client/cli/table_utils.py +17 -9
- sky/client/sdk.py +59 -12
- sky/cloud_stores.py +73 -0
- sky/clouds/__init__.py +2 -0
- sky/clouds/aws.py +71 -16
- sky/clouds/azure.py +12 -5
- sky/clouds/cloud.py +19 -9
- sky/clouds/cudo.py +12 -5
- sky/clouds/do.py +4 -1
- sky/clouds/fluidstack.py +12 -5
- sky/clouds/gcp.py +12 -5
- sky/clouds/hyperbolic.py +12 -5
- sky/clouds/ibm.py +12 -5
- sky/clouds/kubernetes.py +62 -25
- sky/clouds/lambda_cloud.py +12 -5
- sky/clouds/nebius.py +12 -5
- sky/clouds/oci.py +12 -5
- sky/clouds/paperspace.py +4 -1
- sky/clouds/primeintellect.py +4 -1
- sky/clouds/runpod.py +12 -5
- sky/clouds/scp.py +12 -5
- sky/clouds/seeweb.py +4 -1
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +4 -2
- sky/clouds/vast.py +12 -5
- sky/clouds/vsphere.py +4 -1
- sky/core.py +12 -11
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/{1871-49141c317f3a9020.js → 1871-74503c8e80fd253b.js} +1 -1
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
- sky/dashboard/out/_next/static/chunks/{3785.a19328ba41517b8b.js → 3785.ad6adaa2a0fa9768.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{4725.10f7a9a5d3ea8208.js → 4725.a830b5c9e7867c92.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
- sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
- sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
- sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-477555ab7c0b13d8.js → [cluster]-a37d2063af475a1c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{clusters-2f61f65487f6d8ff.js → clusters-d44859594e6f8064.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-553b8b5cb65e100b.js → [context]-c0b5935149902e6f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-910a22500c50596f.js → infra-aed0ea19df7cf961.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-bc979970c247d8f3.js → [pool]-6edeb7d06032adfc.js} +2 -2
- sky/dashboard/out/_next/static/chunks/pages/{jobs-a35a9dc3c5ccd657.js → jobs-479dde13399cf270.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{users-98d2ed979084162a.js → users-5ab3b907622cf0fe.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{volumes-835d14ba94808f79.js → volumes-b84b948ff357c43e.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-e8688c35c06f0ac5.js → [name]-c5a3eeee1c218af1.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-69c80d677d3c2949.js → workspaces-22b23febb3e89ce1.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +92 -1
- sky/data/mounting_utils.py +143 -19
- sky/data/storage.py +168 -11
- sky/exceptions.py +13 -1
- sky/execution.py +13 -0
- sky/global_user_state.py +189 -113
- sky/jobs/client/sdk.py +32 -10
- sky/jobs/client/sdk_async.py +9 -3
- sky/jobs/constants.py +3 -1
- sky/jobs/controller.py +164 -192
- sky/jobs/file_content_utils.py +80 -0
- sky/jobs/log_gc.py +201 -0
- sky/jobs/recovery_strategy.py +59 -82
- sky/jobs/scheduler.py +20 -9
- sky/jobs/server/core.py +105 -23
- sky/jobs/server/server.py +40 -28
- sky/jobs/server/utils.py +32 -11
- sky/jobs/state.py +588 -110
- sky/jobs/utils.py +442 -209
- sky/logs/agent.py +1 -1
- sky/metrics/utils.py +45 -6
- sky/optimizer.py +1 -1
- sky/provision/__init__.py +7 -0
- sky/provision/aws/instance.py +2 -1
- sky/provision/azure/instance.py +2 -1
- sky/provision/common.py +2 -0
- sky/provision/cudo/instance.py +2 -1
- sky/provision/do/instance.py +2 -1
- sky/provision/fluidstack/instance.py +4 -3
- sky/provision/gcp/instance.py +2 -1
- sky/provision/hyperbolic/instance.py +2 -1
- sky/provision/instance_setup.py +10 -2
- sky/provision/kubernetes/constants.py +0 -1
- sky/provision/kubernetes/instance.py +222 -89
- sky/provision/kubernetes/network.py +12 -8
- sky/provision/kubernetes/utils.py +114 -53
- sky/provision/kubernetes/volume.py +5 -4
- sky/provision/lambda_cloud/instance.py +2 -1
- sky/provision/nebius/instance.py +2 -1
- sky/provision/oci/instance.py +2 -1
- sky/provision/paperspace/instance.py +2 -1
- sky/provision/provisioner.py +11 -2
- sky/provision/runpod/instance.py +2 -1
- sky/provision/scp/instance.py +2 -1
- sky/provision/seeweb/instance.py +3 -3
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/vast/instance.py +2 -1
- sky/provision/vsphere/instance.py +2 -1
- sky/resources.py +1 -1
- sky/schemas/api/responses.py +9 -5
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/generated/jobsv1_pb2.py +52 -52
- sky/schemas/generated/jobsv1_pb2.pyi +4 -2
- sky/schemas/generated/managed_jobsv1_pb2.py +39 -35
- sky/schemas/generated/managed_jobsv1_pb2.pyi +21 -5
- sky/serve/client/impl.py +11 -3
- sky/serve/replica_managers.py +5 -2
- sky/serve/serve_utils.py +9 -2
- sky/serve/server/impl.py +7 -2
- sky/serve/server/server.py +18 -15
- sky/serve/service.py +2 -2
- sky/server/auth/oauth2_proxy.py +2 -5
- sky/server/common.py +31 -28
- sky/server/constants.py +5 -1
- sky/server/daemons.py +27 -19
- sky/server/requests/executor.py +138 -74
- sky/server/requests/payloads.py +9 -1
- sky/server/requests/preconditions.py +13 -10
- sky/server/requests/request_names.py +120 -0
- sky/server/requests/requests.py +485 -153
- sky/server/requests/serializers/decoders.py +26 -13
- sky/server/requests/serializers/encoders.py +56 -11
- sky/server/requests/threads.py +106 -0
- sky/server/rest.py +70 -18
- sky/server/server.py +283 -104
- sky/server/stream_utils.py +233 -59
- sky/server/uvicorn.py +18 -17
- sky/setup_files/alembic.ini +4 -0
- sky/setup_files/dependencies.py +32 -13
- sky/sky_logging.py +0 -2
- sky/skylet/constants.py +30 -7
- sky/skylet/events.py +7 -0
- sky/skylet/log_lib.py +8 -2
- sky/skylet/log_lib.pyi +1 -1
- sky/skylet/services.py +26 -13
- sky/skylet/subprocess_daemon.py +103 -29
- sky/skypilot_config.py +87 -75
- sky/ssh_node_pools/server.py +9 -8
- sky/task.py +67 -54
- sky/templates/kubernetes-ray.yml.j2 +8 -1
- sky/templates/nebius-ray.yml.j2 +1 -0
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/templates/websocket_proxy.py +142 -12
- sky/users/permission.py +8 -1
- sky/utils/admin_policy_utils.py +16 -3
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/auth_utils.py +153 -0
- sky/utils/cli_utils/status_utils.py +8 -2
- sky/utils/command_runner.py +11 -0
- sky/utils/common.py +3 -1
- sky/utils/common_utils.py +7 -4
- sky/utils/context.py +57 -51
- sky/utils/context_utils.py +30 -12
- sky/utils/controller_utils.py +35 -8
- sky/utils/db/db_utils.py +37 -10
- sky/utils/db/migration_utils.py +8 -4
- sky/utils/locks.py +24 -6
- sky/utils/resource_checker.py +4 -1
- sky/utils/resources_utils.py +53 -29
- sky/utils/schemas.py +23 -4
- sky/utils/subprocess_utils.py +17 -4
- sky/volumes/server/server.py +7 -6
- sky/workspaces/server.py +13 -12
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/METADATA +306 -55
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/RECORD +215 -195
- sky/dashboard/out/_next/static/chunks/1121-d0782b9251f0fcd3.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-3b40c39626f99c89.js +0 -11
- sky/dashboard/out/_next/static/chunks/2755.97300e1362fe7c98.js +0 -26
- sky/dashboard/out/_next/static/chunks/3015-8d748834fcc60b46.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.1fafbf42b3bcebff.js +0 -1
- sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
- sky/dashboard/out/_next/static/chunks/6856-5fdc9b851a18acdb.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-f6818c84ed8f1c86.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-66237729cdf9749e.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
- sky/dashboard/out/_next/static/chunks/9360.71e83b2ddc844ec2.js +0 -31
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-8f058b0346db2aff.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-4f7079dcab6ed653.js +0 -16
- sky/dashboard/out/_next/static/chunks/webpack-6a5ddd0184bfa22c.js +0 -1
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
- sky/dashboard/out/_next/static/hIViZcQBkn0HE8SpaSsUU/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/{hIViZcQBkn0HE8SpaSsUU → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/clouds/fluidstack.py
CHANGED
|
@@ -73,7 +73,9 @@ class Fluidstack(clouds.Cloud):
|
|
|
73
73
|
|
|
74
74
|
@classmethod
|
|
75
75
|
def _unsupported_features_for_resources(
|
|
76
|
-
cls,
|
|
76
|
+
cls,
|
|
77
|
+
resources: 'resources_lib.Resources',
|
|
78
|
+
region: Optional[str] = None,
|
|
77
79
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
78
80
|
"""The features not supported based on the resources provided.
|
|
79
81
|
|
|
@@ -92,10 +94,15 @@ class Fluidstack(clouds.Cloud):
|
|
|
92
94
|
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
|
93
95
|
|
|
94
96
|
@classmethod
|
|
95
|
-
def regions_with_offering(
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
97
|
+
def regions_with_offering(
|
|
98
|
+
cls,
|
|
99
|
+
instance_type: str,
|
|
100
|
+
accelerators: Optional[Dict[str, int]],
|
|
101
|
+
use_spot: bool,
|
|
102
|
+
region: Optional[str],
|
|
103
|
+
zone: Optional[str],
|
|
104
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
105
|
+
) -> List[clouds.Region]:
|
|
99
106
|
assert zone is None, 'FluidStack does not support zones.'
|
|
100
107
|
del accelerators, zone # unused
|
|
101
108
|
if use_spot:
|
sky/clouds/gcp.py
CHANGED
|
@@ -211,7 +211,9 @@ class GCP(clouds.Cloud):
|
|
|
211
211
|
|
|
212
212
|
@classmethod
|
|
213
213
|
def _unsupported_features_for_resources(
|
|
214
|
-
cls,
|
|
214
|
+
cls,
|
|
215
|
+
resources: 'resources.Resources',
|
|
216
|
+
region: Optional[str] = None,
|
|
215
217
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
216
218
|
unsupported = {}
|
|
217
219
|
if gcp_utils.is_tpu_vm_pod(resources):
|
|
@@ -255,10 +257,15 @@ class GCP(clouds.Cloud):
|
|
|
255
257
|
|
|
256
258
|
#### Regions/Zones ####
|
|
257
259
|
@classmethod
|
|
258
|
-
def regions_with_offering(
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
260
|
+
def regions_with_offering(
|
|
261
|
+
cls,
|
|
262
|
+
instance_type: str,
|
|
263
|
+
accelerators: Optional[Dict[str, int]],
|
|
264
|
+
use_spot: bool,
|
|
265
|
+
region: Optional[str],
|
|
266
|
+
zone: Optional[str],
|
|
267
|
+
resources: Optional['resources.Resources'] = None,
|
|
268
|
+
) -> List[clouds.Region]:
|
|
262
269
|
if accelerators is None:
|
|
263
270
|
regions = catalog.get_region_zones_for_instance_type(instance_type,
|
|
264
271
|
use_spot,
|
sky/clouds/hyperbolic.py
CHANGED
|
@@ -65,7 +65,9 @@ class Hyperbolic(clouds.Cloud):
|
|
|
65
65
|
|
|
66
66
|
@classmethod
|
|
67
67
|
def _unsupported_features_for_resources(
|
|
68
|
-
cls,
|
|
68
|
+
cls,
|
|
69
|
+
resources: 'resources_lib.Resources',
|
|
70
|
+
region: Optional[str] = None,
|
|
69
71
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
70
72
|
del resources
|
|
71
73
|
return cls._CLOUD_UNSUPPORTED_FEATURES
|
|
@@ -78,10 +80,15 @@ class Hyperbolic(clouds.Cloud):
|
|
|
78
80
|
return catalog.instance_type_exists(instance_type, 'hyperbolic')
|
|
79
81
|
|
|
80
82
|
@classmethod
|
|
81
|
-
def regions_with_offering(
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
83
|
+
def regions_with_offering(
|
|
84
|
+
cls,
|
|
85
|
+
instance_type: str,
|
|
86
|
+
accelerators: Optional[Dict[str, int]],
|
|
87
|
+
use_spot: bool,
|
|
88
|
+
region: Optional[str],
|
|
89
|
+
zone: Optional[str],
|
|
90
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
91
|
+
) -> List[clouds.Region]:
|
|
85
92
|
assert zone is None, 'Hyperbolic does not support zones.'
|
|
86
93
|
del accelerators, zone # unused
|
|
87
94
|
|
sky/clouds/ibm.py
CHANGED
|
@@ -37,7 +37,9 @@ class IBM(clouds.Cloud):
|
|
|
37
37
|
|
|
38
38
|
@classmethod
|
|
39
39
|
def _unsupported_features_for_resources(
|
|
40
|
-
cls,
|
|
40
|
+
cls,
|
|
41
|
+
resources: 'resources_lib.Resources',
|
|
42
|
+
region: Optional[str] = None,
|
|
41
43
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
42
44
|
features = {
|
|
43
45
|
clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
|
|
@@ -68,10 +70,15 @@ class IBM(clouds.Cloud):
|
|
|
68
70
|
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
|
69
71
|
|
|
70
72
|
@classmethod
|
|
71
|
-
def regions_with_offering(
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
73
|
+
def regions_with_offering(
|
|
74
|
+
cls,
|
|
75
|
+
instance_type: str,
|
|
76
|
+
accelerators: Optional[Dict[str, int]],
|
|
77
|
+
use_spot: bool,
|
|
78
|
+
region: Optional[str],
|
|
79
|
+
zone: Optional[str],
|
|
80
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
81
|
+
) -> List[clouds.Region]:
|
|
75
82
|
del accelerators # unused
|
|
76
83
|
if use_spot:
|
|
77
84
|
return []
|
sky/clouds/kubernetes.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Kubernetes."""
|
|
2
|
+
import concurrent.futures
|
|
2
3
|
import os
|
|
3
4
|
import re
|
|
4
5
|
import subprocess
|
|
@@ -98,34 +99,50 @@ class Kubernetes(clouds.Cloud):
|
|
|
98
99
|
|
|
99
100
|
@classmethod
|
|
100
101
|
def _unsupported_features_for_resources(
|
|
101
|
-
cls,
|
|
102
|
+
cls,
|
|
103
|
+
resources: 'resources_lib.Resources',
|
|
104
|
+
region: Optional[str] = None,
|
|
102
105
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
103
106
|
# TODO(aylei): features need to be regional (per context) to make
|
|
104
107
|
# multi-kubernetes selection/failover work.
|
|
105
108
|
unsupported_features = cls._CLOUD_UNSUPPORTED_FEATURES.copy()
|
|
106
|
-
context = resources.region
|
|
109
|
+
context = region if region is not None else resources.region
|
|
107
110
|
if context is None:
|
|
108
|
-
|
|
111
|
+
contexts = cls.existing_allowed_contexts()
|
|
112
|
+
else:
|
|
113
|
+
contexts = [context]
|
|
109
114
|
unsupported_features[clouds.CloudImplementationFeatures.STOP] = (
|
|
110
115
|
'Stopping clusters is not supported on Kubernetes.')
|
|
111
116
|
unsupported_features[clouds.CloudImplementationFeatures.AUTOSTOP] = (
|
|
112
117
|
'Auto-stop is not supported on Kubernetes.')
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
118
|
+
for context in contexts:
|
|
119
|
+
# Allow spot instances if supported by the cluster
|
|
120
|
+
try:
|
|
121
|
+
# Run spot label check and network type detection concurrently
|
|
122
|
+
# as they are independent operations
|
|
123
|
+
with concurrent.futures.ThreadPoolExecutor(
|
|
124
|
+
max_workers=2) as executor:
|
|
125
|
+
spot_future = executor.submit(
|
|
126
|
+
kubernetes_utils.get_spot_label, context)
|
|
127
|
+
network_future = executor.submit(cls._detect_network_type,
|
|
128
|
+
context,
|
|
129
|
+
resources.network_tier)
|
|
130
|
+
|
|
131
|
+
spot_label_key, _ = spot_future.result()
|
|
132
|
+
if spot_label_key is not None:
|
|
133
|
+
unsupported_features.pop(
|
|
134
|
+
clouds.CloudImplementationFeatures.SPOT_INSTANCE,
|
|
135
|
+
None)
|
|
136
|
+
|
|
137
|
+
# Allow custom network tier if supported by the cluster
|
|
138
|
+
# (e.g., Nebius clusters with high performance networking)
|
|
139
|
+
network_type, _ = network_future.result()
|
|
140
|
+
if network_type.supports_high_performance_networking():
|
|
141
|
+
unsupported_features.pop(
|
|
142
|
+
clouds.CloudImplementationFeatures.
|
|
143
|
+
CUSTOM_NETWORK_TIER, None)
|
|
144
|
+
except exceptions.KubeAPIUnreachableError as e:
|
|
145
|
+
cls._log_unreachable_context(context, str(e))
|
|
129
146
|
return unsupported_features
|
|
130
147
|
|
|
131
148
|
@classmethod
|
|
@@ -241,10 +258,15 @@ class Kubernetes(clouds.Cloud):
|
|
|
241
258
|
'refresh Kubernetes availability if permanent.')
|
|
242
259
|
|
|
243
260
|
@classmethod
|
|
244
|
-
def regions_with_offering(
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
261
|
+
def regions_with_offering(
|
|
262
|
+
cls,
|
|
263
|
+
instance_type: Optional[str],
|
|
264
|
+
accelerators: Optional[Dict[str, int]],
|
|
265
|
+
use_spot: bool,
|
|
266
|
+
region: Optional[str],
|
|
267
|
+
zone: Optional[str],
|
|
268
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
269
|
+
) -> List[clouds.Region]:
|
|
248
270
|
del accelerators, zone, use_spot # unused
|
|
249
271
|
existing_contexts = cls.existing_allowed_contexts()
|
|
250
272
|
|
|
@@ -254,6 +276,19 @@ class Kubernetes(clouds.Cloud):
|
|
|
254
276
|
|
|
255
277
|
if region is not None:
|
|
256
278
|
regions = [r for r in regions if r.name == region]
|
|
279
|
+
if resources is not None:
|
|
280
|
+
filtered_regions = []
|
|
281
|
+
resources_required_features = resources.get_required_cloud_features(
|
|
282
|
+
)
|
|
283
|
+
for r in regions:
|
|
284
|
+
try:
|
|
285
|
+
cls.check_features_are_supported(
|
|
286
|
+
resources, resources_required_features, r.name)
|
|
287
|
+
filtered_regions.append(r)
|
|
288
|
+
except exceptions.NotSupportedError as e:
|
|
289
|
+
logger.info(f'Filter out context: {r.name}, reason: {e}')
|
|
290
|
+
continue
|
|
291
|
+
regions = filtered_regions
|
|
257
292
|
|
|
258
293
|
# Check if requested instance type will fit in the cluster.
|
|
259
294
|
# TODO(zhwu,romilb): autoscaler type needs to be regional (per
|
|
@@ -785,7 +820,8 @@ class Kubernetes(clouds.Cloud):
|
|
|
785
820
|
accelerators=resources.accelerators,
|
|
786
821
|
use_spot=resources.use_spot,
|
|
787
822
|
region=resources.region,
|
|
788
|
-
zone=resources.zone
|
|
823
|
+
zone=resources.zone,
|
|
824
|
+
resources=resources)
|
|
789
825
|
if not regions:
|
|
790
826
|
return resources_utils.FeasibleResources([], [], None)
|
|
791
827
|
resources = resources.copy(accelerators=None)
|
|
@@ -845,7 +881,8 @@ class Kubernetes(clouds.Cloud):
|
|
|
845
881
|
accelerators=None,
|
|
846
882
|
use_spot=resources.use_spot,
|
|
847
883
|
region=resources.region,
|
|
848
|
-
zone=resources.zone
|
|
884
|
+
zone=resources.zone,
|
|
885
|
+
resources=resources)
|
|
849
886
|
if not available_regions:
|
|
850
887
|
return resources_utils.FeasibleResources([], [], None)
|
|
851
888
|
# No fuzzy lists for Kubernetes
|
sky/clouds/lambda_cloud.py
CHANGED
|
@@ -59,7 +59,9 @@ class Lambda(clouds.Cloud):
|
|
|
59
59
|
|
|
60
60
|
@classmethod
|
|
61
61
|
def _unsupported_features_for_resources(
|
|
62
|
-
cls,
|
|
62
|
+
cls,
|
|
63
|
+
resources: 'resources_lib.Resources',
|
|
64
|
+
region: Optional[str] = None,
|
|
63
65
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
64
66
|
del resources # unused
|
|
65
67
|
return cls._CLOUD_UNSUPPORTED_FEATURES
|
|
@@ -69,10 +71,15 @@ class Lambda(clouds.Cloud):
|
|
|
69
71
|
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
|
70
72
|
|
|
71
73
|
@classmethod
|
|
72
|
-
def regions_with_offering(
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
74
|
+
def regions_with_offering(
|
|
75
|
+
cls,
|
|
76
|
+
instance_type: str,
|
|
77
|
+
accelerators: Optional[Dict[str, int]],
|
|
78
|
+
use_spot: bool,
|
|
79
|
+
region: Optional[str],
|
|
80
|
+
zone: Optional[str],
|
|
81
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
82
|
+
) -> List[clouds.Region]:
|
|
76
83
|
assert zone is None, 'Lambda does not support zones.'
|
|
77
84
|
del accelerators, zone # unused
|
|
78
85
|
if use_spot:
|
sky/clouds/nebius.py
CHANGED
|
@@ -78,7 +78,9 @@ class Nebius(clouds.Cloud):
|
|
|
78
78
|
|
|
79
79
|
@classmethod
|
|
80
80
|
def _unsupported_features_for_resources(
|
|
81
|
-
cls,
|
|
81
|
+
cls,
|
|
82
|
+
resources: 'resources_lib.Resources',
|
|
83
|
+
region: Optional[str] = None,
|
|
82
84
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
83
85
|
unsupported = cls._CLOUD_UNSUPPORTED_FEATURES.copy()
|
|
84
86
|
|
|
@@ -101,10 +103,15 @@ class Nebius(clouds.Cloud):
|
|
|
101
103
|
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
|
102
104
|
|
|
103
105
|
@classmethod
|
|
104
|
-
def regions_with_offering(
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
106
|
+
def regions_with_offering(
|
|
107
|
+
cls,
|
|
108
|
+
instance_type: str,
|
|
109
|
+
accelerators: Optional[Dict[str, int]],
|
|
110
|
+
use_spot: bool,
|
|
111
|
+
region: Optional[str],
|
|
112
|
+
zone: Optional[str],
|
|
113
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
114
|
+
) -> List[clouds.Region]:
|
|
108
115
|
assert zone is None, 'Nebius does not support zones.'
|
|
109
116
|
del accelerators, zone # unused
|
|
110
117
|
regions = catalog.get_region_zones_for_instance_type(
|
sky/clouds/oci.py
CHANGED
|
@@ -69,7 +69,9 @@ class OCI(clouds.Cloud):
|
|
|
69
69
|
|
|
70
70
|
@classmethod
|
|
71
71
|
def _unsupported_features_for_resources(
|
|
72
|
-
cls,
|
|
72
|
+
cls,
|
|
73
|
+
resources: 'resources_lib.Resources',
|
|
74
|
+
region: Optional[str] = None,
|
|
73
75
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
74
76
|
unsupported_features = {
|
|
75
77
|
clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
|
|
@@ -96,10 +98,15 @@ class OCI(clouds.Cloud):
|
|
|
96
98
|
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
|
97
99
|
|
|
98
100
|
@classmethod
|
|
99
|
-
def regions_with_offering(
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
101
|
+
def regions_with_offering(
|
|
102
|
+
cls,
|
|
103
|
+
instance_type: str,
|
|
104
|
+
accelerators: Optional[Dict[str, int]],
|
|
105
|
+
use_spot: bool,
|
|
106
|
+
region: Optional[str],
|
|
107
|
+
zone: Optional[str],
|
|
108
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
109
|
+
) -> List[clouds.Region]:
|
|
103
110
|
del accelerators # unused
|
|
104
111
|
|
|
105
112
|
regions = catalog.get_region_zones_for_instance_type(
|
sky/clouds/paperspace.py
CHANGED
|
@@ -60,7 +60,9 @@ class Paperspace(clouds.Cloud):
|
|
|
60
60
|
|
|
61
61
|
@classmethod
|
|
62
62
|
def _unsupported_features_for_resources(
|
|
63
|
-
cls,
|
|
63
|
+
cls,
|
|
64
|
+
resources: 'resources_lib.Resources',
|
|
65
|
+
region: Optional[str] = None,
|
|
64
66
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
65
67
|
"""The features not supported based on the resources provided.
|
|
66
68
|
|
|
@@ -86,6 +88,7 @@ class Paperspace(clouds.Cloud):
|
|
|
86
88
|
use_spot: bool,
|
|
87
89
|
region: Optional[str],
|
|
88
90
|
zone: Optional[str],
|
|
91
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
89
92
|
) -> List[clouds.Region]:
|
|
90
93
|
assert zone is None, 'Paperspace does not support zones.'
|
|
91
94
|
del accelerators, zone # unused
|
sky/clouds/primeintellect.py
CHANGED
|
@@ -65,6 +65,7 @@ class PrimeIntellect(clouds.Cloud):
|
|
|
65
65
|
use_spot: bool,
|
|
66
66
|
region: Optional[str],
|
|
67
67
|
zone: Optional[str],
|
|
68
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
68
69
|
) -> List[clouds.Region]:
|
|
69
70
|
"""Returns the regions that offer the specified resources."""
|
|
70
71
|
del accelerators
|
|
@@ -299,7 +300,9 @@ class PrimeIntellect(clouds.Cloud):
|
|
|
299
300
|
|
|
300
301
|
@classmethod
|
|
301
302
|
def _unsupported_features_for_resources(
|
|
302
|
-
cls,
|
|
303
|
+
cls,
|
|
304
|
+
resources: 'resources_lib.Resources',
|
|
305
|
+
region: Optional[str] = None,
|
|
303
306
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
304
307
|
"""The features not supported based on the resources provided.
|
|
305
308
|
|
sky/clouds/runpod.py
CHANGED
|
@@ -53,7 +53,9 @@ class RunPod(clouds.Cloud):
|
|
|
53
53
|
|
|
54
54
|
@classmethod
|
|
55
55
|
def _unsupported_features_for_resources(
|
|
56
|
-
cls,
|
|
56
|
+
cls,
|
|
57
|
+
resources: 'resources_lib.Resources',
|
|
58
|
+
region: Optional[str] = None,
|
|
57
59
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
58
60
|
"""The features not supported based on the resources provided.
|
|
59
61
|
|
|
@@ -72,10 +74,15 @@ class RunPod(clouds.Cloud):
|
|
|
72
74
|
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
|
73
75
|
|
|
74
76
|
@classmethod
|
|
75
|
-
def regions_with_offering(
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
77
|
+
def regions_with_offering(
|
|
78
|
+
cls,
|
|
79
|
+
instance_type: str,
|
|
80
|
+
accelerators: Optional[Dict[str, int]],
|
|
81
|
+
use_spot: bool,
|
|
82
|
+
region: Optional[str],
|
|
83
|
+
zone: Optional[str],
|
|
84
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
85
|
+
) -> List[clouds.Region]:
|
|
79
86
|
del accelerators # unused
|
|
80
87
|
regions = catalog.get_region_zones_for_instance_type(
|
|
81
88
|
instance_type, use_spot, 'runpod')
|
sky/clouds/scp.py
CHANGED
|
@@ -74,7 +74,9 @@ class SCP(clouds.Cloud):
|
|
|
74
74
|
|
|
75
75
|
@classmethod
|
|
76
76
|
def _unsupported_features_for_resources(
|
|
77
|
-
cls,
|
|
77
|
+
cls,
|
|
78
|
+
resources: 'resources_lib.Resources',
|
|
79
|
+
region: Optional[str] = None,
|
|
78
80
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
79
81
|
features = cls._CLOUD_UNSUPPORTED_FEATURES
|
|
80
82
|
if resources.use_spot:
|
|
@@ -92,10 +94,15 @@ class SCP(clouds.Cloud):
|
|
|
92
94
|
return catalog.regions(clouds='scp')
|
|
93
95
|
|
|
94
96
|
@classmethod
|
|
95
|
-
def regions_with_offering(
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
97
|
+
def regions_with_offering(
|
|
98
|
+
cls,
|
|
99
|
+
instance_type: Optional[str],
|
|
100
|
+
accelerators: Optional[Dict[str, int]],
|
|
101
|
+
use_spot: bool,
|
|
102
|
+
region: Optional[str],
|
|
103
|
+
zone: Optional[str],
|
|
104
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
105
|
+
) -> List[clouds.Region]:
|
|
99
106
|
|
|
100
107
|
del accelerators, zone # unused
|
|
101
108
|
if use_spot:
|
sky/clouds/seeweb.py
CHANGED
|
@@ -84,7 +84,9 @@ class Seeweb(clouds.Cloud):
|
|
|
84
84
|
|
|
85
85
|
@classmethod
|
|
86
86
|
def _unsupported_features_for_resources(
|
|
87
|
-
cls,
|
|
87
|
+
cls,
|
|
88
|
+
resources: 'resources_lib.Resources',
|
|
89
|
+
region: Optional[str] = None,
|
|
88
90
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
89
91
|
return cls._CLOUD_UNSUPPORTED_FEATURES
|
|
90
92
|
|
|
@@ -108,6 +110,7 @@ class Seeweb(clouds.Cloud):
|
|
|
108
110
|
use_spot: bool,
|
|
109
111
|
region: Optional[str],
|
|
110
112
|
zone: Optional[str],
|
|
113
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
111
114
|
) -> List[clouds.Region]:
|
|
112
115
|
assert zone is None, 'Seeweb does not support zones.'
|
|
113
116
|
del zone
|