skypilot-nightly 1.0.0.dev20251009__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (231)
  1. sky/__init__.py +6 -2
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/coreweave.py +278 -0
  4. sky/adaptors/kubernetes.py +64 -0
  5. sky/adaptors/shadeform.py +89 -0
  6. sky/admin_policy.py +20 -0
  7. sky/authentication.py +59 -149
  8. sky/backends/backend_utils.py +104 -63
  9. sky/backends/cloud_vm_ray_backend.py +84 -39
  10. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  11. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  12. sky/catalog/kubernetes_catalog.py +24 -28
  13. sky/catalog/runpod_catalog.py +5 -1
  14. sky/catalog/shadeform_catalog.py +165 -0
  15. sky/check.py +25 -13
  16. sky/client/cli/command.py +335 -86
  17. sky/client/cli/flags.py +4 -2
  18. sky/client/cli/table_utils.py +17 -9
  19. sky/client/sdk.py +59 -12
  20. sky/cloud_stores.py +73 -0
  21. sky/clouds/__init__.py +2 -0
  22. sky/clouds/aws.py +71 -16
  23. sky/clouds/azure.py +12 -5
  24. sky/clouds/cloud.py +19 -9
  25. sky/clouds/cudo.py +12 -5
  26. sky/clouds/do.py +4 -1
  27. sky/clouds/fluidstack.py +12 -5
  28. sky/clouds/gcp.py +12 -5
  29. sky/clouds/hyperbolic.py +12 -5
  30. sky/clouds/ibm.py +12 -5
  31. sky/clouds/kubernetes.py +62 -25
  32. sky/clouds/lambda_cloud.py +12 -5
  33. sky/clouds/nebius.py +12 -5
  34. sky/clouds/oci.py +12 -5
  35. sky/clouds/paperspace.py +4 -1
  36. sky/clouds/primeintellect.py +4 -1
  37. sky/clouds/runpod.py +12 -5
  38. sky/clouds/scp.py +12 -5
  39. sky/clouds/seeweb.py +4 -1
  40. sky/clouds/shadeform.py +400 -0
  41. sky/clouds/ssh.py +4 -2
  42. sky/clouds/vast.py +12 -5
  43. sky/clouds/vsphere.py +4 -1
  44. sky/core.py +12 -11
  45. sky/dashboard/out/404.html +1 -1
  46. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  47. sky/dashboard/out/_next/static/chunks/{1871-49141c317f3a9020.js → 1871-74503c8e80fd253b.js} +1 -1
  48. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  49. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  50. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  51. sky/dashboard/out/_next/static/chunks/{3785.a19328ba41517b8b.js → 3785.ad6adaa2a0fa9768.js} +1 -1
  52. sky/dashboard/out/_next/static/chunks/{4725.10f7a9a5d3ea8208.js → 4725.a830b5c9e7867c92.js} +1 -1
  53. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  54. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  55. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  56. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  57. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  58. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  59. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  60. sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
  61. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  62. sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-477555ab7c0b13d8.js → [cluster]-a37d2063af475a1c.js} +1 -1
  63. sky/dashboard/out/_next/static/chunks/pages/{clusters-2f61f65487f6d8ff.js → clusters-d44859594e6f8064.js} +1 -1
  64. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-553b8b5cb65e100b.js → [context]-c0b5935149902e6f.js} +1 -1
  65. sky/dashboard/out/_next/static/chunks/pages/{infra-910a22500c50596f.js → infra-aed0ea19df7cf961.js} +1 -1
  66. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  67. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-bc979970c247d8f3.js → [pool]-6edeb7d06032adfc.js} +2 -2
  68. sky/dashboard/out/_next/static/chunks/pages/{jobs-a35a9dc3c5ccd657.js → jobs-479dde13399cf270.js} +1 -1
  69. sky/dashboard/out/_next/static/chunks/pages/{users-98d2ed979084162a.js → users-5ab3b907622cf0fe.js} +1 -1
  70. sky/dashboard/out/_next/static/chunks/pages/{volumes-835d14ba94808f79.js → volumes-b84b948ff357c43e.js} +1 -1
  71. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-e8688c35c06f0ac5.js → [name]-c5a3eeee1c218af1.js} +1 -1
  72. sky/dashboard/out/_next/static/chunks/pages/{workspaces-69c80d677d3c2949.js → workspaces-22b23febb3e89ce1.js} +1 -1
  73. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  74. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  75. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  76. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  77. sky/dashboard/out/clusters/[cluster].html +1 -1
  78. sky/dashboard/out/clusters.html +1 -1
  79. sky/dashboard/out/config.html +1 -1
  80. sky/dashboard/out/index.html +1 -1
  81. sky/dashboard/out/infra/[context].html +1 -1
  82. sky/dashboard/out/infra.html +1 -1
  83. sky/dashboard/out/jobs/[job].html +1 -1
  84. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  85. sky/dashboard/out/jobs.html +1 -1
  86. sky/dashboard/out/users.html +1 -1
  87. sky/dashboard/out/volumes.html +1 -1
  88. sky/dashboard/out/workspace/new.html +1 -1
  89. sky/dashboard/out/workspaces/[name].html +1 -1
  90. sky/dashboard/out/workspaces.html +1 -1
  91. sky/data/data_utils.py +92 -1
  92. sky/data/mounting_utils.py +143 -19
  93. sky/data/storage.py +168 -11
  94. sky/exceptions.py +13 -1
  95. sky/execution.py +13 -0
  96. sky/global_user_state.py +189 -113
  97. sky/jobs/client/sdk.py +32 -10
  98. sky/jobs/client/sdk_async.py +9 -3
  99. sky/jobs/constants.py +3 -1
  100. sky/jobs/controller.py +164 -192
  101. sky/jobs/file_content_utils.py +80 -0
  102. sky/jobs/log_gc.py +201 -0
  103. sky/jobs/recovery_strategy.py +59 -82
  104. sky/jobs/scheduler.py +20 -9
  105. sky/jobs/server/core.py +105 -23
  106. sky/jobs/server/server.py +40 -28
  107. sky/jobs/server/utils.py +32 -11
  108. sky/jobs/state.py +588 -110
  109. sky/jobs/utils.py +442 -209
  110. sky/logs/agent.py +1 -1
  111. sky/metrics/utils.py +45 -6
  112. sky/optimizer.py +1 -1
  113. sky/provision/__init__.py +7 -0
  114. sky/provision/aws/instance.py +2 -1
  115. sky/provision/azure/instance.py +2 -1
  116. sky/provision/common.py +2 -0
  117. sky/provision/cudo/instance.py +2 -1
  118. sky/provision/do/instance.py +2 -1
  119. sky/provision/fluidstack/instance.py +4 -3
  120. sky/provision/gcp/instance.py +2 -1
  121. sky/provision/hyperbolic/instance.py +2 -1
  122. sky/provision/instance_setup.py +10 -2
  123. sky/provision/kubernetes/constants.py +0 -1
  124. sky/provision/kubernetes/instance.py +222 -89
  125. sky/provision/kubernetes/network.py +12 -8
  126. sky/provision/kubernetes/utils.py +114 -53
  127. sky/provision/kubernetes/volume.py +5 -4
  128. sky/provision/lambda_cloud/instance.py +2 -1
  129. sky/provision/nebius/instance.py +2 -1
  130. sky/provision/oci/instance.py +2 -1
  131. sky/provision/paperspace/instance.py +2 -1
  132. sky/provision/provisioner.py +11 -2
  133. sky/provision/runpod/instance.py +2 -1
  134. sky/provision/scp/instance.py +2 -1
  135. sky/provision/seeweb/instance.py +3 -3
  136. sky/provision/shadeform/__init__.py +11 -0
  137. sky/provision/shadeform/config.py +12 -0
  138. sky/provision/shadeform/instance.py +351 -0
  139. sky/provision/shadeform/shadeform_utils.py +83 -0
  140. sky/provision/vast/instance.py +2 -1
  141. sky/provision/vsphere/instance.py +2 -1
  142. sky/resources.py +1 -1
  143. sky/schemas/api/responses.py +9 -5
  144. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  145. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  146. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  147. sky/schemas/generated/jobsv1_pb2.py +52 -52
  148. sky/schemas/generated/jobsv1_pb2.pyi +4 -2
  149. sky/schemas/generated/managed_jobsv1_pb2.py +39 -35
  150. sky/schemas/generated/managed_jobsv1_pb2.pyi +21 -5
  151. sky/serve/client/impl.py +11 -3
  152. sky/serve/replica_managers.py +5 -2
  153. sky/serve/serve_utils.py +9 -2
  154. sky/serve/server/impl.py +7 -2
  155. sky/serve/server/server.py +18 -15
  156. sky/serve/service.py +2 -2
  157. sky/server/auth/oauth2_proxy.py +2 -5
  158. sky/server/common.py +31 -28
  159. sky/server/constants.py +5 -1
  160. sky/server/daemons.py +27 -19
  161. sky/server/requests/executor.py +138 -74
  162. sky/server/requests/payloads.py +9 -1
  163. sky/server/requests/preconditions.py +13 -10
  164. sky/server/requests/request_names.py +120 -0
  165. sky/server/requests/requests.py +485 -153
  166. sky/server/requests/serializers/decoders.py +26 -13
  167. sky/server/requests/serializers/encoders.py +56 -11
  168. sky/server/requests/threads.py +106 -0
  169. sky/server/rest.py +70 -18
  170. sky/server/server.py +283 -104
  171. sky/server/stream_utils.py +233 -59
  172. sky/server/uvicorn.py +18 -17
  173. sky/setup_files/alembic.ini +4 -0
  174. sky/setup_files/dependencies.py +32 -13
  175. sky/sky_logging.py +0 -2
  176. sky/skylet/constants.py +30 -7
  177. sky/skylet/events.py +7 -0
  178. sky/skylet/log_lib.py +8 -2
  179. sky/skylet/log_lib.pyi +1 -1
  180. sky/skylet/services.py +26 -13
  181. sky/skylet/subprocess_daemon.py +103 -29
  182. sky/skypilot_config.py +87 -75
  183. sky/ssh_node_pools/server.py +9 -8
  184. sky/task.py +67 -54
  185. sky/templates/kubernetes-ray.yml.j2 +8 -1
  186. sky/templates/nebius-ray.yml.j2 +1 -0
  187. sky/templates/shadeform-ray.yml.j2 +72 -0
  188. sky/templates/websocket_proxy.py +142 -12
  189. sky/users/permission.py +8 -1
  190. sky/utils/admin_policy_utils.py +16 -3
  191. sky/utils/asyncio_utils.py +78 -0
  192. sky/utils/auth_utils.py +153 -0
  193. sky/utils/cli_utils/status_utils.py +8 -2
  194. sky/utils/command_runner.py +11 -0
  195. sky/utils/common.py +3 -1
  196. sky/utils/common_utils.py +7 -4
  197. sky/utils/context.py +57 -51
  198. sky/utils/context_utils.py +30 -12
  199. sky/utils/controller_utils.py +35 -8
  200. sky/utils/db/db_utils.py +37 -10
  201. sky/utils/db/migration_utils.py +8 -4
  202. sky/utils/locks.py +24 -6
  203. sky/utils/resource_checker.py +4 -1
  204. sky/utils/resources_utils.py +53 -29
  205. sky/utils/schemas.py +23 -4
  206. sky/utils/subprocess_utils.py +17 -4
  207. sky/volumes/server/server.py +7 -6
  208. sky/workspaces/server.py +13 -12
  209. {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/METADATA +306 -55
  210. {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/RECORD +215 -195
  211. sky/dashboard/out/_next/static/chunks/1121-d0782b9251f0fcd3.js +0 -1
  212. sky/dashboard/out/_next/static/chunks/1141-3b40c39626f99c89.js +0 -11
  213. sky/dashboard/out/_next/static/chunks/2755.97300e1362fe7c98.js +0 -26
  214. sky/dashboard/out/_next/static/chunks/3015-8d748834fcc60b46.js +0 -1
  215. sky/dashboard/out/_next/static/chunks/3294.1fafbf42b3bcebff.js +0 -1
  216. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  217. sky/dashboard/out/_next/static/chunks/6856-5fdc9b851a18acdb.js +0 -1
  218. sky/dashboard/out/_next/static/chunks/6990-f6818c84ed8f1c86.js +0 -1
  219. sky/dashboard/out/_next/static/chunks/8969-66237729cdf9749e.js +0 -1
  220. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  221. sky/dashboard/out/_next/static/chunks/9360.71e83b2ddc844ec2.js +0 -31
  222. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-8f058b0346db2aff.js +0 -16
  223. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-4f7079dcab6ed653.js +0 -16
  224. sky/dashboard/out/_next/static/chunks/webpack-6a5ddd0184bfa22c.js +0 -1
  225. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  226. sky/dashboard/out/_next/static/hIViZcQBkn0HE8SpaSsUU/_buildManifest.js +0 -1
  227. /sky/dashboard/out/_next/static/{hIViZcQBkn0HE8SpaSsUU → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  228. {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +0 -0
  229. {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  230. {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  231. {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/clouds/fluidstack.py CHANGED
@@ -73,7 +73,9 @@ class Fluidstack(clouds.Cloud):
73
73
 
74
74
  @classmethod
75
75
  def _unsupported_features_for_resources(
76
- cls, resources: 'resources_lib.Resources'
76
+ cls,
77
+ resources: 'resources_lib.Resources',
78
+ region: Optional[str] = None,
77
79
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
78
80
  """The features not supported based on the resources provided.
79
81
 
@@ -92,10 +94,15 @@ class Fluidstack(clouds.Cloud):
92
94
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
93
95
 
94
96
  @classmethod
95
- def regions_with_offering(cls, instance_type: str,
96
- accelerators: Optional[Dict[str, int]],
97
- use_spot: bool, region: Optional[str],
98
- zone: Optional[str]) -> List[clouds.Region]:
97
+ def regions_with_offering(
98
+ cls,
99
+ instance_type: str,
100
+ accelerators: Optional[Dict[str, int]],
101
+ use_spot: bool,
102
+ region: Optional[str],
103
+ zone: Optional[str],
104
+ resources: Optional['resources_lib.Resources'] = None,
105
+ ) -> List[clouds.Region]:
99
106
  assert zone is None, 'FluidStack does not support zones.'
100
107
  del accelerators, zone # unused
101
108
  if use_spot:
sky/clouds/gcp.py CHANGED
@@ -211,7 +211,9 @@ class GCP(clouds.Cloud):
211
211
 
212
212
  @classmethod
213
213
  def _unsupported_features_for_resources(
214
- cls, resources: 'resources.Resources'
214
+ cls,
215
+ resources: 'resources.Resources',
216
+ region: Optional[str] = None,
215
217
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
216
218
  unsupported = {}
217
219
  if gcp_utils.is_tpu_vm_pod(resources):
@@ -255,10 +257,15 @@ class GCP(clouds.Cloud):
255
257
 
256
258
  #### Regions/Zones ####
257
259
  @classmethod
258
- def regions_with_offering(cls, instance_type: str,
259
- accelerators: Optional[Dict[str, int]],
260
- use_spot: bool, region: Optional[str],
261
- zone: Optional[str]) -> List[clouds.Region]:
260
+ def regions_with_offering(
261
+ cls,
262
+ instance_type: str,
263
+ accelerators: Optional[Dict[str, int]],
264
+ use_spot: bool,
265
+ region: Optional[str],
266
+ zone: Optional[str],
267
+ resources: Optional['resources.Resources'] = None,
268
+ ) -> List[clouds.Region]:
262
269
  if accelerators is None:
263
270
  regions = catalog.get_region_zones_for_instance_type(instance_type,
264
271
  use_spot,
sky/clouds/hyperbolic.py CHANGED
@@ -65,7 +65,9 @@ class Hyperbolic(clouds.Cloud):
65
65
 
66
66
  @classmethod
67
67
  def _unsupported_features_for_resources(
68
- cls, resources: 'resources_lib.Resources'
68
+ cls,
69
+ resources: 'resources_lib.Resources',
70
+ region: Optional[str] = None,
69
71
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
70
72
  del resources
71
73
  return cls._CLOUD_UNSUPPORTED_FEATURES
@@ -78,10 +80,15 @@ class Hyperbolic(clouds.Cloud):
78
80
  return catalog.instance_type_exists(instance_type, 'hyperbolic')
79
81
 
80
82
  @classmethod
81
- def regions_with_offering(cls, instance_type: str,
82
- accelerators: Optional[Dict[str, int]],
83
- use_spot: bool, region: Optional[str],
84
- zone: Optional[str]) -> List[clouds.Region]:
83
+ def regions_with_offering(
84
+ cls,
85
+ instance_type: str,
86
+ accelerators: Optional[Dict[str, int]],
87
+ use_spot: bool,
88
+ region: Optional[str],
89
+ zone: Optional[str],
90
+ resources: Optional['resources_lib.Resources'] = None,
91
+ ) -> List[clouds.Region]:
85
92
  assert zone is None, 'Hyperbolic does not support zones.'
86
93
  del accelerators, zone # unused
87
94
 
sky/clouds/ibm.py CHANGED
@@ -37,7 +37,9 @@ class IBM(clouds.Cloud):
37
37
 
38
38
  @classmethod
39
39
  def _unsupported_features_for_resources(
40
- cls, resources: 'resources_lib.Resources'
40
+ cls,
41
+ resources: 'resources_lib.Resources',
42
+ region: Optional[str] = None,
41
43
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
42
44
  features = {
43
45
  clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
@@ -68,10 +70,15 @@ class IBM(clouds.Cloud):
68
70
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
69
71
 
70
72
  @classmethod
71
- def regions_with_offering(cls, instance_type: str,
72
- accelerators: Optional[Dict[str, int]],
73
- use_spot: bool, region: Optional[str],
74
- zone: Optional[str]) -> List[clouds.Region]:
73
+ def regions_with_offering(
74
+ cls,
75
+ instance_type: str,
76
+ accelerators: Optional[Dict[str, int]],
77
+ use_spot: bool,
78
+ region: Optional[str],
79
+ zone: Optional[str],
80
+ resources: Optional['resources_lib.Resources'] = None,
81
+ ) -> List[clouds.Region]:
75
82
  del accelerators # unused
76
83
  if use_spot:
77
84
  return []
sky/clouds/kubernetes.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Kubernetes."""
2
+ import concurrent.futures
2
3
  import os
3
4
  import re
4
5
  import subprocess
@@ -98,34 +99,50 @@ class Kubernetes(clouds.Cloud):
98
99
 
99
100
  @classmethod
100
101
  def _unsupported_features_for_resources(
101
- cls, resources: 'resources_lib.Resources'
102
+ cls,
103
+ resources: 'resources_lib.Resources',
104
+ region: Optional[str] = None,
102
105
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
103
106
  # TODO(aylei): features need to be regional (per context) to make
104
107
  # multi-kubernetes selection/failover work.
105
108
  unsupported_features = cls._CLOUD_UNSUPPORTED_FEATURES.copy()
106
- context = resources.region
109
+ context = region if region is not None else resources.region
107
110
  if context is None:
108
- context = kubernetes_utils.get_current_kube_config_context_name()
111
+ contexts = cls.existing_allowed_contexts()
112
+ else:
113
+ contexts = [context]
109
114
  unsupported_features[clouds.CloudImplementationFeatures.STOP] = (
110
115
  'Stopping clusters is not supported on Kubernetes.')
111
116
  unsupported_features[clouds.CloudImplementationFeatures.AUTOSTOP] = (
112
117
  'Auto-stop is not supported on Kubernetes.')
113
- # Allow spot instances if supported by the cluster
114
- try:
115
- spot_label_key, _ = kubernetes_utils.get_spot_label(context)
116
- if spot_label_key is not None:
117
- unsupported_features.pop(
118
- clouds.CloudImplementationFeatures.SPOT_INSTANCE, None)
119
- # Allow custom network tier if supported by the cluster
120
- # (e.g., Nebius clusters with high performance networking)
121
- network_type, _ = cls._detect_network_type(context,
122
- resources.network_tier)
123
- if network_type.supports_high_performance_networking():
124
- unsupported_features.pop(
125
- clouds.CloudImplementationFeatures.CUSTOM_NETWORK_TIER,
126
- None)
127
- except exceptions.KubeAPIUnreachableError as e:
128
- cls._log_unreachable_context(context, str(e))
118
+ for context in contexts:
119
+ # Allow spot instances if supported by the cluster
120
+ try:
121
+ # Run spot label check and network type detection concurrently
122
+ # as they are independent operations
123
+ with concurrent.futures.ThreadPoolExecutor(
124
+ max_workers=2) as executor:
125
+ spot_future = executor.submit(
126
+ kubernetes_utils.get_spot_label, context)
127
+ network_future = executor.submit(cls._detect_network_type,
128
+ context,
129
+ resources.network_tier)
130
+
131
+ spot_label_key, _ = spot_future.result()
132
+ if spot_label_key is not None:
133
+ unsupported_features.pop(
134
+ clouds.CloudImplementationFeatures.SPOT_INSTANCE,
135
+ None)
136
+
137
+ # Allow custom network tier if supported by the cluster
138
+ # (e.g., Nebius clusters with high performance networking)
139
+ network_type, _ = network_future.result()
140
+ if network_type.supports_high_performance_networking():
141
+ unsupported_features.pop(
142
+ clouds.CloudImplementationFeatures.
143
+ CUSTOM_NETWORK_TIER, None)
144
+ except exceptions.KubeAPIUnreachableError as e:
145
+ cls._log_unreachable_context(context, str(e))
129
146
  return unsupported_features
130
147
 
131
148
  @classmethod
@@ -241,10 +258,15 @@ class Kubernetes(clouds.Cloud):
241
258
  'refresh Kubernetes availability if permanent.')
242
259
 
243
260
  @classmethod
244
- def regions_with_offering(cls, instance_type: Optional[str],
245
- accelerators: Optional[Dict[str, int]],
246
- use_spot: bool, region: Optional[str],
247
- zone: Optional[str]) -> List[clouds.Region]:
261
+ def regions_with_offering(
262
+ cls,
263
+ instance_type: Optional[str],
264
+ accelerators: Optional[Dict[str, int]],
265
+ use_spot: bool,
266
+ region: Optional[str],
267
+ zone: Optional[str],
268
+ resources: Optional['resources_lib.Resources'] = None,
269
+ ) -> List[clouds.Region]:
248
270
  del accelerators, zone, use_spot # unused
249
271
  existing_contexts = cls.existing_allowed_contexts()
250
272
 
@@ -254,6 +276,19 @@ class Kubernetes(clouds.Cloud):
254
276
 
255
277
  if region is not None:
256
278
  regions = [r for r in regions if r.name == region]
279
+ if resources is not None:
280
+ filtered_regions = []
281
+ resources_required_features = resources.get_required_cloud_features(
282
+ )
283
+ for r in regions:
284
+ try:
285
+ cls.check_features_are_supported(
286
+ resources, resources_required_features, r.name)
287
+ filtered_regions.append(r)
288
+ except exceptions.NotSupportedError as e:
289
+ logger.info(f'Filter out context: {r.name}, reason: {e}')
290
+ continue
291
+ regions = filtered_regions
257
292
 
258
293
  # Check if requested instance type will fit in the cluster.
259
294
  # TODO(zhwu,romilb): autoscaler type needs to be regional (per
@@ -785,7 +820,8 @@ class Kubernetes(clouds.Cloud):
785
820
  accelerators=resources.accelerators,
786
821
  use_spot=resources.use_spot,
787
822
  region=resources.region,
788
- zone=resources.zone)
823
+ zone=resources.zone,
824
+ resources=resources)
789
825
  if not regions:
790
826
  return resources_utils.FeasibleResources([], [], None)
791
827
  resources = resources.copy(accelerators=None)
@@ -845,7 +881,8 @@ class Kubernetes(clouds.Cloud):
845
881
  accelerators=None,
846
882
  use_spot=resources.use_spot,
847
883
  region=resources.region,
848
- zone=resources.zone)
884
+ zone=resources.zone,
885
+ resources=resources)
849
886
  if not available_regions:
850
887
  return resources_utils.FeasibleResources([], [], None)
851
888
  # No fuzzy lists for Kubernetes
sky/clouds/lambda_cloud.py CHANGED
@@ -59,7 +59,9 @@ class Lambda(clouds.Cloud):
59
59
 
60
60
  @classmethod
61
61
  def _unsupported_features_for_resources(
62
- cls, resources: 'resources_lib.Resources'
62
+ cls,
63
+ resources: 'resources_lib.Resources',
64
+ region: Optional[str] = None,
63
65
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
64
66
  del resources # unused
65
67
  return cls._CLOUD_UNSUPPORTED_FEATURES
@@ -69,10 +71,15 @@ class Lambda(clouds.Cloud):
69
71
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
70
72
 
71
73
  @classmethod
72
- def regions_with_offering(cls, instance_type: str,
73
- accelerators: Optional[Dict[str, int]],
74
- use_spot: bool, region: Optional[str],
75
- zone: Optional[str]) -> List[clouds.Region]:
74
+ def regions_with_offering(
75
+ cls,
76
+ instance_type: str,
77
+ accelerators: Optional[Dict[str, int]],
78
+ use_spot: bool,
79
+ region: Optional[str],
80
+ zone: Optional[str],
81
+ resources: Optional['resources_lib.Resources'] = None,
82
+ ) -> List[clouds.Region]:
76
83
  assert zone is None, 'Lambda does not support zones.'
77
84
  del accelerators, zone # unused
78
85
  if use_spot:
sky/clouds/nebius.py CHANGED
@@ -78,7 +78,9 @@ class Nebius(clouds.Cloud):
78
78
 
79
79
  @classmethod
80
80
  def _unsupported_features_for_resources(
81
- cls, resources: 'resources_lib.Resources'
81
+ cls,
82
+ resources: 'resources_lib.Resources',
83
+ region: Optional[str] = None,
82
84
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
83
85
  unsupported = cls._CLOUD_UNSUPPORTED_FEATURES.copy()
84
86
 
@@ -101,10 +103,15 @@ class Nebius(clouds.Cloud):
101
103
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
102
104
 
103
105
  @classmethod
104
- def regions_with_offering(cls, instance_type: str,
105
- accelerators: Optional[Dict[str, int]],
106
- use_spot: bool, region: Optional[str],
107
- zone: Optional[str]) -> List[clouds.Region]:
106
+ def regions_with_offering(
107
+ cls,
108
+ instance_type: str,
109
+ accelerators: Optional[Dict[str, int]],
110
+ use_spot: bool,
111
+ region: Optional[str],
112
+ zone: Optional[str],
113
+ resources: Optional['resources_lib.Resources'] = None,
114
+ ) -> List[clouds.Region]:
108
115
  assert zone is None, 'Nebius does not support zones.'
109
116
  del accelerators, zone # unused
110
117
  regions = catalog.get_region_zones_for_instance_type(
sky/clouds/oci.py CHANGED
@@ -69,7 +69,9 @@ class OCI(clouds.Cloud):
69
69
 
70
70
  @classmethod
71
71
  def _unsupported_features_for_resources(
72
- cls, resources: 'resources_lib.Resources'
72
+ cls,
73
+ resources: 'resources_lib.Resources',
74
+ region: Optional[str] = None,
73
75
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
74
76
  unsupported_features = {
75
77
  clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
@@ -96,10 +98,15 @@ class OCI(clouds.Cloud):
96
98
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
97
99
 
98
100
  @classmethod
99
- def regions_with_offering(cls, instance_type: str,
100
- accelerators: Optional[Dict[str, int]],
101
- use_spot: bool, region: Optional[str],
102
- zone: Optional[str]) -> List[clouds.Region]:
101
+ def regions_with_offering(
102
+ cls,
103
+ instance_type: str,
104
+ accelerators: Optional[Dict[str, int]],
105
+ use_spot: bool,
106
+ region: Optional[str],
107
+ zone: Optional[str],
108
+ resources: Optional['resources_lib.Resources'] = None,
109
+ ) -> List[clouds.Region]:
103
110
  del accelerators # unused
104
111
 
105
112
  regions = catalog.get_region_zones_for_instance_type(
sky/clouds/paperspace.py CHANGED
@@ -60,7 +60,9 @@ class Paperspace(clouds.Cloud):
60
60
 
61
61
  @classmethod
62
62
  def _unsupported_features_for_resources(
63
- cls, resources: 'resources_lib.Resources'
63
+ cls,
64
+ resources: 'resources_lib.Resources',
65
+ region: Optional[str] = None,
64
66
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
65
67
  """The features not supported based on the resources provided.
66
68
 
@@ -86,6 +88,7 @@ class Paperspace(clouds.Cloud):
86
88
  use_spot: bool,
87
89
  region: Optional[str],
88
90
  zone: Optional[str],
91
+ resources: Optional['resources_lib.Resources'] = None,
89
92
  ) -> List[clouds.Region]:
90
93
  assert zone is None, 'Paperspace does not support zones.'
91
94
  del accelerators, zone # unused
sky/clouds/primeintellect.py CHANGED
@@ -65,6 +65,7 @@ class PrimeIntellect(clouds.Cloud):
65
65
  use_spot: bool,
66
66
  region: Optional[str],
67
67
  zone: Optional[str],
68
+ resources: Optional['resources_lib.Resources'] = None,
68
69
  ) -> List[clouds.Region]:
69
70
  """Returns the regions that offer the specified resources."""
70
71
  del accelerators
@@ -299,7 +300,9 @@ class PrimeIntellect(clouds.Cloud):
299
300
 
300
301
  @classmethod
301
302
  def _unsupported_features_for_resources(
302
- cls, resources: 'resources_lib.Resources'
303
+ cls,
304
+ resources: 'resources_lib.Resources',
305
+ region: Optional[str] = None,
303
306
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
304
307
  """The features not supported based on the resources provided.
305
308
 
sky/clouds/runpod.py CHANGED
@@ -53,7 +53,9 @@ class RunPod(clouds.Cloud):
53
53
 
54
54
  @classmethod
55
55
  def _unsupported_features_for_resources(
56
- cls, resources: 'resources_lib.Resources'
56
+ cls,
57
+ resources: 'resources_lib.Resources',
58
+ region: Optional[str] = None,
57
59
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
58
60
  """The features not supported based on the resources provided.
59
61
 
@@ -72,10 +74,15 @@ class RunPod(clouds.Cloud):
72
74
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
73
75
 
74
76
  @classmethod
75
- def regions_with_offering(cls, instance_type: str,
76
- accelerators: Optional[Dict[str, int]],
77
- use_spot: bool, region: Optional[str],
78
- zone: Optional[str]) -> List[clouds.Region]:
77
+ def regions_with_offering(
78
+ cls,
79
+ instance_type: str,
80
+ accelerators: Optional[Dict[str, int]],
81
+ use_spot: bool,
82
+ region: Optional[str],
83
+ zone: Optional[str],
84
+ resources: Optional['resources_lib.Resources'] = None,
85
+ ) -> List[clouds.Region]:
79
86
  del accelerators # unused
80
87
  regions = catalog.get_region_zones_for_instance_type(
81
88
  instance_type, use_spot, 'runpod')
sky/clouds/scp.py CHANGED
@@ -74,7 +74,9 @@ class SCP(clouds.Cloud):
74
74
 
75
75
  @classmethod
76
76
  def _unsupported_features_for_resources(
77
- cls, resources: 'resources_lib.Resources'
77
+ cls,
78
+ resources: 'resources_lib.Resources',
79
+ region: Optional[str] = None,
78
80
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
79
81
  features = cls._CLOUD_UNSUPPORTED_FEATURES
80
82
  if resources.use_spot:
@@ -92,10 +94,15 @@ class SCP(clouds.Cloud):
92
94
  return catalog.regions(clouds='scp')
93
95
 
94
96
  @classmethod
95
- def regions_with_offering(cls, instance_type: Optional[str],
96
- accelerators: Optional[Dict[str, int]],
97
- use_spot: bool, region: Optional[str],
98
- zone: Optional[str]) -> List[clouds.Region]:
97
+ def regions_with_offering(
98
+ cls,
99
+ instance_type: Optional[str],
100
+ accelerators: Optional[Dict[str, int]],
101
+ use_spot: bool,
102
+ region: Optional[str],
103
+ zone: Optional[str],
104
+ resources: Optional['resources_lib.Resources'] = None,
105
+ ) -> List[clouds.Region]:
99
106
 
100
107
  del accelerators, zone # unused
101
108
  if use_spot:
sky/clouds/seeweb.py CHANGED
@@ -84,7 +84,9 @@ class Seeweb(clouds.Cloud):
84
84
 
85
85
  @classmethod
86
86
  def _unsupported_features_for_resources(
87
- cls, resources: 'resources_lib.Resources'
87
+ cls,
88
+ resources: 'resources_lib.Resources',
89
+ region: Optional[str] = None,
88
90
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
89
91
  return cls._CLOUD_UNSUPPORTED_FEATURES
90
92
 
@@ -108,6 +110,7 @@ class Seeweb(clouds.Cloud):
108
110
  use_spot: bool,
109
111
  region: Optional[str],
110
112
  zone: Optional[str],
113
+ resources: Optional['resources_lib.Resources'] = None,
111
114
  ) -> List[clouds.Region]:
112
115
  assert zone is None, 'Seeweb does not support zones.'
113
116
  del zone