skypilot-nightly 1.0.0.dev20250624__py3-none-any.whl → 1.0.0.dev20250626__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +1 -6
  3. sky/backends/backend_utils.py +26 -11
  4. sky/backends/cloud_vm_ray_backend.py +16 -5
  5. sky/client/cli/command.py +232 -9
  6. sky/client/sdk.py +195 -91
  7. sky/clouds/aws.py +10 -7
  8. sky/clouds/azure.py +10 -7
  9. sky/clouds/cloud.py +2 -0
  10. sky/clouds/cudo.py +2 -0
  11. sky/clouds/do.py +10 -7
  12. sky/clouds/fluidstack.py +2 -0
  13. sky/clouds/gcp.py +10 -7
  14. sky/clouds/hyperbolic.py +10 -7
  15. sky/clouds/ibm.py +2 -0
  16. sky/clouds/kubernetes.py +26 -9
  17. sky/clouds/lambda_cloud.py +10 -7
  18. sky/clouds/nebius.py +10 -7
  19. sky/clouds/oci.py +10 -7
  20. sky/clouds/paperspace.py +10 -7
  21. sky/clouds/runpod.py +10 -7
  22. sky/clouds/scp.py +10 -7
  23. sky/clouds/ssh.py +36 -0
  24. sky/clouds/vast.py +10 -7
  25. sky/clouds/vsphere.py +2 -0
  26. sky/core.py +21 -0
  27. sky/dag.py +14 -0
  28. sky/dashboard/out/404.html +1 -1
  29. sky/dashboard/out/_next/static/bs6UB9V4Jq10TIZ5x-kBK/_buildManifest.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/141-fa5a20cbf401b351.js +11 -0
  31. sky/dashboard/out/_next/static/chunks/230-d6e363362017ff3a.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/25.76c246239df93d50.js +6 -0
  33. sky/dashboard/out/_next/static/chunks/43-36177d00f6956ab2.js +1 -0
  34. sky/dashboard/out/_next/static/chunks/430.ed51037d1a4a438b.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/470-92dd1614396389be.js +1 -0
  36. sky/dashboard/out/_next/static/chunks/544.110e53813fb98e2e.js +1 -0
  37. sky/dashboard/out/_next/static/chunks/645.961f08e39b8ce447.js +1 -0
  38. sky/dashboard/out/_next/static/chunks/690.55f9eed3be903f56.js +16 -0
  39. sky/dashboard/out/_next/static/chunks/697.6460bf72e760addd.js +20 -0
  40. sky/dashboard/out/_next/static/chunks/785.dc2686c3c1235554.js +1 -0
  41. sky/dashboard/out/_next/static/chunks/871-3db673be3ee3750b.js +6 -0
  42. sky/dashboard/out/_next/static/chunks/875.52c962183328b3f2.js +25 -0
  43. sky/dashboard/out/_next/static/chunks/973-81b2d057178adb76.js +1 -0
  44. sky/dashboard/out/_next/static/chunks/982.1b61658204416b0f.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/984.e8bac186a24e5178.js +1 -0
  46. sky/dashboard/out/_next/static/chunks/990-0ad5ea1699e03ee8.js +1 -0
  47. sky/dashboard/out/_next/static/chunks/pages/{_app-ce31493da9747ef4.js → _app-9a3ce3170d2edcec.js} +1 -1
  48. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js +6 -0
  49. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-8040f2483897ed0c.js +6 -0
  50. sky/dashboard/out/_next/static/chunks/pages/{clusters-7e9736af1c6345a6.js → clusters-f119a5630a1efd61.js} +1 -1
  51. sky/dashboard/out/_next/static/chunks/pages/config-6b255eae088da6a3.js +1 -0
  52. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-b302aea4d65766bf.js +1 -0
  53. sky/dashboard/out/_next/static/chunks/pages/infra-ee8cc4d449945d19.js +1 -0
  54. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-e4b23128db0774cd.js +16 -0
  55. sky/dashboard/out/_next/static/chunks/pages/jobs-0a5695ff3075d94a.js +1 -0
  56. sky/dashboard/out/_next/static/chunks/pages/users-4978cbb093e141e7.js +1 -0
  57. sky/dashboard/out/_next/static/chunks/pages/volumes-476b670ef33d1ecd.js +1 -0
  58. sky/dashboard/out/_next/static/chunks/pages/workspace/{new-31aa8bdcb7592635.js → new-5b59bce9eb208d84.js} +1 -1
  59. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-cb7e720b739de53a.js +1 -0
  60. sky/dashboard/out/_next/static/chunks/pages/workspaces-50e230828730cfb3.js +1 -0
  61. sky/dashboard/out/_next/static/chunks/webpack-08fdb9e6070127fc.js +1 -0
  62. sky/dashboard/out/_next/static/css/52082cf558ec9705.css +3 -0
  63. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  64. sky/dashboard/out/clusters/[cluster].html +1 -1
  65. sky/dashboard/out/clusters.html +1 -1
  66. sky/dashboard/out/config.html +1 -1
  67. sky/dashboard/out/index.html +1 -1
  68. sky/dashboard/out/infra/[context].html +1 -1
  69. sky/dashboard/out/infra.html +1 -1
  70. sky/dashboard/out/jobs/[job].html +1 -1
  71. sky/dashboard/out/jobs.html +1 -1
  72. sky/dashboard/out/users.html +1 -1
  73. sky/dashboard/out/volumes.html +1 -0
  74. sky/dashboard/out/workspace/new.html +1 -1
  75. sky/dashboard/out/workspaces/[name].html +1 -1
  76. sky/dashboard/out/workspaces.html +1 -1
  77. sky/data/storage_utils.py +2 -4
  78. sky/exceptions.py +15 -0
  79. sky/execution.py +5 -0
  80. sky/global_user_state.py +129 -0
  81. sky/jobs/client/sdk.py +13 -11
  82. sky/jobs/server/core.py +4 -0
  83. sky/models.py +16 -0
  84. sky/provision/__init__.py +26 -0
  85. sky/provision/kubernetes/__init__.py +3 -0
  86. sky/provision/kubernetes/instance.py +38 -77
  87. sky/provision/kubernetes/utils.py +70 -4
  88. sky/provision/kubernetes/volume.py +147 -0
  89. sky/resources.py +20 -76
  90. sky/serve/client/sdk.py +13 -13
  91. sky/serve/server/core.py +5 -1
  92. sky/server/common.py +40 -5
  93. sky/server/constants.py +5 -1
  94. sky/server/metrics.py +105 -0
  95. sky/server/requests/executor.py +30 -14
  96. sky/server/requests/payloads.py +16 -0
  97. sky/server/requests/requests.py +35 -1
  98. sky/server/rest.py +153 -0
  99. sky/server/server.py +70 -43
  100. sky/server/state.py +20 -0
  101. sky/server/stream_utils.py +8 -3
  102. sky/server/uvicorn.py +153 -13
  103. sky/setup_files/dependencies.py +2 -0
  104. sky/skylet/constants.py +19 -3
  105. sky/skypilot_config.py +3 -0
  106. sky/ssh_node_pools/__init__.py +1 -0
  107. sky/ssh_node_pools/core.py +133 -0
  108. sky/ssh_node_pools/server.py +232 -0
  109. sky/task.py +141 -18
  110. sky/templates/kubernetes-ray.yml.j2 +30 -1
  111. sky/users/permission.py +2 -0
  112. sky/utils/context.py +3 -1
  113. sky/utils/kubernetes/deploy_remote_cluster.py +12 -185
  114. sky/utils/kubernetes/ssh_utils.py +221 -0
  115. sky/utils/resources_utils.py +66 -0
  116. sky/utils/rich_utils.py +6 -0
  117. sky/utils/schemas.py +146 -3
  118. sky/utils/status_lib.py +10 -0
  119. sky/utils/validator.py +11 -1
  120. sky/volumes/__init__.py +0 -0
  121. sky/volumes/client/__init__.py +0 -0
  122. sky/volumes/client/sdk.py +64 -0
  123. sky/volumes/server/__init__.py +0 -0
  124. sky/volumes/server/core.py +199 -0
  125. sky/volumes/server/server.py +85 -0
  126. sky/volumes/utils.py +158 -0
  127. sky/volumes/volume.py +198 -0
  128. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/METADATA +2 -1
  129. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/RECORD +135 -115
  130. sky/dashboard/out/_next/static/chunks/211.692afc57e812ae1a.js +0 -1
  131. sky/dashboard/out/_next/static/chunks/350.9e123a4551f68b0d.js +0 -1
  132. sky/dashboard/out/_next/static/chunks/37-4650f214e2119168.js +0 -6
  133. sky/dashboard/out/_next/static/chunks/42.2273cc2415291ceb.js +0 -6
  134. sky/dashboard/out/_next/static/chunks/443.b2242d0efcdf5f47.js +0 -1
  135. sky/dashboard/out/_next/static/chunks/470-1494c899266cf5c9.js +0 -1
  136. sky/dashboard/out/_next/static/chunks/513.309df9e18a9ff005.js +0 -1
  137. sky/dashboard/out/_next/static/chunks/641.c8e452bc5070a630.js +0 -1
  138. sky/dashboard/out/_next/static/chunks/682.4dd5dc116f740b5f.js +0 -6
  139. sky/dashboard/out/_next/static/chunks/760-a89d354797ce7af5.js +0 -1
  140. sky/dashboard/out/_next/static/chunks/843-bde186946d353355.js +0 -11
  141. sky/dashboard/out/_next/static/chunks/856-bfddc18e16f3873c.js +0 -1
  142. sky/dashboard/out/_next/static/chunks/901-b424d293275e1fd7.js +0 -1
  143. sky/dashboard/out/_next/static/chunks/973-56412c7976b4655b.js +0 -1
  144. sky/dashboard/out/_next/static/chunks/984.ae8c08791d274ca0.js +0 -50
  145. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-4e065c812a52460b.js +0 -6
  146. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-520ec1ab65e2f2a4.js +0 -6
  147. sky/dashboard/out/_next/static/chunks/pages/config-e4f473661889e7cd.js +0 -1
  148. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-00fd23b9577492ca.js +0 -1
  149. sky/dashboard/out/_next/static/chunks/pages/infra-8a4bf7370d4d9bb7.js +0 -1
  150. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-171c27f4ca94861c.js +0 -16
  151. sky/dashboard/out/_next/static/chunks/pages/jobs-55e5bcb16d563231.js +0 -1
  152. sky/dashboard/out/_next/static/chunks/pages/users-c9f4d785cdaa52d8.js +0 -1
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-ecc5a7003776cfa7.js +0 -1
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-f00cba35691483b1.js +0 -1
  155. sky/dashboard/out/_next/static/chunks/webpack-c85998e6a5722f21.js +0 -1
  156. sky/dashboard/out/_next/static/css/6ab927686b492a4a.css +0 -3
  157. sky/dashboard/out/_next/static/zsALxITkbP8J8NVwSDwMo/_buildManifest.js +0 -1
  158. /sky/dashboard/out/_next/static/{zsALxITkbP8J8NVwSDwMo → bs6UB9V4Jq10TIZ5x-kBK}/_ssgManifest.js +0 -0
  159. /sky/dashboard/out/_next/static/chunks/{938-ce7991c156584b06.js → 938-068520cc11738deb.js} +0 -0
  160. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/WHEEL +0 -0
  161. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/entry_points.txt +0 -0
  162. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/licenses/LICENSE +0 -0
  163. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/top_level.txt +0 -0
sky/clouds/cloud.py CHANGED
@@ -27,6 +27,7 @@ from sky.utils import ux_utils
27
27
  if typing.TYPE_CHECKING:
28
28
  from sky import resources as resources_lib
29
29
  from sky.utils import status_lib
30
+ from sky.volumes import volume as volume_lib
30
31
 
31
32
 
32
33
  class CloudImplementationFeatures(enum.Enum):
@@ -307,6 +308,7 @@ class Cloud:
307
308
  zones: Optional[List['Zone']],
308
309
  num_nodes: int,
309
310
  dryrun: bool = False,
311
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
310
312
  ) -> Dict[str, Any]:
311
313
  """Converts planned sky.Resources to cloud-specific resource variables.
312
314
 
sky/clouds/cudo.py CHANGED
@@ -12,6 +12,7 @@ from sky.utils import resources_utils
12
12
  if typing.TYPE_CHECKING:
13
13
  # Renaming to avoid shadowing variables.
14
14
  from sky import resources as resources_lib
15
+ from sky.volumes import volume as volume_lib
15
16
 
16
17
  _CREDENTIAL_FILES = [
17
18
  # credential files for Cudo,
@@ -201,6 +202,7 @@ class Cudo(clouds.Cloud):
201
202
  zones: Optional[List['clouds.Zone']],
202
203
  num_nodes: int,
203
204
  dryrun: bool = False,
205
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
204
206
  ) -> Dict[str, Optional[str]]:
205
207
  del zones, cluster_name # unused
206
208
  resources = resources.assert_launchable()
sky/clouds/do.py CHANGED
@@ -14,6 +14,7 @@ from sky.utils import resources_utils
14
14
 
15
15
  if typing.TYPE_CHECKING:
16
16
  from sky import resources as resources_lib
17
+ from sky.volumes import volume as volume_lib
17
18
 
18
19
  _CREDENTIAL_FILE = 'config.yaml'
19
20
 
@@ -175,13 +176,15 @@ class DO(clouds.Cloud):
175
176
  return None
176
177
 
177
178
  def make_deploy_resources_variables(
178
- self,
179
- resources: 'resources_lib.Resources',
180
- cluster_name: resources_utils.ClusterName,
181
- region: 'clouds.Region',
182
- zones: Optional[List['clouds.Zone']],
183
- num_nodes: int,
184
- dryrun: bool = False) -> Dict[str, Optional[str]]:
179
+ self,
180
+ resources: 'resources_lib.Resources',
181
+ cluster_name: resources_utils.ClusterName,
182
+ region: 'clouds.Region',
183
+ zones: Optional[List['clouds.Zone']],
184
+ num_nodes: int,
185
+ dryrun: bool = False,
186
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
187
+ ) -> Dict[str, Optional[str]]:
185
188
  del zones, dryrun, cluster_name
186
189
 
187
190
  resources = resources.assert_launchable()
sky/clouds/fluidstack.py CHANGED
@@ -21,6 +21,7 @@ if typing.TYPE_CHECKING:
21
21
 
22
22
  # Renaming to avoid shadowing variables.
23
23
  from sky import resources as resources_lib
24
+ from sky.volumes import volume as volume_lib
24
25
  else:
25
26
  requests = adaptors_common.LazyImport('requests')
26
27
 
@@ -188,6 +189,7 @@ class Fluidstack(clouds.Cloud):
188
189
  zones: Optional[List[clouds.Zone]],
189
190
  num_nodes: int,
190
191
  dryrun: bool = False,
192
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
191
193
  ) -> Dict[str, Optional[str]]:
192
194
 
193
195
  assert zones is None, 'FluidStack does not support zones.'
sky/clouds/gcp.py CHANGED
@@ -29,6 +29,7 @@ from sky.utils import ux_utils
29
29
  if typing.TYPE_CHECKING:
30
30
  from sky import resources
31
31
  from sky.utils import status_lib
32
+ from sky.volumes import volume as volume_lib
32
33
 
33
34
  logger = sky_logging.init_logger(__name__)
34
35
 
@@ -465,13 +466,15 @@ class GCP(clouds.Cloud):
465
466
  assert False, 'Low disk tier should always be supported on GCP.'
466
467
 
467
468
  def make_deploy_resources_variables(
468
- self,
469
- resources: 'resources.Resources',
470
- cluster_name: resources_utils.ClusterName,
471
- region: 'clouds.Region',
472
- zones: Optional[List['clouds.Zone']],
473
- num_nodes: int,
474
- dryrun: bool = False) -> Dict[str, Optional[str]]:
469
+ self,
470
+ resources: 'resources.Resources',
471
+ cluster_name: resources_utils.ClusterName,
472
+ region: 'clouds.Region',
473
+ zones: Optional[List['clouds.Zone']],
474
+ num_nodes: int,
475
+ dryrun: bool = False,
476
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
477
+ ) -> Dict[str, Optional[str]]:
475
478
  assert zones is not None, (region, zones)
476
479
 
477
480
  region_name = region.name
sky/clouds/hyperbolic.py CHANGED
@@ -13,6 +13,7 @@ from sky.utils.resources_utils import DiskTier
13
13
 
14
14
  if typing.TYPE_CHECKING:
15
15
  from sky import resources as resources_lib
16
+ from sky.volumes import volume as volume_lib
16
17
 
17
18
 
18
19
  @registry.CLOUD_REGISTRY.register
@@ -244,13 +245,15 @@ class Hyperbolic(clouds.Cloud):
244
245
  return 0.0
245
246
 
246
247
  def make_deploy_resources_variables(
247
- self,
248
- resources: 'resources_lib.Resources',
249
- cluster_name: resources_utils.ClusterName,
250
- region: 'clouds.Region',
251
- zones: Optional[List['clouds.Zone']],
252
- num_nodes: int,
253
- dryrun: bool = False) -> Dict[str, Any]:
248
+ self,
249
+ resources: 'resources_lib.Resources',
250
+ cluster_name: resources_utils.ClusterName,
251
+ region: 'clouds.Region',
252
+ zones: Optional[List['clouds.Zone']],
253
+ num_nodes: int,
254
+ dryrun: bool = False,
255
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
256
+ ) -> Dict[str, Any]:
254
257
  """Returns a dict of variables for the deployment template."""
255
258
  del dryrun, region, cluster_name # unused
256
259
  assert zones is None, ('Hyperbolic does not support zones', zones)
sky/clouds/ibm.py CHANGED
@@ -18,6 +18,7 @@ from sky.utils import ux_utils
18
18
  if typing.TYPE_CHECKING:
19
19
  # renaming to avoid shadowing variables
20
20
  from sky import resources as resources_lib
21
+ from sky.volumes import volume as volume_lib
21
22
 
22
23
  logger = sky_logging.init_logger(__name__)
23
24
 
@@ -175,6 +176,7 @@ class IBM(clouds.Cloud):
175
176
  zones: Optional[List['clouds.Zone']],
176
177
  num_nodes: int,
177
178
  dryrun: bool = False,
179
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
178
180
  ) -> Dict[str, Any]:
179
181
  """Converts planned sky.Resources to cloud-specific resource variables.
180
182
 
sky/clouds/kubernetes.py CHANGED
@@ -25,6 +25,7 @@ from sky.utils import common_utils
25
25
  from sky.utils import registry
26
26
  from sky.utils import resources_utils
27
27
  from sky.utils import schemas
28
+ from sky.volumes import volume as volume_lib
28
29
 
29
30
  if typing.TYPE_CHECKING:
30
31
  # Renaming to avoid shadowing variables.
@@ -394,7 +395,9 @@ class Kubernetes(clouds.Cloud):
394
395
  return 0
395
396
 
396
397
  @staticmethod
397
- def _calculate_provision_timeout(num_nodes: int) -> int:
398
+ def _calculate_provision_timeout(
399
+ num_nodes: int,
400
+ volume_mounts: Optional[List['volume_lib.VolumeMount']]) -> int:
398
401
  """Calculate provision timeout based on number of nodes.
399
402
 
400
403
  The timeout scales linearly with the number of nodes to account for
@@ -409,19 +412,33 @@ class Kubernetes(clouds.Cloud):
409
412
  base_timeout = 10 # Base timeout for single node
410
413
  per_node_timeout = 0.2 # Additional seconds per node
411
414
  max_timeout = 60 # Cap at 1 minute
415
+ if volume_mounts is not None:
416
+ for volume_mount in volume_mounts:
417
+ if (volume_mount.volume_config.type ==
418
+ volume_lib.VolumeType.PVC.value):
419
+ if (volume_mount.volume_config.config.get(
420
+ 'access_mode', '') ==
421
+ volume_lib.VolumeAccessMode.READ_WRITE_MANY.value):
422
+ # GKE may take several minutes to provision a PV
423
+ # supporting READ_WRITE_MANY with filestore.
424
+ base_timeout = 180
425
+ max_timeout = 240
426
+ break
412
427
 
413
428
  return int(
414
429
  min(base_timeout + (per_node_timeout * (num_nodes - 1)),
415
430
  max_timeout))
416
431
 
417
432
  def make_deploy_resources_variables(
418
- self,
419
- resources: 'resources_lib.Resources',
420
- cluster_name: 'resources_utils.ClusterName',
421
- region: Optional['clouds.Region'],
422
- zones: Optional[List['clouds.Zone']],
423
- num_nodes: int,
424
- dryrun: bool = False) -> Dict[str, Optional[str]]:
433
+ self,
434
+ resources: 'resources_lib.Resources',
435
+ cluster_name: 'resources_utils.ClusterName',
436
+ region: Optional['clouds.Region'],
437
+ zones: Optional[List['clouds.Zone']],
438
+ num_nodes: int,
439
+ dryrun: bool = False,
440
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
441
+ ) -> Dict[str, Optional[str]]:
425
442
  del cluster_name, zones, dryrun # Unused.
426
443
  if region is None:
427
444
  context = kubernetes_utils.get_current_kube_config_context_name()
@@ -562,7 +579,7 @@ class Kubernetes(clouds.Cloud):
562
579
  # We use a linear scaling formula to determine the timeout based on the
563
580
  # number of nodes.
564
581
 
565
- timeout = self._calculate_provision_timeout(num_nodes)
582
+ timeout = self._calculate_provision_timeout(num_nodes, volume_mounts)
566
583
  timeout = skypilot_config.get_nested(
567
584
  ('kubernetes', 'provision_timeout'),
568
585
  timeout,
sky/clouds/lambda_cloud.py CHANGED
@@ -15,6 +15,7 @@ if typing.TYPE_CHECKING:
15
15
 
16
16
  # Renaming to avoid shadowing variables.
17
17
  from sky import resources as resources_lib
18
+ from sky.volumes import volume as volume_lib
18
19
  else:
19
20
  requests = adaptors_common.LazyImport('requests')
20
21
 
@@ -159,13 +160,15 @@ class Lambda(clouds.Cloud):
159
160
  return None
160
161
 
161
162
  def make_deploy_resources_variables(
162
- self,
163
- resources: 'resources_lib.Resources',
164
- cluster_name: 'resources_utils.ClusterName',
165
- region: 'clouds.Region',
166
- zones: Optional[List['clouds.Zone']],
167
- num_nodes: int,
168
- dryrun: bool = False) -> Dict[str, Any]:
163
+ self,
164
+ resources: 'resources_lib.Resources',
165
+ cluster_name: 'resources_utils.ClusterName',
166
+ region: 'clouds.Region',
167
+ zones: Optional[List['clouds.Zone']],
168
+ num_nodes: int,
169
+ dryrun: bool = False,
170
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
171
+ ) -> Dict[str, Any]:
169
172
  del cluster_name, dryrun # Unused.
170
173
  assert zones is None, 'Lambda does not support zones.'
171
174
  resources = resources.assert_launchable()
sky/clouds/nebius.py CHANGED
@@ -16,6 +16,7 @@ from sky.utils import resources_utils
16
16
 
17
17
  if typing.TYPE_CHECKING:
18
18
  from sky import resources as resources_lib
19
+ from sky.volumes import volume as volume_lib
19
20
 
20
21
  _INDENT_PREFIX = ' '
21
22
 
@@ -196,13 +197,15 @@ class Nebius(clouds.Cloud):
196
197
  return None
197
198
 
198
199
  def make_deploy_resources_variables(
199
- self,
200
- resources: 'resources_lib.Resources',
201
- cluster_name: resources_utils.ClusterName,
202
- region: 'clouds.Region',
203
- zones: Optional[List['clouds.Zone']],
204
- num_nodes: int,
205
- dryrun: bool = False) -> Dict[str, Any]:
200
+ self,
201
+ resources: 'resources_lib.Resources',
202
+ cluster_name: resources_utils.ClusterName,
203
+ region: 'clouds.Region',
204
+ zones: Optional[List['clouds.Zone']],
205
+ num_nodes: int,
206
+ dryrun: bool = False,
207
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
208
+ ) -> Dict[str, Any]:
206
209
  del dryrun, cluster_name
207
210
  assert zones is None, ('Nebius does not support zones', zones)
208
211
 
sky/clouds/oci.py CHANGED
@@ -40,6 +40,7 @@ from sky.utils import ux_utils
40
40
  if typing.TYPE_CHECKING:
41
41
  # Renaming to avoid shadowing variables.
42
42
  from sky import resources as resources_lib
43
+ from sky.volumes import volume as volume_lib
43
44
 
44
45
  logger = logging.getLogger(__name__)
45
46
 
@@ -207,13 +208,15 @@ class OCI(clouds.Cloud):
207
208
  return None
208
209
 
209
210
  def make_deploy_resources_variables(
210
- self,
211
- resources: 'resources_lib.Resources',
212
- cluster_name: resources_utils.ClusterName,
213
- region: Optional['clouds.Region'],
214
- zones: Optional[List['clouds.Zone']],
215
- num_nodes: int,
216
- dryrun: bool = False) -> Dict[str, Any]:
211
+ self,
212
+ resources: 'resources_lib.Resources',
213
+ cluster_name: resources_utils.ClusterName,
214
+ region: Optional['clouds.Region'],
215
+ zones: Optional[List['clouds.Zone']],
216
+ num_nodes: int,
217
+ dryrun: bool = False,
218
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
219
+ ) -> Dict[str, Any]:
217
220
  del cluster_name, dryrun # Unused.
218
221
  assert region is not None, resources
219
222
 
sky/clouds/paperspace.py CHANGED
@@ -14,6 +14,7 @@ if typing.TYPE_CHECKING:
14
14
  import requests
15
15
 
16
16
  from sky import resources as resources_lib
17
+ from sky.volumes import volume as volume_lib
17
18
  else:
18
19
  requests = adaptors_common.LazyImport('requests')
19
20
 
@@ -179,13 +180,15 @@ class Paperspace(clouds.Cloud):
179
180
  return None
180
181
 
181
182
  def make_deploy_resources_variables(
182
- self,
183
- resources: 'resources_lib.Resources',
184
- cluster_name: resources_utils.ClusterName,
185
- region: 'clouds.Region',
186
- zones: Optional[List['clouds.Zone']],
187
- num_nodes: int,
188
- dryrun: bool = False) -> Dict[str, Optional[str]]:
183
+ self,
184
+ resources: 'resources_lib.Resources',
185
+ cluster_name: resources_utils.ClusterName,
186
+ region: 'clouds.Region',
187
+ zones: Optional[List['clouds.Zone']],
188
+ num_nodes: int,
189
+ dryrun: bool = False,
190
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
191
+ ) -> Dict[str, Optional[str]]:
189
192
  del zones, dryrun, cluster_name
190
193
 
191
194
  resources = resources.assert_launchable()
sky/clouds/runpod.py CHANGED
@@ -10,6 +10,7 @@ from sky.utils import resources_utils
10
10
 
11
11
  if typing.TYPE_CHECKING:
12
12
  from sky import resources as resources_lib
13
+ from sky.volumes import volume as volume_lib
13
14
 
14
15
  _CREDENTIAL_FILES = [
15
16
  'config.toml',
@@ -160,13 +161,15 @@ class RunPod(clouds.Cloud):
160
161
  return None
161
162
 
162
163
  def make_deploy_resources_variables(
163
- self,
164
- resources: 'resources_lib.Resources',
165
- cluster_name: resources_utils.ClusterName,
166
- region: 'clouds.Region',
167
- zones: Optional[List['clouds.Zone']],
168
- num_nodes: int,
169
- dryrun: bool = False) -> Dict[str, Optional[Union[str, bool]]]:
164
+ self,
165
+ resources: 'resources_lib.Resources',
166
+ cluster_name: resources_utils.ClusterName,
167
+ region: 'clouds.Region',
168
+ zones: Optional[List['clouds.Zone']],
169
+ num_nodes: int,
170
+ dryrun: bool = False,
171
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
172
+ ) -> Dict[str, Optional[Union[str, bool]]]:
170
173
  del dryrun, cluster_name # unused
171
174
  assert zones is not None, (region, zones)
172
175
 
sky/clouds/scp.py CHANGED
@@ -19,6 +19,7 @@ from sky.utils import status_lib
19
19
  if typing.TYPE_CHECKING:
20
20
  # Renaming to avoid shadowing variables.
21
21
  from sky import resources as resources_lib
22
+ from sky.volumes import volume as volume_lib
22
23
 
23
24
  _CREDENTIAL_FILES = [
24
25
  'scp_credential',
@@ -183,13 +184,15 @@ class SCP(clouds.Cloud):
183
184
  return None
184
185
 
185
186
  def make_deploy_resources_variables(
186
- self,
187
- resources: 'resources_lib.Resources',
188
- cluster_name: 'resources_utils.ClusterName',
189
- region: 'clouds.Region',
190
- zones: Optional[List['clouds.Zone']],
191
- num_nodes: int,
192
- dryrun: bool = False) -> Dict[str, Optional[str]]:
187
+ self,
188
+ resources: 'resources_lib.Resources',
189
+ cluster_name: 'resources_utils.ClusterName',
190
+ region: 'clouds.Region',
191
+ zones: Optional[List['clouds.Zone']],
192
+ num_nodes: int,
193
+ dryrun: bool = False,
194
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
195
+ ) -> Dict[str, Optional[str]]:
193
196
  del cluster_name, dryrun # Unused.
194
197
  assert zones is None, 'SCP does not support zones.'
195
198
 
sky/clouds/ssh.py CHANGED
@@ -211,6 +211,42 @@ class SSH(kubernetes.Kubernetes):
211
211
 
212
212
  return success, ctx2text
213
213
 
214
+ @classmethod
215
+ def check_single_context(cls, context: str) -> Tuple[bool, str]:
216
+ """Checks if the context is valid and accessible."""
217
+ reasons = kubernetes_utils.check_port_forward_mode_dependencies(False)
218
+ if reasons is not None:
219
+ formatted = '\n'.join(
220
+ [reasons[0]] +
221
+ [f'{cls._INDENT_PREFIX}' + r for r in reasons[1:]])
222
+ return (False, formatted)
223
+
224
+ # Add ssh- prefix to the context
225
+ if not context.startswith('ssh-'):
226
+ context = f'ssh-{context}'
227
+
228
+ # Get SSH contexts
229
+ try:
230
+ existing_allowed_contexts = cls.existing_allowed_contexts()
231
+ except Exception as e: # pylint: disable=broad-except
232
+ return (False, f'Failed to get SSH contexts: {str(e)}')
233
+
234
+ if not existing_allowed_contexts:
235
+ return (False,
236
+ 'No SSH Node Pools are up. Run `sky ssh up` to set up '
237
+ f'Node Pools from {SSH_NODE_POOLS_PATH}.')
238
+
239
+ if context not in existing_allowed_contexts:
240
+ return (False, f'SSH Node Pool {context} is not set up. '
241
+ f'Run `sky ssh up --infra {context}` to set up.')
242
+
243
+ # Check if the context is valid
244
+ suc, text = super()._check_single_context(context)
245
+ if not suc:
246
+ return (False, text)
247
+
248
+ return (True, 'SSH Node Pool is set up.')
249
+
214
250
  @classmethod
215
251
  def expand_infras(cls) -> List[str]:
216
252
  return [
sky/clouds/vast.py CHANGED
@@ -10,6 +10,7 @@ from sky.utils import resources_utils
10
10
 
11
11
  if typing.TYPE_CHECKING:
12
12
  from sky import resources as resources_lib
13
+ from sky.volumes import volume as volume_lib
13
14
 
14
15
 
15
16
  @registry.CLOUD_REGISTRY.register
@@ -155,13 +156,15 @@ class Vast(clouds.Cloud):
155
156
  return None
156
157
 
157
158
  def make_deploy_resources_variables(
158
- self,
159
- resources: 'resources_lib.Resources',
160
- cluster_name: resources_utils.ClusterName,
161
- region: 'clouds.Region',
162
- zones: Optional[List['clouds.Zone']],
163
- num_nodes: int,
164
- dryrun: bool = False) -> Dict[str, Optional[str]]:
159
+ self,
160
+ resources: 'resources_lib.Resources',
161
+ cluster_name: resources_utils.ClusterName,
162
+ region: 'clouds.Region',
163
+ zones: Optional[List['clouds.Zone']],
164
+ num_nodes: int,
165
+ dryrun: bool = False,
166
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
167
+ ) -> Dict[str, Optional[str]]:
165
168
  del zones, dryrun, cluster_name, num_nodes # unused
166
169
 
167
170
  resources = resources.assert_launchable()
sky/clouds/vsphere.py CHANGED
@@ -18,6 +18,7 @@ if typing.TYPE_CHECKING:
18
18
 
19
19
  # Renaming to avoid shadowing variables.
20
20
  from sky import resources as resources_lib
21
+ from sky.volumes import volume as volume_lib
21
22
  else:
22
23
  requests = adaptors_common.LazyImport('requests')
23
24
 
@@ -184,6 +185,7 @@ class Vsphere(clouds.Cloud):
184
185
  zones: Optional[List['clouds.Zone']],
185
186
  num_nodes: int,
186
187
  dryrun: bool = False,
188
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
187
189
  ) -> Dict[str, Optional[str]]:
188
190
  # TODO get image id here.
189
191
  del cluster_name, dryrun # unused
sky/core.py CHANGED
@@ -76,6 +76,7 @@ def optimize(
76
76
  for a task.
77
77
  exceptions.NoCloudAccessError: if no public clouds are enabled.
78
78
  """
79
+ dag.resolve_and_validate_volumes()
79
80
  # TODO: We apply the admin policy only on the first DAG optimization which
80
81
  # is shown on `sky launch`. The optimizer is also invoked during failover,
81
82
  # but we do not apply the admin policy there. We should apply the admin
@@ -1306,6 +1307,26 @@ def ssh_up(infra: Optional[str] = None, cleanup: bool = False) -> None:
1306
1307
  )
1307
1308
 
1308
1309
 
1310
+ @usage_lib.entrypoint
1311
+ def ssh_status(context_name: str) -> Tuple[bool, str]:
1312
+ """Check the status of an SSH Node Pool context.
1313
+
1314
+ Args:
1315
+ context_name: The SSH context name (e.g., 'ssh-my-cluster')
1316
+
1317
+ Returns:
1318
+ Tuple[bool, str]: (is_ready, reason)
1319
+ - is_ready: True if the SSH Node Pool is ready, False otherwise
1320
+ - reason: Explanation of the status
1321
+ """
1322
+ try:
1323
+ is_ready, reason = clouds.SSH.check_single_context(context_name)
1324
+ return is_ready, reason
1325
+ except Exception as e: # pylint: disable=broad-except
1326
+ return False, ('Failed to check SSH context: '
1327
+ f'{common_utils.format_exception(e)}')
1328
+
1329
+
1309
1330
  def get_all_contexts() -> List[str]:
1310
1331
  """Get all available contexts from Kubernetes and SSH clouds.
1311
1332
 
sky/dag.py CHANGED
@@ -83,6 +83,20 @@ class Dag:
83
83
  task.validate(skip_file_mounts=skip_file_mounts,
84
84
  skip_workdir=skip_workdir)
85
85
 
86
+ def resolve_and_validate_volumes(self) -> None:
87
+ for task in self.tasks:
88
+ task.resolve_and_validate_volumes()
89
+
90
+ def pre_mount_volumes(self) -> None:
91
+ vol_map = {}
92
+ # Deduplicate volume mounts.
93
+ for task in self.tasks:
94
+ if task.volume_mounts is not None:
95
+ for volume_mount in task.volume_mounts:
96
+ vol_map[volume_mount.volume_name] = volume_mount
97
+ for volume_mount in vol_map.values():
98
+ volume_mount.pre_mount()
99
+
86
100
 
87
101
  class _DagContext(threading.local):
88
102
  """A thread-local stack of Dags."""
sky/dashboard/out/404.html CHANGED
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/6ab927686b492a4a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/6ab927686b492a4a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-c85998e6a5722f21.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce31493da9747ef4.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-1be831200e60c5c0.js" defer=""></script><script src="/dashboard/_next/static/zsALxITkbP8J8NVwSDwMo/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/zsALxITkbP8J8NVwSDwMo/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"statusCode":404}},"page":"/_error","query":{},"buildId":"zsALxITkbP8J8NVwSDwMo","assetPrefix":"/dashboard","nextExport":true,"isFallback":false,"gip":true,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/52082cf558ec9705.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/52082cf558ec9705.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-08fdb9e6070127fc.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-9a3ce3170d2edcec.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-1be831200e60c5c0.js" defer=""></script><script src="/dashboard/_next/static/bs6UB9V4Jq10TIZ5x-kBK/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/bs6UB9V4Jq10TIZ5x-kBK/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"statusCode":404}},"page":"/_error","query":{},"buildId":"bs6UB9V4Jq10TIZ5x-kBK","assetPrefix":"/dashboard","nextExport":true,"isFallback":false,"gip":true,"scriptLoader":[]}</script></body></html>
@@ -0,0 +1 @@
1
+ self.__BUILD_MANIFEST=function(s,c,e,a,t,u,n,r,b,i,j,k,f,o){return{__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/":["static/chunks/pages/index-6b0d9e5031b70c58.js"],"/_error":["static/chunks/pages/_error-1be831200e60c5c0.js"],"/clusters":["static/chunks/pages/clusters-f119a5630a1efd61.js"],"/clusters/[cluster]":[s,c,e,a,t,r,i,u,n,j,b,k,f,o,"static/chunks/871-3db673be3ee3750b.js","static/chunks/pages/clusters/[cluster]-8040f2483897ed0c.js"],"/clusters/[cluster]/[job]":[s,c,e,a,t,u,n,"static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js"],"/config":["static/chunks/pages/config-6b255eae088da6a3.js"],"/infra":["static/chunks/pages/infra-ee8cc4d449945d19.js"],"/infra/[context]":["static/chunks/pages/infra/[context]-b302aea4d65766bf.js"],"/jobs":["static/chunks/pages/jobs-0a5695ff3075d94a.js"],"/jobs/[job]":[s,c,e,a,t,r,u,n,b,"static/chunks/pages/jobs/[job]-e4b23128db0774cd.js"],"/users":["static/chunks/pages/users-4978cbb093e141e7.js"],"/volumes":["static/chunks/pages/volumes-476b670ef33d1ecd.js"],"/workspace/new":["static/chunks/pages/workspace/new-5b59bce9eb208d84.js"],"/workspaces":["static/chunks/pages/workspaces-50e230828730cfb3.js"],"/workspaces/[name]":[s,c,e,a,t,r,i,u,n,j,b,k,f,o,"static/chunks/141-fa5a20cbf401b351.js","static/chunks/pages/workspaces/[name]-cb7e720b739de53a.js"],sortedPages:["/","/_app","/_error","/clusters","/clusters/[cluster]","/clusters/[cluster]/[job]","/config","/infra","/infra/[context]","/jobs","/jobs/[job]","/users","/volumes","/workspace/new","/workspaces","/workspaces/[name]"]}}("static/chunks/616-d6128fa9e7cae6e6.js","static/chunks/230-d6e363362017ff3a.js","static/chunks/799-3625946b2ec2eb30.js","static/chunks/664-047bc03493fda379.js","static/chunks/804-4c9fc53aa74bc191.js","static/chunks/989-db34c16ad7ea6155.js","static/chunks/470-92dd1614396389be.js","static/chunks/798-c0525dc3f21e488d.js","static/chunks/969-d3a0b53f728d280a.js","static/chunks/947-6620842ef80ae879.js","static/chunks/990-0ad5ea1699e03ee8.js","
static/chunks/43-36177d00f6956ab2.js","static/chunks/973-81b2d057178adb76.js","static/chunks/938-068520cc11738deb.js"),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();