skypilot-nightly 1.0.0.dev20250624__py3-none-any.whl → 1.0.0.dev20250626__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +1 -6
- sky/backends/backend_utils.py +26 -11
- sky/backends/cloud_vm_ray_backend.py +16 -5
- sky/client/cli/command.py +232 -9
- sky/client/sdk.py +195 -91
- sky/clouds/aws.py +10 -7
- sky/clouds/azure.py +10 -7
- sky/clouds/cloud.py +2 -0
- sky/clouds/cudo.py +2 -0
- sky/clouds/do.py +10 -7
- sky/clouds/fluidstack.py +2 -0
- sky/clouds/gcp.py +10 -7
- sky/clouds/hyperbolic.py +10 -7
- sky/clouds/ibm.py +2 -0
- sky/clouds/kubernetes.py +26 -9
- sky/clouds/lambda_cloud.py +10 -7
- sky/clouds/nebius.py +10 -7
- sky/clouds/oci.py +10 -7
- sky/clouds/paperspace.py +10 -7
- sky/clouds/runpod.py +10 -7
- sky/clouds/scp.py +10 -7
- sky/clouds/ssh.py +36 -0
- sky/clouds/vast.py +10 -7
- sky/clouds/vsphere.py +2 -0
- sky/core.py +21 -0
- sky/dag.py +14 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/bs6UB9V4Jq10TIZ5x-kBK/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/141-fa5a20cbf401b351.js +11 -0
- sky/dashboard/out/_next/static/chunks/230-d6e363362017ff3a.js +1 -0
- sky/dashboard/out/_next/static/chunks/25.76c246239df93d50.js +6 -0
- sky/dashboard/out/_next/static/chunks/43-36177d00f6956ab2.js +1 -0
- sky/dashboard/out/_next/static/chunks/430.ed51037d1a4a438b.js +1 -0
- sky/dashboard/out/_next/static/chunks/470-92dd1614396389be.js +1 -0
- sky/dashboard/out/_next/static/chunks/544.110e53813fb98e2e.js +1 -0
- sky/dashboard/out/_next/static/chunks/645.961f08e39b8ce447.js +1 -0
- sky/dashboard/out/_next/static/chunks/690.55f9eed3be903f56.js +16 -0
- sky/dashboard/out/_next/static/chunks/697.6460bf72e760addd.js +20 -0
- sky/dashboard/out/_next/static/chunks/785.dc2686c3c1235554.js +1 -0
- sky/dashboard/out/_next/static/chunks/871-3db673be3ee3750b.js +6 -0
- sky/dashboard/out/_next/static/chunks/875.52c962183328b3f2.js +25 -0
- sky/dashboard/out/_next/static/chunks/973-81b2d057178adb76.js +1 -0
- sky/dashboard/out/_next/static/chunks/982.1b61658204416b0f.js +1 -0
- sky/dashboard/out/_next/static/chunks/984.e8bac186a24e5178.js +1 -0
- sky/dashboard/out/_next/static/chunks/990-0ad5ea1699e03ee8.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{_app-ce31493da9747ef4.js → _app-9a3ce3170d2edcec.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-8040f2483897ed0c.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/{clusters-7e9736af1c6345a6.js → clusters-f119a5630a1efd61.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/config-6b255eae088da6a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-b302aea4d65766bf.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-ee8cc4d449945d19.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-e4b23128db0774cd.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-0a5695ff3075d94a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-4978cbb093e141e7.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-476b670ef33d1ecd.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/{new-31aa8bdcb7592635.js → new-5b59bce9eb208d84.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-cb7e720b739de53a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-50e230828730cfb3.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-08fdb9e6070127fc.js +1 -0
- sky/dashboard/out/_next/static/css/52082cf558ec9705.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage_utils.py +2 -4
- sky/exceptions.py +15 -0
- sky/execution.py +5 -0
- sky/global_user_state.py +129 -0
- sky/jobs/client/sdk.py +13 -11
- sky/jobs/server/core.py +4 -0
- sky/models.py +16 -0
- sky/provision/__init__.py +26 -0
- sky/provision/kubernetes/__init__.py +3 -0
- sky/provision/kubernetes/instance.py +38 -77
- sky/provision/kubernetes/utils.py +70 -4
- sky/provision/kubernetes/volume.py +147 -0
- sky/resources.py +20 -76
- sky/serve/client/sdk.py +13 -13
- sky/serve/server/core.py +5 -1
- sky/server/common.py +40 -5
- sky/server/constants.py +5 -1
- sky/server/metrics.py +105 -0
- sky/server/requests/executor.py +30 -14
- sky/server/requests/payloads.py +16 -0
- sky/server/requests/requests.py +35 -1
- sky/server/rest.py +153 -0
- sky/server/server.py +70 -43
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +8 -3
- sky/server/uvicorn.py +153 -13
- sky/setup_files/dependencies.py +2 -0
- sky/skylet/constants.py +19 -3
- sky/skypilot_config.py +3 -0
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +133 -0
- sky/ssh_node_pools/server.py +232 -0
- sky/task.py +141 -18
- sky/templates/kubernetes-ray.yml.j2 +30 -1
- sky/users/permission.py +2 -0
- sky/utils/context.py +3 -1
- sky/utils/kubernetes/deploy_remote_cluster.py +12 -185
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/resources_utils.py +66 -0
- sky/utils/rich_utils.py +6 -0
- sky/utils/schemas.py +146 -3
- sky/utils/status_lib.py +10 -0
- sky/utils/validator.py +11 -1
- sky/volumes/__init__.py +0 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +64 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +199 -0
- sky/volumes/server/server.py +85 -0
- sky/volumes/utils.py +158 -0
- sky/volumes/volume.py +198 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/RECORD +135 -115
- sky/dashboard/out/_next/static/chunks/211.692afc57e812ae1a.js +0 -1
- sky/dashboard/out/_next/static/chunks/350.9e123a4551f68b0d.js +0 -1
- sky/dashboard/out/_next/static/chunks/37-4650f214e2119168.js +0 -6
- sky/dashboard/out/_next/static/chunks/42.2273cc2415291ceb.js +0 -6
- sky/dashboard/out/_next/static/chunks/443.b2242d0efcdf5f47.js +0 -1
- sky/dashboard/out/_next/static/chunks/470-1494c899266cf5c9.js +0 -1
- sky/dashboard/out/_next/static/chunks/513.309df9e18a9ff005.js +0 -1
- sky/dashboard/out/_next/static/chunks/641.c8e452bc5070a630.js +0 -1
- sky/dashboard/out/_next/static/chunks/682.4dd5dc116f740b5f.js +0 -6
- sky/dashboard/out/_next/static/chunks/760-a89d354797ce7af5.js +0 -1
- sky/dashboard/out/_next/static/chunks/843-bde186946d353355.js +0 -11
- sky/dashboard/out/_next/static/chunks/856-bfddc18e16f3873c.js +0 -1
- sky/dashboard/out/_next/static/chunks/901-b424d293275e1fd7.js +0 -1
- sky/dashboard/out/_next/static/chunks/973-56412c7976b4655b.js +0 -1
- sky/dashboard/out/_next/static/chunks/984.ae8c08791d274ca0.js +0 -50
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-4e065c812a52460b.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-520ec1ab65e2f2a4.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/config-e4f473661889e7cd.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-00fd23b9577492ca.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-8a4bf7370d4d9bb7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-171c27f4ca94861c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-55e5bcb16d563231.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-c9f4d785cdaa52d8.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-ecc5a7003776cfa7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-f00cba35691483b1.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-c85998e6a5722f21.js +0 -1
- sky/dashboard/out/_next/static/css/6ab927686b492a4a.css +0 -3
- sky/dashboard/out/_next/static/zsALxITkbP8J8NVwSDwMo/_buildManifest.js +0 -1
- /sky/dashboard/out/_next/static/{zsALxITkbP8J8NVwSDwMo → bs6UB9V4Jq10TIZ5x-kBK}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{938-ce7991c156584b06.js → 938-068520cc11738deb.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/top_level.txt +0 -0
sky/clouds/cloud.py
CHANGED
@@ -27,6 +27,7 @@ from sky.utils import ux_utils
|
|
27
27
|
if typing.TYPE_CHECKING:
|
28
28
|
from sky import resources as resources_lib
|
29
29
|
from sky.utils import status_lib
|
30
|
+
from sky.volumes import volume as volume_lib
|
30
31
|
|
31
32
|
|
32
33
|
class CloudImplementationFeatures(enum.Enum):
|
@@ -307,6 +308,7 @@ class Cloud:
|
|
307
308
|
zones: Optional[List['Zone']],
|
308
309
|
num_nodes: int,
|
309
310
|
dryrun: bool = False,
|
311
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
310
312
|
) -> Dict[str, Any]:
|
311
313
|
"""Converts planned sky.Resources to cloud-specific resource variables.
|
312
314
|
|
sky/clouds/cudo.py
CHANGED
@@ -12,6 +12,7 @@ from sky.utils import resources_utils
|
|
12
12
|
if typing.TYPE_CHECKING:
|
13
13
|
# Renaming to avoid shadowing variables.
|
14
14
|
from sky import resources as resources_lib
|
15
|
+
from sky.volumes import volume as volume_lib
|
15
16
|
|
16
17
|
_CREDENTIAL_FILES = [
|
17
18
|
# credential files for Cudo,
|
@@ -201,6 +202,7 @@ class Cudo(clouds.Cloud):
|
|
201
202
|
zones: Optional[List['clouds.Zone']],
|
202
203
|
num_nodes: int,
|
203
204
|
dryrun: bool = False,
|
205
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
204
206
|
) -> Dict[str, Optional[str]]:
|
205
207
|
del zones, cluster_name # unused
|
206
208
|
resources = resources.assert_launchable()
|
sky/clouds/do.py
CHANGED
@@ -14,6 +14,7 @@ from sky.utils import resources_utils
|
|
14
14
|
|
15
15
|
if typing.TYPE_CHECKING:
|
16
16
|
from sky import resources as resources_lib
|
17
|
+
from sky.volumes import volume as volume_lib
|
17
18
|
|
18
19
|
_CREDENTIAL_FILE = 'config.yaml'
|
19
20
|
|
@@ -175,13 +176,15 @@ class DO(clouds.Cloud):
|
|
175
176
|
return None
|
176
177
|
|
177
178
|
def make_deploy_resources_variables(
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
179
|
+
self,
|
180
|
+
resources: 'resources_lib.Resources',
|
181
|
+
cluster_name: resources_utils.ClusterName,
|
182
|
+
region: 'clouds.Region',
|
183
|
+
zones: Optional[List['clouds.Zone']],
|
184
|
+
num_nodes: int,
|
185
|
+
dryrun: bool = False,
|
186
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
187
|
+
) -> Dict[str, Optional[str]]:
|
185
188
|
del zones, dryrun, cluster_name
|
186
189
|
|
187
190
|
resources = resources.assert_launchable()
|
sky/clouds/fluidstack.py
CHANGED
@@ -21,6 +21,7 @@ if typing.TYPE_CHECKING:
|
|
21
21
|
|
22
22
|
# Renaming to avoid shadowing variables.
|
23
23
|
from sky import resources as resources_lib
|
24
|
+
from sky.volumes import volume as volume_lib
|
24
25
|
else:
|
25
26
|
requests = adaptors_common.LazyImport('requests')
|
26
27
|
|
@@ -188,6 +189,7 @@ class Fluidstack(clouds.Cloud):
|
|
188
189
|
zones: Optional[List[clouds.Zone]],
|
189
190
|
num_nodes: int,
|
190
191
|
dryrun: bool = False,
|
192
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
191
193
|
) -> Dict[str, Optional[str]]:
|
192
194
|
|
193
195
|
assert zones is None, 'FluidStack does not support zones.'
|
sky/clouds/gcp.py
CHANGED
@@ -29,6 +29,7 @@ from sky.utils import ux_utils
|
|
29
29
|
if typing.TYPE_CHECKING:
|
30
30
|
from sky import resources
|
31
31
|
from sky.utils import status_lib
|
32
|
+
from sky.volumes import volume as volume_lib
|
32
33
|
|
33
34
|
logger = sky_logging.init_logger(__name__)
|
34
35
|
|
@@ -465,13 +466,15 @@ class GCP(clouds.Cloud):
|
|
465
466
|
assert False, 'Low disk tier should always be supported on GCP.'
|
466
467
|
|
467
468
|
def make_deploy_resources_variables(
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
469
|
+
self,
|
470
|
+
resources: 'resources.Resources',
|
471
|
+
cluster_name: resources_utils.ClusterName,
|
472
|
+
region: 'clouds.Region',
|
473
|
+
zones: Optional[List['clouds.Zone']],
|
474
|
+
num_nodes: int,
|
475
|
+
dryrun: bool = False,
|
476
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
477
|
+
) -> Dict[str, Optional[str]]:
|
475
478
|
assert zones is not None, (region, zones)
|
476
479
|
|
477
480
|
region_name = region.name
|
sky/clouds/hyperbolic.py
CHANGED
@@ -13,6 +13,7 @@ from sky.utils.resources_utils import DiskTier
|
|
13
13
|
|
14
14
|
if typing.TYPE_CHECKING:
|
15
15
|
from sky import resources as resources_lib
|
16
|
+
from sky.volumes import volume as volume_lib
|
16
17
|
|
17
18
|
|
18
19
|
@registry.CLOUD_REGISTRY.register
|
@@ -244,13 +245,15 @@ class Hyperbolic(clouds.Cloud):
|
|
244
245
|
return 0.0
|
245
246
|
|
246
247
|
def make_deploy_resources_variables(
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
248
|
+
self,
|
249
|
+
resources: 'resources_lib.Resources',
|
250
|
+
cluster_name: resources_utils.ClusterName,
|
251
|
+
region: 'clouds.Region',
|
252
|
+
zones: Optional[List['clouds.Zone']],
|
253
|
+
num_nodes: int,
|
254
|
+
dryrun: bool = False,
|
255
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
256
|
+
) -> Dict[str, Any]:
|
254
257
|
"""Returns a dict of variables for the deployment template."""
|
255
258
|
del dryrun, region, cluster_name # unused
|
256
259
|
assert zones is None, ('Hyperbolic does not support zones', zones)
|
sky/clouds/ibm.py
CHANGED
@@ -18,6 +18,7 @@ from sky.utils import ux_utils
|
|
18
18
|
if typing.TYPE_CHECKING:
|
19
19
|
# renaming to avoid shadowing variables
|
20
20
|
from sky import resources as resources_lib
|
21
|
+
from sky.volumes import volume as volume_lib
|
21
22
|
|
22
23
|
logger = sky_logging.init_logger(__name__)
|
23
24
|
|
@@ -175,6 +176,7 @@ class IBM(clouds.Cloud):
|
|
175
176
|
zones: Optional[List['clouds.Zone']],
|
176
177
|
num_nodes: int,
|
177
178
|
dryrun: bool = False,
|
179
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
178
180
|
) -> Dict[str, Any]:
|
179
181
|
"""Converts planned sky.Resources to cloud-specific resource variables.
|
180
182
|
|
sky/clouds/kubernetes.py
CHANGED
@@ -25,6 +25,7 @@ from sky.utils import common_utils
|
|
25
25
|
from sky.utils import registry
|
26
26
|
from sky.utils import resources_utils
|
27
27
|
from sky.utils import schemas
|
28
|
+
from sky.volumes import volume as volume_lib
|
28
29
|
|
29
30
|
if typing.TYPE_CHECKING:
|
30
31
|
# Renaming to avoid shadowing variables.
|
@@ -394,7 +395,9 @@ class Kubernetes(clouds.Cloud):
|
|
394
395
|
return 0
|
395
396
|
|
396
397
|
@staticmethod
|
397
|
-
def _calculate_provision_timeout(
|
398
|
+
def _calculate_provision_timeout(
|
399
|
+
num_nodes: int,
|
400
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']]) -> int:
|
398
401
|
"""Calculate provision timeout based on number of nodes.
|
399
402
|
|
400
403
|
The timeout scales linearly with the number of nodes to account for
|
@@ -409,19 +412,33 @@ class Kubernetes(clouds.Cloud):
|
|
409
412
|
base_timeout = 10 # Base timeout for single node
|
410
413
|
per_node_timeout = 0.2 # Additional seconds per node
|
411
414
|
max_timeout = 60 # Cap at 1 minute
|
415
|
+
if volume_mounts is not None:
|
416
|
+
for volume_mount in volume_mounts:
|
417
|
+
if (volume_mount.volume_config.type ==
|
418
|
+
volume_lib.VolumeType.PVC.value):
|
419
|
+
if (volume_mount.volume_config.config.get(
|
420
|
+
'access_mode', '') ==
|
421
|
+
volume_lib.VolumeAccessMode.READ_WRITE_MANY.value):
|
422
|
+
# GKE may take several minutes to provision a PV
|
423
|
+
# supporting READ_WRITE_MANY with filestore.
|
424
|
+
base_timeout = 180
|
425
|
+
max_timeout = 240
|
426
|
+
break
|
412
427
|
|
413
428
|
return int(
|
414
429
|
min(base_timeout + (per_node_timeout * (num_nodes - 1)),
|
415
430
|
max_timeout))
|
416
431
|
|
417
432
|
def make_deploy_resources_variables(
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
433
|
+
self,
|
434
|
+
resources: 'resources_lib.Resources',
|
435
|
+
cluster_name: 'resources_utils.ClusterName',
|
436
|
+
region: Optional['clouds.Region'],
|
437
|
+
zones: Optional[List['clouds.Zone']],
|
438
|
+
num_nodes: int,
|
439
|
+
dryrun: bool = False,
|
440
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
441
|
+
) -> Dict[str, Optional[str]]:
|
425
442
|
del cluster_name, zones, dryrun # Unused.
|
426
443
|
if region is None:
|
427
444
|
context = kubernetes_utils.get_current_kube_config_context_name()
|
@@ -562,7 +579,7 @@ class Kubernetes(clouds.Cloud):
|
|
562
579
|
# We use a linear scaling formula to determine the timeout based on the
|
563
580
|
# number of nodes.
|
564
581
|
|
565
|
-
timeout = self._calculate_provision_timeout(num_nodes)
|
582
|
+
timeout = self._calculate_provision_timeout(num_nodes, volume_mounts)
|
566
583
|
timeout = skypilot_config.get_nested(
|
567
584
|
('kubernetes', 'provision_timeout'),
|
568
585
|
timeout,
|
sky/clouds/lambda_cloud.py
CHANGED
@@ -15,6 +15,7 @@ if typing.TYPE_CHECKING:
|
|
15
15
|
|
16
16
|
# Renaming to avoid shadowing variables.
|
17
17
|
from sky import resources as resources_lib
|
18
|
+
from sky.volumes import volume as volume_lib
|
18
19
|
else:
|
19
20
|
requests = adaptors_common.LazyImport('requests')
|
20
21
|
|
@@ -159,13 +160,15 @@ class Lambda(clouds.Cloud):
|
|
159
160
|
return None
|
160
161
|
|
161
162
|
def make_deploy_resources_variables(
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
163
|
+
self,
|
164
|
+
resources: 'resources_lib.Resources',
|
165
|
+
cluster_name: 'resources_utils.ClusterName',
|
166
|
+
region: 'clouds.Region',
|
167
|
+
zones: Optional[List['clouds.Zone']],
|
168
|
+
num_nodes: int,
|
169
|
+
dryrun: bool = False,
|
170
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
171
|
+
) -> Dict[str, Any]:
|
169
172
|
del cluster_name, dryrun # Unused.
|
170
173
|
assert zones is None, 'Lambda does not support zones.'
|
171
174
|
resources = resources.assert_launchable()
|
sky/clouds/nebius.py
CHANGED
@@ -16,6 +16,7 @@ from sky.utils import resources_utils
|
|
16
16
|
|
17
17
|
if typing.TYPE_CHECKING:
|
18
18
|
from sky import resources as resources_lib
|
19
|
+
from sky.volumes import volume as volume_lib
|
19
20
|
|
20
21
|
_INDENT_PREFIX = ' '
|
21
22
|
|
@@ -196,13 +197,15 @@ class Nebius(clouds.Cloud):
|
|
196
197
|
return None
|
197
198
|
|
198
199
|
def make_deploy_resources_variables(
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
200
|
+
self,
|
201
|
+
resources: 'resources_lib.Resources',
|
202
|
+
cluster_name: resources_utils.ClusterName,
|
203
|
+
region: 'clouds.Region',
|
204
|
+
zones: Optional[List['clouds.Zone']],
|
205
|
+
num_nodes: int,
|
206
|
+
dryrun: bool = False,
|
207
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
208
|
+
) -> Dict[str, Any]:
|
206
209
|
del dryrun, cluster_name
|
207
210
|
assert zones is None, ('Nebius does not support zones', zones)
|
208
211
|
|
sky/clouds/oci.py
CHANGED
@@ -40,6 +40,7 @@ from sky.utils import ux_utils
|
|
40
40
|
if typing.TYPE_CHECKING:
|
41
41
|
# Renaming to avoid shadowing variables.
|
42
42
|
from sky import resources as resources_lib
|
43
|
+
from sky.volumes import volume as volume_lib
|
43
44
|
|
44
45
|
logger = logging.getLogger(__name__)
|
45
46
|
|
@@ -207,13 +208,15 @@ class OCI(clouds.Cloud):
|
|
207
208
|
return None
|
208
209
|
|
209
210
|
def make_deploy_resources_variables(
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
211
|
+
self,
|
212
|
+
resources: 'resources_lib.Resources',
|
213
|
+
cluster_name: resources_utils.ClusterName,
|
214
|
+
region: Optional['clouds.Region'],
|
215
|
+
zones: Optional[List['clouds.Zone']],
|
216
|
+
num_nodes: int,
|
217
|
+
dryrun: bool = False,
|
218
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
219
|
+
) -> Dict[str, Any]:
|
217
220
|
del cluster_name, dryrun # Unused.
|
218
221
|
assert region is not None, resources
|
219
222
|
|
sky/clouds/paperspace.py
CHANGED
@@ -14,6 +14,7 @@ if typing.TYPE_CHECKING:
|
|
14
14
|
import requests
|
15
15
|
|
16
16
|
from sky import resources as resources_lib
|
17
|
+
from sky.volumes import volume as volume_lib
|
17
18
|
else:
|
18
19
|
requests = adaptors_common.LazyImport('requests')
|
19
20
|
|
@@ -179,13 +180,15 @@ class Paperspace(clouds.Cloud):
|
|
179
180
|
return None
|
180
181
|
|
181
182
|
def make_deploy_resources_variables(
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
183
|
+
self,
|
184
|
+
resources: 'resources_lib.Resources',
|
185
|
+
cluster_name: resources_utils.ClusterName,
|
186
|
+
region: 'clouds.Region',
|
187
|
+
zones: Optional[List['clouds.Zone']],
|
188
|
+
num_nodes: int,
|
189
|
+
dryrun: bool = False,
|
190
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
191
|
+
) -> Dict[str, Optional[str]]:
|
189
192
|
del zones, dryrun, cluster_name
|
190
193
|
|
191
194
|
resources = resources.assert_launchable()
|
sky/clouds/runpod.py
CHANGED
@@ -10,6 +10,7 @@ from sky.utils import resources_utils
|
|
10
10
|
|
11
11
|
if typing.TYPE_CHECKING:
|
12
12
|
from sky import resources as resources_lib
|
13
|
+
from sky.volumes import volume as volume_lib
|
13
14
|
|
14
15
|
_CREDENTIAL_FILES = [
|
15
16
|
'config.toml',
|
@@ -160,13 +161,15 @@ class RunPod(clouds.Cloud):
|
|
160
161
|
return None
|
161
162
|
|
162
163
|
def make_deploy_resources_variables(
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
164
|
+
self,
|
165
|
+
resources: 'resources_lib.Resources',
|
166
|
+
cluster_name: resources_utils.ClusterName,
|
167
|
+
region: 'clouds.Region',
|
168
|
+
zones: Optional[List['clouds.Zone']],
|
169
|
+
num_nodes: int,
|
170
|
+
dryrun: bool = False,
|
171
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
172
|
+
) -> Dict[str, Optional[Union[str, bool]]]:
|
170
173
|
del dryrun, cluster_name # unused
|
171
174
|
assert zones is not None, (region, zones)
|
172
175
|
|
sky/clouds/scp.py
CHANGED
@@ -19,6 +19,7 @@ from sky.utils import status_lib
|
|
19
19
|
if typing.TYPE_CHECKING:
|
20
20
|
# Renaming to avoid shadowing variables.
|
21
21
|
from sky import resources as resources_lib
|
22
|
+
from sky.volumes import volume as volume_lib
|
22
23
|
|
23
24
|
_CREDENTIAL_FILES = [
|
24
25
|
'scp_credential',
|
@@ -183,13 +184,15 @@ class SCP(clouds.Cloud):
|
|
183
184
|
return None
|
184
185
|
|
185
186
|
def make_deploy_resources_variables(
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
187
|
+
self,
|
188
|
+
resources: 'resources_lib.Resources',
|
189
|
+
cluster_name: 'resources_utils.ClusterName',
|
190
|
+
region: 'clouds.Region',
|
191
|
+
zones: Optional[List['clouds.Zone']],
|
192
|
+
num_nodes: int,
|
193
|
+
dryrun: bool = False,
|
194
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
195
|
+
) -> Dict[str, Optional[str]]:
|
193
196
|
del cluster_name, dryrun # Unused.
|
194
197
|
assert zones is None, 'SCP does not support zones.'
|
195
198
|
|
sky/clouds/ssh.py
CHANGED
@@ -211,6 +211,42 @@ class SSH(kubernetes.Kubernetes):
|
|
211
211
|
|
212
212
|
return success, ctx2text
|
213
213
|
|
214
|
+
@classmethod
|
215
|
+
def check_single_context(cls, context: str) -> Tuple[bool, str]:
|
216
|
+
"""Checks if the context is valid and accessible."""
|
217
|
+
reasons = kubernetes_utils.check_port_forward_mode_dependencies(False)
|
218
|
+
if reasons is not None:
|
219
|
+
formatted = '\n'.join(
|
220
|
+
[reasons[0]] +
|
221
|
+
[f'{cls._INDENT_PREFIX}' + r for r in reasons[1:]])
|
222
|
+
return (False, formatted)
|
223
|
+
|
224
|
+
# Add ssh- prefix to the context
|
225
|
+
if not context.startswith('ssh-'):
|
226
|
+
context = f'ssh-{context}'
|
227
|
+
|
228
|
+
# Get SSH contexts
|
229
|
+
try:
|
230
|
+
existing_allowed_contexts = cls.existing_allowed_contexts()
|
231
|
+
except Exception as e: # pylint: disable=broad-except
|
232
|
+
return (False, f'Failed to get SSH contexts: {str(e)}')
|
233
|
+
|
234
|
+
if not existing_allowed_contexts:
|
235
|
+
return (False,
|
236
|
+
'No SSH Node Pools are up. Run `sky ssh up` to set up '
|
237
|
+
f'Node Pools from {SSH_NODE_POOLS_PATH}.')
|
238
|
+
|
239
|
+
if context not in existing_allowed_contexts:
|
240
|
+
return (False, f'SSH Node Pool {context} is not set up. '
|
241
|
+
f'Run `sky ssh up --infra {context}` to set up.')
|
242
|
+
|
243
|
+
# Check if the context is valid
|
244
|
+
suc, text = super()._check_single_context(context)
|
245
|
+
if not suc:
|
246
|
+
return (False, text)
|
247
|
+
|
248
|
+
return (True, 'SSH Node Pool is set up.')
|
249
|
+
|
214
250
|
@classmethod
|
215
251
|
def expand_infras(cls) -> List[str]:
|
216
252
|
return [
|
sky/clouds/vast.py
CHANGED
@@ -10,6 +10,7 @@ from sky.utils import resources_utils
|
|
10
10
|
|
11
11
|
if typing.TYPE_CHECKING:
|
12
12
|
from sky import resources as resources_lib
|
13
|
+
from sky.volumes import volume as volume_lib
|
13
14
|
|
14
15
|
|
15
16
|
@registry.CLOUD_REGISTRY.register
|
@@ -155,13 +156,15 @@ class Vast(clouds.Cloud):
|
|
155
156
|
return None
|
156
157
|
|
157
158
|
def make_deploy_resources_variables(
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
159
|
+
self,
|
160
|
+
resources: 'resources_lib.Resources',
|
161
|
+
cluster_name: resources_utils.ClusterName,
|
162
|
+
region: 'clouds.Region',
|
163
|
+
zones: Optional[List['clouds.Zone']],
|
164
|
+
num_nodes: int,
|
165
|
+
dryrun: bool = False,
|
166
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
167
|
+
) -> Dict[str, Optional[str]]:
|
165
168
|
del zones, dryrun, cluster_name, num_nodes # unused
|
166
169
|
|
167
170
|
resources = resources.assert_launchable()
|
sky/clouds/vsphere.py
CHANGED
@@ -18,6 +18,7 @@ if typing.TYPE_CHECKING:
|
|
18
18
|
|
19
19
|
# Renaming to avoid shadowing variables.
|
20
20
|
from sky import resources as resources_lib
|
21
|
+
from sky.volumes import volume as volume_lib
|
21
22
|
else:
|
22
23
|
requests = adaptors_common.LazyImport('requests')
|
23
24
|
|
@@ -184,6 +185,7 @@ class Vsphere(clouds.Cloud):
|
|
184
185
|
zones: Optional[List['clouds.Zone']],
|
185
186
|
num_nodes: int,
|
186
187
|
dryrun: bool = False,
|
188
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
187
189
|
) -> Dict[str, Optional[str]]:
|
188
190
|
# TODO get image id here.
|
189
191
|
del cluster_name, dryrun # unused
|
sky/core.py
CHANGED
@@ -76,6 +76,7 @@ def optimize(
|
|
76
76
|
for a task.
|
77
77
|
exceptions.NoCloudAccessError: if no public clouds are enabled.
|
78
78
|
"""
|
79
|
+
dag.resolve_and_validate_volumes()
|
79
80
|
# TODO: We apply the admin policy only on the first DAG optimization which
|
80
81
|
# is shown on `sky launch`. The optimizer is also invoked during failover,
|
81
82
|
# but we do not apply the admin policy there. We should apply the admin
|
@@ -1306,6 +1307,26 @@ def ssh_up(infra: Optional[str] = None, cleanup: bool = False) -> None:
|
|
1306
1307
|
)
|
1307
1308
|
|
1308
1309
|
|
1310
|
+
@usage_lib.entrypoint
|
1311
|
+
def ssh_status(context_name: str) -> Tuple[bool, str]:
|
1312
|
+
"""Check the status of an SSH Node Pool context.
|
1313
|
+
|
1314
|
+
Args:
|
1315
|
+
context_name: The SSH context name (e.g., 'ssh-my-cluster')
|
1316
|
+
|
1317
|
+
Returns:
|
1318
|
+
Tuple[bool, str]: (is_ready, reason)
|
1319
|
+
- is_ready: True if the SSH Node Pool is ready, False otherwise
|
1320
|
+
- reason: Explanation of the status
|
1321
|
+
"""
|
1322
|
+
try:
|
1323
|
+
is_ready, reason = clouds.SSH.check_single_context(context_name)
|
1324
|
+
return is_ready, reason
|
1325
|
+
except Exception as e: # pylint: disable=broad-except
|
1326
|
+
return False, ('Failed to check SSH context: '
|
1327
|
+
f'{common_utils.format_exception(e)}')
|
1328
|
+
|
1329
|
+
|
1309
1330
|
def get_all_contexts() -> List[str]:
|
1310
1331
|
"""Get all available contexts from Kubernetes and SSH clouds.
|
1311
1332
|
|
sky/dag.py
CHANGED
@@ -83,6 +83,20 @@ class Dag:
|
|
83
83
|
task.validate(skip_file_mounts=skip_file_mounts,
|
84
84
|
skip_workdir=skip_workdir)
|
85
85
|
|
86
|
+
def resolve_and_validate_volumes(self) -> None:
|
87
|
+
for task in self.tasks:
|
88
|
+
task.resolve_and_validate_volumes()
|
89
|
+
|
90
|
+
def pre_mount_volumes(self) -> None:
|
91
|
+
vol_map = {}
|
92
|
+
# Deduplicate volume mounts.
|
93
|
+
for task in self.tasks:
|
94
|
+
if task.volume_mounts is not None:
|
95
|
+
for volume_mount in task.volume_mounts:
|
96
|
+
vol_map[volume_mount.volume_name] = volume_mount
|
97
|
+
for volume_mount in vol_map.values():
|
98
|
+
volume_mount.pre_mount()
|
99
|
+
|
86
100
|
|
87
101
|
class _DagContext(threading.local):
|
88
102
|
"""A thread-local stack of Dags."""
|
sky/dashboard/out/404.html
CHANGED
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/52082cf558ec9705.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/52082cf558ec9705.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-08fdb9e6070127fc.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-9a3ce3170d2edcec.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-1be831200e60c5c0.js" defer=""></script><script src="/dashboard/_next/static/bs6UB9V4Jq10TIZ5x-kBK/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/bs6UB9V4Jq10TIZ5x-kBK/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"statusCode":404}},"page":"/_error","query":{},"buildId":"bs6UB9V4Jq10TIZ5x-kBK","assetPrefix":"/dashboard","nextExport":true,"isFallback":false,"gip":true,"scriptLoader":[]}</script></body></html>
|
@@ -0,0 +1 @@
|
|
1
|
+
self.__BUILD_MANIFEST=function(s,c,e,a,t,u,n,r,b,i,j,k,f,o){return{__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/":["static/chunks/pages/index-6b0d9e5031b70c58.js"],"/_error":["static/chunks/pages/_error-1be831200e60c5c0.js"],"/clusters":["static/chunks/pages/clusters-f119a5630a1efd61.js"],"/clusters/[cluster]":[s,c,e,a,t,r,i,u,n,j,b,k,f,o,"static/chunks/871-3db673be3ee3750b.js","static/chunks/pages/clusters/[cluster]-8040f2483897ed0c.js"],"/clusters/[cluster]/[job]":[s,c,e,a,t,u,n,"static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js"],"/config":["static/chunks/pages/config-6b255eae088da6a3.js"],"/infra":["static/chunks/pages/infra-ee8cc4d449945d19.js"],"/infra/[context]":["static/chunks/pages/infra/[context]-b302aea4d65766bf.js"],"/jobs":["static/chunks/pages/jobs-0a5695ff3075d94a.js"],"/jobs/[job]":[s,c,e,a,t,r,u,n,b,"static/chunks/pages/jobs/[job]-e4b23128db0774cd.js"],"/users":["static/chunks/pages/users-4978cbb093e141e7.js"],"/volumes":["static/chunks/pages/volumes-476b670ef33d1ecd.js"],"/workspace/new":["static/chunks/pages/workspace/new-5b59bce9eb208d84.js"],"/workspaces":["static/chunks/pages/workspaces-50e230828730cfb3.js"],"/workspaces/[name]":[s,c,e,a,t,r,i,u,n,j,b,k,f,o,"static/chunks/141-fa5a20cbf401b351.js","static/chunks/pages/workspaces/[name]-cb7e720b739de53a.js"],sortedPages:["/","/_app","/_error","/clusters","/clusters/[cluster]","/clusters/[cluster]/[job]","/config","/infra","/infra/[context]","/jobs","/jobs/[job]","/users","/volumes","/workspace/new","/workspaces","/workspaces/[name]"]}}("static/chunks/616-d6128fa9e7cae6e6.js","static/chunks/230-d6e363362017ff3a.js","static/chunks/799-3625946b2ec2eb30.js","static/chunks/664-047bc03493fda379.js","static/chunks/804-4c9fc53aa74bc191.js","static/chunks/989-db34c16ad7ea6155.js","static/chunks/470-92dd1614396389be.js","static/chunks/798-c0525dc3f21e488d.js","static/chunks/969-d3a0b53f728d280a.js","static/chunks/947-6620842ef80ae879.js","static/chunks/990-0ad5ea1699e03ee8.js","static/chunks/43-36177d00f6956ab2.js","static/chunks/973-81b2d057178adb76.js","static/chunks/938-068520cc11738deb.js"),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();
|