skypilot-nightly 1.0.0.dev20250624__py3-none-any.whl → 1.0.0.dev20250625__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +26 -11
- sky/backends/cloud_vm_ray_backend.py +16 -5
- sky/client/cli/command.py +222 -4
- sky/client/sdk.py +110 -82
- sky/clouds/aws.py +10 -7
- sky/clouds/azure.py +10 -7
- sky/clouds/cloud.py +2 -0
- sky/clouds/cudo.py +2 -0
- sky/clouds/do.py +10 -7
- sky/clouds/fluidstack.py +2 -0
- sky/clouds/gcp.py +10 -7
- sky/clouds/hyperbolic.py +10 -7
- sky/clouds/ibm.py +2 -0
- sky/clouds/kubernetes.py +26 -9
- sky/clouds/lambda_cloud.py +10 -7
- sky/clouds/nebius.py +10 -7
- sky/clouds/oci.py +10 -7
- sky/clouds/paperspace.py +10 -7
- sky/clouds/runpod.py +10 -7
- sky/clouds/scp.py +10 -7
- sky/clouds/vast.py +10 -7
- sky/clouds/vsphere.py +2 -0
- sky/core.py +1 -0
- sky/dag.py +14 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/ZWdSYkqVe3WjnFR8ocqoG/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/230-d6e363362017ff3a.js +1 -0
- sky/dashboard/out/_next/static/chunks/310.2671028c20e892c7.js +16 -0
- sky/dashboard/out/_next/static/chunks/{37-4650f214e2119168.js → 37-1f1e94f5a561202a.js} +2 -2
- sky/dashboard/out/_next/static/chunks/42.bc85e5b1a4debf22.js +6 -0
- sky/dashboard/out/_next/static/chunks/470-92dd1614396389be.js +1 -0
- sky/dashboard/out/_next/static/chunks/544.110e53813fb98e2e.js +1 -0
- sky/dashboard/out/_next/static/chunks/645.961f08e39b8ce447.js +1 -0
- sky/dashboard/out/_next/static/chunks/66-66ae330df2d3c1c7.js +1 -0
- sky/dashboard/out/_next/static/chunks/682.00e56a220dd26fe1.js +6 -0
- sky/dashboard/out/_next/static/chunks/697.6460bf72e760addd.js +20 -0
- sky/dashboard/out/_next/static/chunks/{856-bfddc18e16f3873c.js → 856-cdf66268ec878d0c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{_app-ce31493da9747ef4.js → _app-0ef7418d1a3822f3.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-32ce4f49f2261f55.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-4aa031d1f42723d8.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-3102d02a188f04b3.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-6f1e02e31eecb5ce.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-fd5dc8a91bd9169a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-e4b23128db0774cd.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-26da173e20af16e4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-ce29e7420385563d.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-476b670ef33d1ecd.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-09ae0f6f972aa871.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-ecc5a7003776cfa7.js → [name]-0b4c662a25e4747a.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-862b120406461b10.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-6133dc1e928bd0b5.js +1 -0
- sky/dashboard/out/_next/static/css/b23cb0257bf96c51.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage_utils.py +2 -4
- sky/exceptions.py +15 -0
- sky/execution.py +5 -0
- sky/global_user_state.py +129 -0
- sky/jobs/client/sdk.py +13 -11
- sky/jobs/server/core.py +4 -0
- sky/models.py +16 -0
- sky/provision/__init__.py +26 -0
- sky/provision/kubernetes/__init__.py +3 -0
- sky/provision/kubernetes/instance.py +38 -77
- sky/provision/kubernetes/utils.py +52 -2
- sky/provision/kubernetes/volume.py +147 -0
- sky/resources.py +20 -76
- sky/serve/client/sdk.py +13 -13
- sky/serve/server/core.py +5 -1
- sky/server/common.py +40 -5
- sky/server/constants.py +5 -1
- sky/server/metrics.py +105 -0
- sky/server/requests/executor.py +30 -14
- sky/server/requests/payloads.py +16 -0
- sky/server/requests/requests.py +35 -1
- sky/server/rest.py +152 -0
- sky/server/server.py +66 -16
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +8 -3
- sky/server/uvicorn.py +153 -13
- sky/setup_files/dependencies.py +2 -0
- sky/skylet/constants.py +14 -3
- sky/task.py +141 -18
- sky/templates/kubernetes-ray.yml.j2 +30 -1
- sky/users/permission.py +2 -0
- sky/utils/context.py +3 -1
- sky/utils/resources_utils.py +66 -0
- sky/utils/rich_utils.py +6 -0
- sky/utils/schemas.py +146 -3
- sky/utils/status_lib.py +10 -0
- sky/utils/validator.py +11 -1
- sky/volumes/__init__.py +0 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +64 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +199 -0
- sky/volumes/server/server.py +85 -0
- sky/volumes/utils.py +158 -0
- sky/volumes/volume.py +198 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/RECORD +123 -108
- sky/dashboard/out/_next/static/chunks/350.9e123a4551f68b0d.js +0 -1
- sky/dashboard/out/_next/static/chunks/42.2273cc2415291ceb.js +0 -6
- sky/dashboard/out/_next/static/chunks/470-1494c899266cf5c9.js +0 -1
- sky/dashboard/out/_next/static/chunks/641.c8e452bc5070a630.js +0 -1
- sky/dashboard/out/_next/static/chunks/682.4dd5dc116f740b5f.js +0 -6
- sky/dashboard/out/_next/static/chunks/760-a89d354797ce7af5.js +0 -1
- sky/dashboard/out/_next/static/chunks/901-b424d293275e1fd7.js +0 -1
- sky/dashboard/out/_next/static/chunks/984.ae8c08791d274ca0.js +0 -50
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-4e065c812a52460b.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-520ec1ab65e2f2a4.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters-7e9736af1c6345a6.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/config-e4f473661889e7cd.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-00fd23b9577492ca.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-8a4bf7370d4d9bb7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-171c27f4ca94861c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-55e5bcb16d563231.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-c9f4d785cdaa52d8.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-31aa8bdcb7592635.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-f00cba35691483b1.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-c85998e6a5722f21.js +0 -1
- sky/dashboard/out/_next/static/css/6ab927686b492a4a.css +0 -3
- sky/dashboard/out/_next/static/zsALxITkbP8J8NVwSDwMo/_buildManifest.js +0 -1
- /sky/dashboard/out/_next/static/{zsALxITkbP8J8NVwSDwMo → ZWdSYkqVe3WjnFR8ocqoG}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{843-bde186946d353355.js → 843-07d25a7e64462fd8.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{938-ce7991c156584b06.js → 938-068520cc11738deb.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{973-56412c7976b4655b.js → 973-5b5019ba333e8d62.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/top_level.txt +0 -0
sky/utils/rich_utils.py
CHANGED
@@ -7,6 +7,7 @@ import threading
|
|
7
7
|
import typing
|
8
8
|
from typing import Callable, Iterator, Optional, Tuple, Union
|
9
9
|
|
10
|
+
from sky import exceptions
|
10
11
|
from sky.adaptors import common as adaptors_common
|
11
12
|
from sky.utils import annotations
|
12
13
|
from sky.utils import context
|
@@ -58,6 +59,7 @@ class Control(enum.Enum):
|
|
58
59
|
EXIT = 'rich_exit'
|
59
60
|
UPDATE = 'rich_update'
|
60
61
|
HEARTBEAT = 'heartbeat'
|
62
|
+
RETRY = 'retry'
|
61
63
|
|
62
64
|
def encode(self, msg: str) -> str:
|
63
65
|
return f'<{self.value}>{msg}</{self.value}>'
|
@@ -365,6 +367,10 @@ def decode_rich_status(
|
|
365
367
|
yield line
|
366
368
|
continue
|
367
369
|
|
370
|
+
if control == Control.RETRY:
|
371
|
+
raise exceptions.ServerTemporarilyUnavailableError(
|
372
|
+
'The server is temporarily unavailable. Please try '
|
373
|
+
'again.')
|
368
374
|
# control is not None, i.e. it is a rich status control message.
|
369
375
|
if threading.current_thread() is not threading.main_thread():
|
370
376
|
yield None
|
sky/utils/schemas.py
CHANGED
@@ -70,8 +70,36 @@ _AUTOSTOP_SCHEMA = {
|
|
70
70
|
}
|
71
71
|
|
72
72
|
|
73
|
-
|
74
|
-
|
73
|
+
# Note: This is similar to _get_infra_pattern()
# but without the wildcard patterns.
def _get_volume_infra_pattern():
    """Builds the regex that validates the `infra` field of a volume.

    Accepted forms:
      - cloud[/region[/zone]] for every cloud in constants.ALL_CLOUDS other
        than Kubernetes (the cloud name is matched case-insensitively).
      - "kubernetes" or "k8s", optionally followed by "/<context>"; the
        context name may itself contain slashes.

    Wildcards are not accepted by this pattern.

    Returns:
        The anchored (^...$) regex pattern, as a string.
    """
    # Kubernetes has its own alternative below, so keep it out of the generic
    # cloud list. Filtering (instead of list.remove) does not raise if
    # 'kubernetes' is ever missing from ALL_CLOUDS.
    all_clouds = [
        cloud for cloud in constants.ALL_CLOUDS if cloud != 'kubernetes'
    ]
    # Match any of the remaining cloud names (case insensitive).
    cloud_pattern = f'(?i:({"|".join(all_clouds)}))'

    # Optional /region followed by optional /zone.
    # /[^/]+ matches a slash followed by any characters except slash (region
    # or zone name). The outer (?:...)? makes the whole region/zone part
    # optional.
    region_zone_pattern = '(?:/[^/]+(?:/[^/]+)?)?'

    # Kubernetes specific pattern - matches:
    # 1. Just the word "kubernetes" or "k8s" by itself
    # 2. "k8s/" or "kubernetes/" followed by any context name (which may
    #    contain slashes)
    kubernetes_pattern = '(?i:kubernetes|k8s)(?:/.+)?'

    # Combine the alternatives; ^ marks start of string, $ marks end.
    infra_pattern = (f'^(?:{cloud_pattern}{region_zone_pattern}|'
                     f'{kubernetes_pattern})$')
    return infra_pattern
|
100
|
+
|
101
|
+
|
102
|
+
def _get_infra_pattern():
|
75
103
|
# Building the regex pattern for the infra field
|
76
104
|
# Format: cloud[/region[/zone]] or wildcards or kubernetes context
|
77
105
|
# Match any cloud name (case insensitive)
|
@@ -103,7 +131,11 @@ def _get_single_resources_schema():
|
|
103
131
|
infra_pattern = (f'^(?:{cloud_pattern}{region_zone_pattern}|'
|
104
132
|
f'{wildcard_cloud}{wildcard_with_region}|'
|
105
133
|
f'{kubernetes_pattern})$')
|
134
|
+
return infra_pattern
|
106
135
|
|
136
|
+
|
137
|
+
def _get_single_resources_schema():
|
138
|
+
"""Schema for a single resource in a resources list."""
|
107
139
|
return {
|
108
140
|
'$schema': 'https://json-schema.org/draft/2020-12/schema',
|
109
141
|
'type': 'object',
|
@@ -133,7 +165,7 @@ def _get_single_resources_schema():
|
|
133
165
|
# 3. Kubernetes patterns - e.g. "kubernetes/my-context",
|
134
166
|
# "k8s/context-name",
|
135
167
|
# "k8s/aws:eks:us-east-1:123456789012:cluster/my-cluster"
|
136
|
-
'pattern':
|
168
|
+
'pattern': _get_infra_pattern(),
|
137
169
|
},
|
138
170
|
'cpus': {
|
139
171
|
'anyOf': [{
|
@@ -383,6 +415,66 @@ def get_resources_schema():
|
|
383
415
|
}
|
384
416
|
|
385
417
|
|
418
|
+
def get_volume_schema():
    """Returns the JSON schema for a volume definition.

    `name`, `type` and `infra` are required, and unknown top-level keys are
    rejected. `type` and `config.access_mode` are checked case-sensitively
    against the values of volume.VolumeType and volume.VolumeAccessMode.
    """
    # pylint: disable=import-outside-toplevel
    from sky.volumes import volume

    volume_type_values = [member.value for member in volume.VolumeType]

    infra_property = {
        'type': 'string',
        'description': ('Infrastructure specification in format: '
                        'cloud[/region[/zone]].'),
        # Pattern validates:
        # 1. cloud[/region[/zone]] - e.g. "aws", "aws/us-east-1",
        #    "aws/us-east-1/us-east-1a"
        # 2. Kubernetes patterns - e.g. "kubernetes/my-context",
        #    "k8s/context-name",
        #    "k8s/aws:eks:us-east-1:123456789012:cluster/my-cluster"
        'pattern': _get_volume_infra_pattern(),
    }

    size_property = {
        'type': 'string',
        'pattern': constants.MEMORY_SIZE_PATTERN,
    }

    access_mode_values = [member.value for member in volume.VolumeAccessMode]
    config_property = {
        'type': 'object',
        'required': [],
        'properties': {
            'storage_class_name': {
                'type': 'string',
            },
            'access_mode': {
                'type': 'string',
                'case_sensitive_enum': access_mode_values,
            },
            'namespace': {
                'type': 'string',
            },
        },
    }

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': ['name', 'type', 'infra'],
        'additionalProperties': False,
        'properties': {
            'name': {
                'type': 'string',
            },
            'type': {
                'type': 'string',
                'case_sensitive_enum': volume_type_values,
            },
            'infra': infra_property,
            'size': size_property,
            'resource_name': {
                'type': 'string',
            },
            'config': config_property,
        },
    }
|
476
|
+
|
477
|
+
|
386
478
|
def get_storage_schema():
|
387
479
|
# pylint: disable=import-outside-toplevel
|
388
480
|
from sky.data import storage
|
@@ -457,6 +549,49 @@ def get_storage_schema():
|
|
457
549
|
}
|
458
550
|
|
459
551
|
|
552
|
+
def get_volume_mount_schema():
    """Schema for volume mount object in task config (internal use only).

    No key is required. Unknown top-level keys are rejected, while
    `volume_config` accepts additional keys beyond cloud/region/zone.
    """

    def _string_or_null():
        # Return a fresh dict on each call so the region and zone
        # sub-schemas never share one object.
        return {'anyOf': [{'type': 'string'}, {'type': 'null'}]}

    volume_config_property = {
        'type': 'object',
        'required': [],
        'additionalProperties': True,
        'properties': {
            'cloud': {
                'type': 'string',
                'case_insensitive_enum': list(constants.ALL_CLOUDS)
            },
            'region': _string_or_null(),
            'zone': _string_or_null(),
        },
    }

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': {
            'path': {
                'type': 'string',
            },
            'volume_name': {
                'type': 'string',
            },
            'volume_config': volume_config_property,
        },
    }
|
593
|
+
|
594
|
+
|
460
595
|
def get_service_schema():
|
461
596
|
"""Schema for top-level `service:` field (for SkyServe)."""
|
462
597
|
# To avoid circular imports, only import when needed.
|
@@ -723,6 +858,14 @@ def get_task_schema():
|
|
723
858
|
'config': _filter_schema(
|
724
859
|
get_config_schema(),
|
725
860
|
constants.OVERRIDEABLE_CONFIG_KEYS_IN_TASK),
|
861
|
+
# volumes config is validated separately using get_volume_schema
|
862
|
+
'volumes': {
|
863
|
+
'type': 'object',
|
864
|
+
},
|
865
|
+
'volume_mounts': {
|
866
|
+
'type': 'array',
|
867
|
+
'items': get_volume_mount_schema(),
|
868
|
+
},
|
726
869
|
**_experimental_task_schema(),
|
727
870
|
}
|
728
871
|
}
|
sky/utils/status_lib.py
CHANGED
@@ -54,3 +54,13 @@ class StorageStatus(enum.Enum):
|
|
54
54
|
|
55
55
|
# Finished uploading, in terminal state
|
56
56
|
READY = 'READY'
|
57
|
+
|
58
|
+
|
59
|
+
class VolumeStatus(enum.Enum):
    """Status of a volume, as recorded in the 'volumes' table."""

    # The volume is ready to be used.
    READY = 'READY'

    # The volume is currently being used.
    IN_USE = 'IN_USE'
|
sky/utils/validator.py
CHANGED
@@ -14,9 +14,19 @@ def case_insensitive_enum(validator, enums, instance, schema):
|
|
14
14
|
f'{instance!r} is not one of {enums!r}')
|
15
15
|
|
16
16
|
|
17
|
+
def case_sensitive_enum(validator, enums, instance, schema):
    """jsonschema keyword check: `instance` must be one of `enums`.

    Membership is tested with `in`, so strings only match with exact case.
    Yields a single jsonschema.ValidationError when `instance` is not
    allowed, and nothing otherwise.
    """
    del validator, schema  # Unused.
    if instance in enums:
        return
    yield jsonschema.ValidationError(
        f'{instance!r} is not one of {enums!r}')
|
22
|
+
|
23
|
+
|
17
24
|
# Move this to a function to delay initialization
|
18
25
|
def get_schema_validator():
|
19
26
|
"""Get the schema validator class, initializing it only when needed."""
|
20
27
|
return jsonschema.validators.extend(
|
21
28
|
jsonschema.Draft7Validator,
|
22
|
-
validators={
|
29
|
+
validators={
|
30
|
+
'case_insensitive_enum': case_insensitive_enum,
|
31
|
+
'case_sensitive_enum': case_sensitive_enum
|
32
|
+
})
|
sky/volumes/__init__.py
ADDED
File without changes
|
File without changes
|
@@ -0,0 +1,64 @@
|
|
1
|
+
"""SDK functions for volumes."""
|
2
|
+
import json
|
3
|
+
import typing
|
4
|
+
from typing import List
|
5
|
+
|
6
|
+
from sky import sky_logging
|
7
|
+
from sky.adaptors import common as adaptors_common
|
8
|
+
from sky.server import common as server_common
|
9
|
+
from sky.server.requests import payloads
|
10
|
+
from sky.usage import usage_lib
|
11
|
+
from sky.utils import annotations
|
12
|
+
from sky.utils import context
|
13
|
+
from sky.volumes import volume as volume_lib
|
14
|
+
|
15
|
+
if typing.TYPE_CHECKING:
|
16
|
+
import requests
|
17
|
+
else:
|
18
|
+
requests = adaptors_common.LazyImport('requests')
|
19
|
+
|
20
|
+
logger = sky_logging.init_logger(__name__)
|
21
|
+
|
22
|
+
|
23
|
+
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def apply(volume: volume_lib.Volume) -> server_common.RequestId:
    """Asks the API server to create or register a volume.

    Args:
        volume: The volume whose name, type, cloud, region, zone, size and
            config are sent to the server.

    Returns:
        The request ID extracted from the server's response.
    """
    payload = payloads.VolumeApplyBody(name=volume.name,
                                       volume_type=volume.type,
                                       cloud=volume.cloud,
                                       region=volume.region,
                                       zone=volume.zone,
                                       size=volume.size,
                                       config=volume.config)
    endpoint = f'{server_common.get_server_url()}/volumes/apply'
    # Round-trip through the model's JSON dump to get a plain dict to post.
    payload_json = json.loads(payload.model_dump_json())
    resp = requests.post(endpoint,
                         json=payload_json,
                         cookies=server_common.get_api_cookie_jar())
    return server_common.get_request_id(resp)
|
41
|
+
|
42
|
+
|
43
|
+
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def ls() -> server_common.RequestId:
    """Asks the API server to list all volumes.

    Returns:
        The request ID extracted from the server's response.
    """
    endpoint = f'{server_common.get_server_url()}/volumes'
    resp = requests.get(endpoint, cookies=server_common.get_api_cookie_jar())
    return server_common.get_request_id(resp)
|
52
|
+
|
53
|
+
|
54
|
+
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def delete(names: List[str]) -> server_common.RequestId:
    """Asks the API server to delete the named volumes.

    Args:
        names: Names of the volumes to delete.

    Returns:
        The request ID extracted from the server's response.
    """
    payload = payloads.VolumeDeleteBody(names=names)
    endpoint = f'{server_common.get_server_url()}/volumes/delete'
    # Round-trip through the model's JSON dump to get a plain dict to post.
    payload_json = json.loads(payload.model_dump_json())
    resp = requests.post(endpoint,
                         json=payload_json,
                         cookies=server_common.get_api_cookie_jar())
    return server_common.get_request_id(resp)
|
File without changes
|
@@ -0,0 +1,199 @@
|
|
1
|
+
"""Volume management core."""
|
2
|
+
|
3
|
+
import contextlib
|
4
|
+
import os
|
5
|
+
from typing import Any, Dict, Generator, List, Optional
|
6
|
+
import uuid
|
7
|
+
|
8
|
+
import filelock
|
9
|
+
|
10
|
+
import sky
|
11
|
+
from sky import global_user_state
|
12
|
+
from sky import models
|
13
|
+
from sky import provision
|
14
|
+
from sky import sky_logging
|
15
|
+
from sky.utils import common_utils
|
16
|
+
from sky.utils import status_lib
|
17
|
+
|
18
|
+
logger = sky_logging.init_logger(__name__)
|
19
|
+
|
20
|
+
# Filelocks for volume management.
|
21
|
+
VOLUME_LOCK_PATH = os.path.expanduser('~/.sky/.{volume_name}.lock')
|
22
|
+
VOLUME_LOCK_TIMEOUT_SECONDS = 20
|
23
|
+
|
24
|
+
|
25
|
+
def volume_refresh():
    """Refreshes the recorded status of every volume.

    For each volume from global_user_state.get_volumes(), asks the
    provisioner who is using it and, while holding the per-volume lock,
    updates the stored status to IN_USE (there are users) or READY (there
    are none) when it differs from the recorded one. Volumes without a
    handle, or no longer found once the lock is held, are skipped with a
    warning.
    """
    for volume in global_user_state.get_volumes():
        volume_name = volume.get('name')
        config = volume.get('handle')
        if config is None:
            logger.warning(f'Volume {volume_name} has no handle. '
                           'Skipping status refresh...')
            continue
        usedby = provision.get_volume_usedby(config.cloud, config)
        with _volume_lock(volume_name):
            # Re-read the record under the lock; the listing above was taken
            # without it.
            latest_volume = global_user_state.get_volume_by_name(volume_name)
            if latest_volume is None:
                logger.warning(f'Volume {volume_name} not found.')
                continue
            if usedby:
                new_status = status_lib.VolumeStatus.IN_USE
                usedby_note = f', usedby: {usedby}'
            else:
                new_status = status_lib.VolumeStatus.READY
                usedby_note = ''
            # Only write (and log) when the status actually changes.
            if latest_volume.get('status') != new_status:
                logger.info(f'Update volume {volume_name} '
                            f'status to {new_status.value}{usedby_note}')
                global_user_state.update_volume_status(volume_name,
                                                       status=new_status)
|
55
|
+
|
56
|
+
|
57
|
+
def volume_list() -> List[Dict[str, Any]]:
    """Gets the recorded volumes as plain dictionaries.

    Volumes that have no handle are logged and left out of the result.

    Returns:
        A list with one record per volume. Each record has the keys:
        [
            {
                'name', 'launched_at', 'user_hash', 'workspace',
                'last_attached_at', 'last_use': copied from the stored
                    volume entry,
                'user_name': name of the user whose id equals 'user_hash',
                    or '' when no such user is known,
                'status': the value of the stored status enum, or '' when
                    no status is stored,
                'type', 'cloud', 'region', 'zone', 'size', 'config',
                'name_on_cloud': copied from the volume's handle,
            }
        ]
    """
    volumes = global_user_state.get_volumes()
    user_names = {
        user.id: user.name for user in global_user_state.get_all_users()
    }
    records = []
    for volume in volumes:
        volume_name = volume.get('name')
        status = volume.get('status')
        record = {
            'name': volume_name,
            'launched_at': volume.get('launched_at'),
            'user_hash': volume.get('user_hash'),
            'user_name': user_names.get(volume.get('user_hash'), ''),
            'workspace': volume.get('workspace'),
            'last_attached_at': volume.get('last_attached_at'),
            'last_use': volume.get('last_use'),
            'status': '' if status is None else status.value,
        }
        handle = volume.get('handle')
        if handle is None:
            logger.warning(f'Volume {volume_name} has no handle.')
            continue
        record.update({
            'type': handle.type,
            'cloud': handle.cloud,
            'region': handle.region,
            'zone': handle.zone,
            'size': handle.size,
            'config': handle.config,
            'name_on_cloud': handle.name_on_cloud,
        })
        records.append(record)
    return records
|
113
|
+
|
114
|
+
|
115
|
+
def volume_delete(names: List[str]) -> None:
    """Deletes volumes.

    Volumes are processed in the given order; a failure stops the loop and
    leaves the remaining names untouched.

    Args:
        names: List of volume names to delete.

    Raises:
        ValueError: If the volume does not exist
          or is in use or has no handle.
    """
    for name in names:
        # Fail fast before taking the lock, so an unknown or busy volume is
        # reported without waiting on (or creating) its lock file.
        _get_deletable_volume_handle(name)
        with _volume_lock(name):
            # Validate again under the lock: volume_refresh() updates the
            # status while holding this same lock, so the record may have
            # changed since the check above.
            config = _get_deletable_volume_handle(name)
            logger.debug(f'Deleting volume {name} with config {config}')
            provision.delete_volume(config.cloud, config)
            global_user_state.delete_volume(name)


def _get_deletable_volume_handle(name: str):
    """Returns the handle of volume `name`; raises ValueError if the volume
    is missing, in use, or has no handle."""
    volume = global_user_state.get_volume_by_name(name)
    if volume is None:
        raise ValueError(f'Volume {name} not found.')
    if volume.get('status') == status_lib.VolumeStatus.IN_USE:
        raise ValueError(f'Volume {name} is in use.')
    config = volume.get('handle')
    if config is None:
        raise ValueError(f'Volume {name} has no handle.')
    return config
|
139
|
+
|
140
|
+
|
141
|
+
def volume_apply(name: str, volume_type: str, cloud: str, region: Optional[str],
                 zone: Optional[str], size: Optional[str],
                 config: Dict[str, Any]) -> None:
    """Creates or registers a volume.

    Does nothing (apart from logging) when a volume named `name` is already
    recorded. Otherwise the volume is applied through the provisioner and
    recorded with status READY, all while holding the per-volume lock.

    Args:
        name: The name of the volume.
        volume_type: The type of the volume.
        cloud: The cloud of the volume.
        region: The region of the volume.
        zone: The zone of the volume.
        size: The size of the volume.
        config: The configuration of the volume.
    """
    cloud_obj = sky.CLOUD_REGISTRY.from_str(cloud)
    assert cloud_obj is not None
    # The on-cloud name reuses the cluster-name helper, then gets a
    # 6-character slice of a random UUID appended.
    # NOTE(review): the suffix is appended after the max_length truncation,
    # so the result may exceed max_cluster_name_length() - confirm that is
    # acceptable for volume names.
    unique_suffix = str(uuid.uuid4())[:6]
    base_name = common_utils.make_cluster_name_on_cloud(
        name, max_length=cloud_obj.max_cluster_name_length())
    name_on_cloud = base_name + '-' + unique_suffix
    config = models.VolumeConfig(
        name=name,
        type=volume_type,
        cloud=str(cloud_obj),
        region=region,
        zone=zone,
        size=size,
        config=config,
        name_on_cloud=name_on_cloud,
    )
    logger.debug(
        f'Creating volume {name} on cloud {cloud} with config {config}')
    with _volume_lock(name):
        existing = global_user_state.get_volume_by_name(name)
        if existing is not None:
            logger.info(f'Volume {name} already exists.')
            return
        # Record the config returned by the provisioner, not the one sent.
        applied_config = provision.apply_volume(cloud, config)
        global_user_state.add_volume(name, applied_config,
                                     status_lib.VolumeStatus.READY)
|
184
|
+
|
185
|
+
|
186
|
+
@contextlib.contextmanager
def _volume_lock(volume_name: str) -> Generator[None, None, None]:
    """Context manager that holds the file lock of one volume.

    Args:
        volume_name: Name of the volume; it selects the lock file through
            VOLUME_LOCK_PATH.

    Raises:
        RuntimeError: If the lock cannot be acquired within
            VOLUME_LOCK_TIMEOUT_SECONDS.
    """
    lock_path = VOLUME_LOCK_PATH.format(volume_name=volume_name)
    try:
        with filelock.FileLock(lock_path, VOLUME_LOCK_TIMEOUT_SECONDS):
            yield
    # NOTE(review): because the yield sits inside this try, a
    # filelock.Timeout raised by the caller's body is wrapped here as well.
    except filelock.Timeout as e:
        raise RuntimeError(
            f'Failed to update volume {volume_name} due to a timeout '
            f'when trying to acquire the lock at {lock_path}. '
            'Please try again or manually remove the lock '
            'file if you believe it is stale.') from e
|
@@ -0,0 +1,85 @@
|
|
1
|
+
"""REST API for volume management."""
|
2
|
+
|
3
|
+
import fastapi
|
4
|
+
|
5
|
+
import sky
|
6
|
+
from sky import clouds
|
7
|
+
from sky import sky_logging
|
8
|
+
from sky.server.requests import executor
|
9
|
+
from sky.server.requests import payloads
|
10
|
+
from sky.server.requests import requests as requests_lib
|
11
|
+
from sky.volumes import volume
|
12
|
+
from sky.volumes.server import core
|
13
|
+
|
14
|
+
logger = sky_logging.init_logger(__name__)
|
15
|
+
|
16
|
+
router = fastapi.APIRouter()
|
17
|
+
|
18
|
+
|
19
|
+
@router.get('')
async def volume_list(request: fastapi.Request) -> None:
    """Schedules a SHORT request that runs core.volume_list."""
    request_id = request.state.request_id
    empty_body = payloads.RequestBody()
    executor.schedule_request(request_id=request_id,
                              request_name='volume_list',
                              request_body=empty_body,
                              func=core.volume_list,
                              schedule_type=requests_lib.ScheduleType.SHORT)
|
29
|
+
|
30
|
+
|
31
|
+
@router.post('/delete')
async def volume_delete(request: fastapi.Request,
                        volume_delete_body: payloads.VolumeDeleteBody) -> None:
    """Schedules a LONG request that runs core.volume_delete with the body."""
    request_id = request.state.request_id
    executor.schedule_request(request_id=request_id,
                              request_name='volume_delete',
                              request_body=volume_delete_body,
                              func=core.volume_delete,
                              schedule_type=requests_lib.ScheduleType.LONG)
|
42
|
+
|
43
|
+
|
44
|
+
@router.post('/apply')
async def volume_apply(request: fastapi.Request,
                       volume_apply_body: payloads.VolumeApplyBody) -> None:
    """Validates a volume apply request and schedules it as a LONG request.

    For PVC volumes the cloud must be Kubernetes, and a missing access mode
    defaults to READ_WRITE_ONCE.

    Raises:
        fastapi.HTTPException: With status 400 when the volume type or cloud
            is invalid, when a PVC volume targets a non-Kubernetes cloud, or
            when the PVC access mode is invalid.
    """
    volume_cloud = volume_apply_body.cloud
    volume_type = volume_apply_body.volume_type
    volume_config = volume_apply_body.config

    supported_volume_types = [
        supported_type.value for supported_type in volume.VolumeType
    ]
    if volume_type not in supported_volume_types:
        raise fastapi.HTTPException(
            status_code=400, detail=f'Invalid volume type: {volume_type}')
    cloud = sky.CLOUD_REGISTRY.from_str(volume_cloud)
    if cloud is None:
        raise fastapi.HTTPException(status_code=400,
                                    detail=f'Invalid cloud: {volume_cloud}')
    if volume_type == volume.VolumeType.PVC.value:
        if not cloud.is_same_cloud(clouds.Kubernetes()):
            raise fastapi.HTTPException(
                status_code=400,
                detail='PVC storage is only supported on Kubernetes')
        supported_access_modes = [
            access_mode.value for access_mode in volume.VolumeAccessMode
        ]
        if volume_config is None:
            # Attach the new dict to the body. Otherwise the default access
            # mode set below would live only in this local and the scheduled
            # request would still carry config=None.
            volume_config = {}
            volume_apply_body.config = volume_config
        access_mode = volume_config.get('access_mode')
        if access_mode is None:
            volume_config[
                'access_mode'] = volume.VolumeAccessMode.READ_WRITE_ONCE.value
        elif access_mode not in supported_access_modes:
            raise fastapi.HTTPException(
                status_code=400, detail=f'Invalid access mode: {access_mode}')
    executor.schedule_request(
        request_id=request.state.request_id,
        request_name='volume_apply',
        request_body=volume_apply_body,
        func=core.volume_apply,
        schedule_type=requests_lib.ScheduleType.LONG,
    )
|