skypilot-nightly 1.0.0.dev20250624__py3-none-any.whl → 1.0.0.dev20250626__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +1 -6
- sky/backends/backend_utils.py +26 -11
- sky/backends/cloud_vm_ray_backend.py +16 -5
- sky/client/cli/command.py +232 -9
- sky/client/sdk.py +195 -91
- sky/clouds/aws.py +10 -7
- sky/clouds/azure.py +10 -7
- sky/clouds/cloud.py +2 -0
- sky/clouds/cudo.py +2 -0
- sky/clouds/do.py +10 -7
- sky/clouds/fluidstack.py +2 -0
- sky/clouds/gcp.py +10 -7
- sky/clouds/hyperbolic.py +10 -7
- sky/clouds/ibm.py +2 -0
- sky/clouds/kubernetes.py +26 -9
- sky/clouds/lambda_cloud.py +10 -7
- sky/clouds/nebius.py +10 -7
- sky/clouds/oci.py +10 -7
- sky/clouds/paperspace.py +10 -7
- sky/clouds/runpod.py +10 -7
- sky/clouds/scp.py +10 -7
- sky/clouds/ssh.py +36 -0
- sky/clouds/vast.py +10 -7
- sky/clouds/vsphere.py +2 -0
- sky/core.py +21 -0
- sky/dag.py +14 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/bs6UB9V4Jq10TIZ5x-kBK/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/141-fa5a20cbf401b351.js +11 -0
- sky/dashboard/out/_next/static/chunks/230-d6e363362017ff3a.js +1 -0
- sky/dashboard/out/_next/static/chunks/25.76c246239df93d50.js +6 -0
- sky/dashboard/out/_next/static/chunks/43-36177d00f6956ab2.js +1 -0
- sky/dashboard/out/_next/static/chunks/430.ed51037d1a4a438b.js +1 -0
- sky/dashboard/out/_next/static/chunks/470-92dd1614396389be.js +1 -0
- sky/dashboard/out/_next/static/chunks/544.110e53813fb98e2e.js +1 -0
- sky/dashboard/out/_next/static/chunks/645.961f08e39b8ce447.js +1 -0
- sky/dashboard/out/_next/static/chunks/690.55f9eed3be903f56.js +16 -0
- sky/dashboard/out/_next/static/chunks/697.6460bf72e760addd.js +20 -0
- sky/dashboard/out/_next/static/chunks/785.dc2686c3c1235554.js +1 -0
- sky/dashboard/out/_next/static/chunks/871-3db673be3ee3750b.js +6 -0
- sky/dashboard/out/_next/static/chunks/875.52c962183328b3f2.js +25 -0
- sky/dashboard/out/_next/static/chunks/973-81b2d057178adb76.js +1 -0
- sky/dashboard/out/_next/static/chunks/982.1b61658204416b0f.js +1 -0
- sky/dashboard/out/_next/static/chunks/984.e8bac186a24e5178.js +1 -0
- sky/dashboard/out/_next/static/chunks/990-0ad5ea1699e03ee8.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{_app-ce31493da9747ef4.js → _app-9a3ce3170d2edcec.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-8040f2483897ed0c.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/{clusters-7e9736af1c6345a6.js → clusters-f119a5630a1efd61.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/config-6b255eae088da6a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-b302aea4d65766bf.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-ee8cc4d449945d19.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-e4b23128db0774cd.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-0a5695ff3075d94a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-4978cbb093e141e7.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-476b670ef33d1ecd.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/{new-31aa8bdcb7592635.js → new-5b59bce9eb208d84.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-cb7e720b739de53a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-50e230828730cfb3.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-08fdb9e6070127fc.js +1 -0
- sky/dashboard/out/_next/static/css/52082cf558ec9705.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage_utils.py +2 -4
- sky/exceptions.py +15 -0
- sky/execution.py +5 -0
- sky/global_user_state.py +129 -0
- sky/jobs/client/sdk.py +13 -11
- sky/jobs/server/core.py +4 -0
- sky/models.py +16 -0
- sky/provision/__init__.py +26 -0
- sky/provision/kubernetes/__init__.py +3 -0
- sky/provision/kubernetes/instance.py +38 -77
- sky/provision/kubernetes/utils.py +70 -4
- sky/provision/kubernetes/volume.py +147 -0
- sky/resources.py +20 -76
- sky/serve/client/sdk.py +13 -13
- sky/serve/server/core.py +5 -1
- sky/server/common.py +40 -5
- sky/server/constants.py +5 -1
- sky/server/metrics.py +105 -0
- sky/server/requests/executor.py +30 -14
- sky/server/requests/payloads.py +16 -0
- sky/server/requests/requests.py +35 -1
- sky/server/rest.py +153 -0
- sky/server/server.py +70 -43
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +8 -3
- sky/server/uvicorn.py +153 -13
- sky/setup_files/dependencies.py +2 -0
- sky/skylet/constants.py +19 -3
- sky/skypilot_config.py +3 -0
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +133 -0
- sky/ssh_node_pools/server.py +232 -0
- sky/task.py +141 -18
- sky/templates/kubernetes-ray.yml.j2 +30 -1
- sky/users/permission.py +2 -0
- sky/utils/context.py +3 -1
- sky/utils/kubernetes/deploy_remote_cluster.py +12 -185
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/resources_utils.py +66 -0
- sky/utils/rich_utils.py +6 -0
- sky/utils/schemas.py +146 -3
- sky/utils/status_lib.py +10 -0
- sky/utils/validator.py +11 -1
- sky/volumes/__init__.py +0 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +64 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +199 -0
- sky/volumes/server/server.py +85 -0
- sky/volumes/utils.py +158 -0
- sky/volumes/volume.py +198 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/RECORD +135 -115
- sky/dashboard/out/_next/static/chunks/211.692afc57e812ae1a.js +0 -1
- sky/dashboard/out/_next/static/chunks/350.9e123a4551f68b0d.js +0 -1
- sky/dashboard/out/_next/static/chunks/37-4650f214e2119168.js +0 -6
- sky/dashboard/out/_next/static/chunks/42.2273cc2415291ceb.js +0 -6
- sky/dashboard/out/_next/static/chunks/443.b2242d0efcdf5f47.js +0 -1
- sky/dashboard/out/_next/static/chunks/470-1494c899266cf5c9.js +0 -1
- sky/dashboard/out/_next/static/chunks/513.309df9e18a9ff005.js +0 -1
- sky/dashboard/out/_next/static/chunks/641.c8e452bc5070a630.js +0 -1
- sky/dashboard/out/_next/static/chunks/682.4dd5dc116f740b5f.js +0 -6
- sky/dashboard/out/_next/static/chunks/760-a89d354797ce7af5.js +0 -1
- sky/dashboard/out/_next/static/chunks/843-bde186946d353355.js +0 -11
- sky/dashboard/out/_next/static/chunks/856-bfddc18e16f3873c.js +0 -1
- sky/dashboard/out/_next/static/chunks/901-b424d293275e1fd7.js +0 -1
- sky/dashboard/out/_next/static/chunks/973-56412c7976b4655b.js +0 -1
- sky/dashboard/out/_next/static/chunks/984.ae8c08791d274ca0.js +0 -50
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-4e065c812a52460b.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-520ec1ab65e2f2a4.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/config-e4f473661889e7cd.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-00fd23b9577492ca.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-8a4bf7370d4d9bb7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-171c27f4ca94861c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-55e5bcb16d563231.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-c9f4d785cdaa52d8.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-ecc5a7003776cfa7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-f00cba35691483b1.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-c85998e6a5722f21.js +0 -1
- sky/dashboard/out/_next/static/css/6ab927686b492a4a.css +0 -3
- sky/dashboard/out/_next/static/zsALxITkbP8J8NVwSDwMo/_buildManifest.js +0 -1
- /sky/dashboard/out/_next/static/{zsALxITkbP8J8NVwSDwMo → bs6UB9V4Jq10TIZ5x-kBK}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{938-ce7991c156584b06.js → 938-068520cc11738deb.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,221 @@
|
|
1
|
+
"""Utility functions for managing SSH node pools."""
|
2
|
+
import os
|
3
|
+
import re
|
4
|
+
import subprocess
|
5
|
+
from typing import Any, Callable, Dict, List, Optional
|
6
|
+
import uuid
|
7
|
+
|
8
|
+
import yaml
|
9
|
+
|
10
|
+
from sky.utils import ux_utils
|
11
|
+
|
12
|
+
DEFAULT_SSH_NODE_POOLS_PATH = os.path.expanduser('~/.sky/ssh_node_pools.yaml')
|
13
|
+
RED = '\033[0;31m'
|
14
|
+
NC = '\033[0m' # No color
|
15
|
+
|
16
|
+
|
17
|
+
def check_host_in_ssh_config(hostname: str) -> bool:
    """Tell whether the OpenSSH client config has a specific entry for a host.

    The check shells out to ``ssh -vvG <hostname> -o ConnectTimeout=0`` and
    scans the combined stdout/stderr text for a debug line of the form::

        debug1: ~/.ssh/config line 42: Applying options for <pattern>

    where ``<pattern>`` does not start with ``*``. No config file is opened
    or parsed by this function itself; everything is delegated to ``ssh``.
    Per the original author's notes, ``-G`` expands the effective config
    without connecting, ``-vv`` emits the debug lines naming the applied
    stanzas, and ``ConnectTimeout=0`` is meant to avoid a DNS lookup.

    Args:
        hostname: The alias, IP or FQDN to look up.

    Returns:
        True if some stanza other than a ``*`` wildcard was applied for the
        host, False if no such debug line was printed.
    """
    ssh_cmd = ['ssh', '-vvG', hostname, '-o', 'ConnectTimeout=0']
    # Debug output is written to stderr, so fold it into stdout and scan one
    # stream. A non-zero exit status is tolerated: only the text matters.
    completed = subprocess.run(ssh_cmd,
                               text=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               check=False)

    applied_stanza = re.search(
        r'^debug\d+: .*Applying options for ([^*].*)$',
        completed.stdout,
        flags=re.MULTILINE)
    return applied_stanza is not None
|
57
|
+
|
58
|
+
|
59
|
+
class UniqueKeySafeLoader(yaml.SafeLoader):
    """`yaml.SafeLoader` variant that refuses mappings with duplicate keys."""

    def construct_mapping(self, node, deep=False):
        """Construct a mapping from *node*, rejecting repeated keys.

        Args:
            node: The YAML mapping node being constructed.
            deep: Forwarded to `construct_object` and to the base class.

        Returns:
            Whatever `yaml.SafeLoader.construct_mapping` returns for *node*.

        Raises:
            yaml.constructor.ConstructorError: If a key occurs more than once
                in the mapping. The human-readable message is stored in the
                exception's `note` attribute.
        """
        seen_keys = set()
        for key_node, _ in node.value:
            key = self.construct_object(key_node, deep=deep)
            try:
                is_duplicate = key in seen_keys
            except TypeError:
                # Unhashable key (e.g. a YAML sequence used as a key). Stop
                # the duplicate scan and let the base implementation below
                # report it, instead of leaking a bare TypeError from the
                # set membership test.
                break
            if is_duplicate:
                raise yaml.constructor.ConstructorError(
                    note=(f'Duplicate key found: {key!r}.\n'
                          'Please remove one of them from the YAML file.'))
            seen_keys.add(key)
        return super().construct_mapping(node, deep)
|
72
|
+
|
73
|
+
|
74
|
+
def load_ssh_targets(file_path: str) -> Dict[str, Any]:
    """Load the SSH Node Pools definition from a YAML file.

    The file is parsed with `UniqueKeySafeLoader`, so duplicate keys in a
    mapping are reported as an error.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        The parsed YAML document, returned as-is from `yaml.load`.

    Raises:
        ValueError: If the file does not exist, cannot be read, or cannot be
            parsed as YAML.
    """
    if not os.path.exists(file_path):
        with ux_utils.print_exception_no_traceback():
            raise ValueError(f'SSH Node Pools file not found: {file_path}')

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return yaml.load(f, Loader=UniqueKeySafeLoader)
    except yaml.constructor.ConstructorError as e:
        # Only the duplicate-key error raised by UniqueKeySafeLoader sets
        # `note`. Other constructor errors (e.g. an unsupported tag) leave it
        # as None, so fall back to the full error text rather than raising
        # ValueError(None).
        message = e.note or f'Error loading SSH Node Pools file: {e}'
        with ux_utils.print_exception_no_traceback():
            raise ValueError(message) from e
    except (yaml.YAMLError, OSError) as e:
        with ux_utils.print_exception_no_traceback():
            raise ValueError(f'Error loading SSH Node Pools file: {e}') from e
|
90
|
+
|
91
|
+
|
92
|
+
def get_cluster_config(
        targets: Dict[str, Any],
        cluster_name: Optional[str] = None,
        file_path: str = DEFAULT_SSH_NODE_POOLS_PATH) -> Dict[str, Any]:
    """Select one cluster's configuration, or all of them.

    Args:
        targets: Mapping of cluster name to cluster configuration.
        cluster_name: Cluster to select. When empty or None, every cluster
            in `targets` is returned.
        file_path: Path of the file `targets` came from; used only in error
            messages.

    Returns:
        `targets` itself when no cluster name is given, otherwise a new
        single-entry dict ``{cluster_name: targets[cluster_name]}``.

    Raises:
        ValueError: If `targets` is empty, or `cluster_name` is given but is
            not a key of `targets`.
    """
    if not targets:
        with ux_utils.print_exception_no_traceback():
            raise ValueError(
                f'No clusters defined in SSH Node Pools file {file_path}')

    # No specific cluster requested: hand back everything.
    if not cluster_name:
        return targets

    if cluster_name in targets:
        return {cluster_name: targets[cluster_name]}

    with ux_utils.print_exception_no_traceback():
        raise ValueError(f'Cluster {cluster_name!r} not found in '
                         f'SSH Node Pools file {file_path}')
|
111
|
+
|
112
|
+
|
113
|
+
def prepare_hosts_info(
    cluster_name: str,
    cluster_config: Dict[str, Any],
    upload_ssh_key_func: Optional[Callable[[str, str], str]] = None
) -> List[Dict[str, Any]]:
    """Prepare list of hosts with resolved user, identity_file, and password.

    Each entry of ``cluster_config['hosts']`` is either a string (an IP or an
    SSH config hostname) or a dict with an ``ip`` field and optional ``user``,
    ``identity_file`` and ``password`` overrides. Cluster-level ``user``,
    ``identity_file`` and ``password`` act as defaults. For hosts that match a
    stanza in the local SSH config, ``user`` and ``identity_file`` are left
    empty and ``use_ssh_config`` is set to True.

    Args:
        cluster_name: The name of the cluster.
        cluster_config: The configuration for the cluster.
        upload_ssh_key_func: A function to upload the SSH key to the remote
            server and wait for the key to be uploaded. This function will
            take the key name and the local key file path as input, and
            return the path for the remote SSH key file on the API server.
            This function will only be set in `sky ssh up -f` mode, and if
            this function is set, any ssh config will not be allowed as we
            don't support uploading any ssh config to the API server.

    Returns:
        A list of dicts with the keys ``ip``, ``user``, ``identity_file``,
        ``password`` and ``use_ssh_config`` (a bool). Dict hosts without an
        ``ip`` field are skipped with a printed warning.

    Raises:
        ValueError: If no hosts are defined, an identity file is missing, or
            an SSH config host is used together with `upload_ssh_key_func`.
    """
    if 'hosts' not in cluster_config or not cluster_config['hosts']:
        with ux_utils.print_exception_no_traceback():
            raise ValueError(
                f'No hosts defined in cluster {cluster_name} configuration')

    # Get cluster-level defaults
    cluster_user = cluster_config.get('user', '')
    cluster_identity_file = os.path.expanduser(
        cluster_config.get('identity_file', ''))
    cluster_password = cluster_config.get('password', '')

    # Check if cluster identity file exists
    if cluster_identity_file and not os.path.isfile(cluster_identity_file):
        with ux_utils.print_exception_no_traceback():
            raise ValueError(
                f'SSH Identity File Missing: {cluster_identity_file}')

    def _reject_ssh_config_host(hostname: str) -> None:
        # Built with f-strings only (no str.format), so braces inside the
        # cluster name or hostname cannot break the message formatting.
        with ux_utils.print_exception_no_traceback():
            raise ValueError(f'Cluster {cluster_name} uses SSH config '
                             f'for hostname {hostname}, which is not '
                             'supported by the -f flag. Please use a '
                             'dict with `ip` field instead.')

    def _maybe_hardcode_identity_file(i: int, identity_file: str) -> str:
        # Without an upload function the local path is used unchanged.
        if upload_ssh_key_func is None:
            return identity_file
        if not os.path.exists(os.path.expanduser(identity_file)):
            with ux_utils.print_exception_no_traceback():
                raise ValueError(
                    f'Identity file {identity_file} does not exist.')
        # Short random suffix keeps uploaded key names distinct.
        key_name = f'{cluster_name}-{i}-{str(uuid.uuid4())[:4]}'
        key_file_on_api_server = upload_ssh_key_func(key_name, identity_file)
        return key_file_on_api_server

    hosts_info = []
    for i, host in enumerate(cluster_config['hosts']):
        # Host can be a string (IP or SSH config hostname) or a dict
        if isinstance(host, str):
            # Check if this is an SSH config hostname
            is_ssh_config_host = check_host_in_ssh_config(host)
            if upload_ssh_key_func is not None and is_ssh_config_host:
                _reject_ssh_config_host(host)

            hosts_info.append({
                'ip': host,
                'user': '' if is_ssh_config_host else cluster_user,
                'identity_file': '' if is_ssh_config_host else
                                 _maybe_hardcode_identity_file(
                                     i, cluster_identity_file),
                'password': cluster_password,
                'use_ssh_config': is_ssh_config_host
            })
        else:
            # It's a dict with potential overrides
            if 'ip' not in host:
                print(f'{RED}Warning: Host missing \'ip\' field, '
                      f'skipping: {host}{NC}')
                continue

            # Check if this is an SSH config hostname
            is_ssh_config_host = check_host_in_ssh_config(host['ip'])
            if upload_ssh_key_func is not None and is_ssh_config_host:
                # Report only the hostname: the host dict may carry a
                # `password` entry that must not end up in an error message.
                _reject_ssh_config_host(host['ip'])

            # Use host-specific values or fall back to cluster defaults
            host_user = '' if is_ssh_config_host else host.get(
                'user', cluster_user)
            host_identity_file = '' if is_ssh_config_host else (
                _maybe_hardcode_identity_file(
                    i, host.get('identity_file', cluster_identity_file)))
            host_identity_file = os.path.expanduser(host_identity_file)
            host_password = host.get('password', cluster_password)

            if host_identity_file and not os.path.isfile(host_identity_file):
                with ux_utils.print_exception_no_traceback():
                    raise ValueError(
                        f'SSH Identity File Missing: {host_identity_file}')

            hosts_info.append({
                'ip': host['ip'],
                'user': host_user,
                'identity_file': host_identity_file,
                'password': host_password,
                'use_ssh_config': is_ssh_config_host
            })

    return hosts_info
|
sky/utils/resources_utils.py
CHANGED
@@ -8,6 +8,7 @@ import typing
|
|
8
8
|
from typing import Dict, List, Optional, Set, Union
|
9
9
|
|
10
10
|
from sky import skypilot_config
|
11
|
+
from sky.skylet import constants
|
11
12
|
from sky.utils import common_utils
|
12
13
|
from sky.utils import registry
|
13
14
|
from sky.utils import ux_utils
|
@@ -331,3 +332,68 @@ def make_launchables_for_valid_region_zones(
|
|
331
332
|
# Batch the requests at the granularity of a single region.
|
332
333
|
launchables.append(launchable_resources.copy(region=region.name))
|
333
334
|
return launchables
|
335
|
+
|
336
|
+
|
337
|
+
def parse_memory_resource(resource_qty_str: Union[str, int, float],
                          field_name: str,
                          ret_type: type = int,
                          unit: str = 'gb',
                          allow_plus: bool = False,
                          allow_x: bool = False,
                          allow_rounding: bool = False) -> str:
    """Normalize a memory quantity into a string expressed in `unit`.

    A value that `ret_type` can parse directly (after stripping any allowed
    '+' / 'x' suffix) is assumed to already be in the requested unit and is
    returned unchanged. Otherwise the value must end with one of the keys of
    `constants.MEMORY_SIZE_UNITS` (matched case-insensitively) and is
    converted using the multipliers stored there.

    Args:
        resource_qty_str: Resource quantity, e.g. a number or a string with a
            unit suffix.
        field_name: Name of the field being parsed; used in the error text.
        ret_type: Numeric type used to parse and format the value.
        unit: Unit to convert to; must be a key of
            `constants.MEMORY_SIZE_UNITS`.
        allow_plus: Whether a trailing '+' is accepted (and kept).
        allow_x: Whether a trailing 'x' is accepted (and kept). Per the
            original comment, 'x' is only used internally for the jobs
            controller.
        allow_rounding: Whether a converted value that `ret_type` cannot
            represent exactly may be rounded via `ret_type`.

    Returns:
        The quantity as a string, followed by '+' and/or 'x' if they were
        present on the input.

    Raises:
        ValueError: If a suffix is present but not allowed, or the value
            cannot be parsed/converted.
    """
    assert unit in constants.MEMORY_SIZE_UNITS, f'Invalid unit: {unit}'

    error_msg = (f'"{field_name}" field should be a '
                 f'{constants.MEMORY_SIZE_PATTERN}+?,'
                 f' got {resource_qty_str}')

    quantity = str(resource_qty_str)

    # Peel off the optional markers, '+' first and then 'x', remembering
    # which ones were present so they can be re-attached to the result.
    kept_markers = []
    for marker, allowed in (('+', allow_plus), ('x', allow_x)):
        if not quantity.endswith(marker):
            continue
        if not allowed:
            raise ValueError(error_msg)
        quantity = quantity[:-1]
        kept_markers.append(marker)
    trailer = ''.join(kept_markers)

    # A bare number is assumed to already be in the wanted unit, to maintain
    # backwards compatibility.
    try:
        ret_type(quantity)
    except ValueError:
        pass
    else:
        return f'{quantity}{trailer}'

    lowered = quantity.lower()
    for suffix, factor in constants.MEMORY_SIZE_UNITS.items():
        if not lowered.endswith(suffix):
            continue
        try:
            magnitude = ret_type(lowered[:-len(suffix)])
            scaled = magnitude * factor / constants.MEMORY_SIZE_UNITS[unit]
            exact = ret_type(scaled)
            if not allow_rounding and exact != scaled:
                raise ValueError(error_msg)
            return f'{exact}{trailer}'
        except ValueError:
            # Either the numeric part did not parse with this suffix or the
            # conversion was inexact; try the remaining unit suffixes.
            continue

    raise ValueError(error_msg)
|
sky/utils/rich_utils.py
CHANGED
@@ -7,6 +7,7 @@ import threading
|
|
7
7
|
import typing
|
8
8
|
from typing import Callable, Iterator, Optional, Tuple, Union
|
9
9
|
|
10
|
+
from sky import exceptions
|
10
11
|
from sky.adaptors import common as adaptors_common
|
11
12
|
from sky.utils import annotations
|
12
13
|
from sky.utils import context
|
@@ -58,6 +59,7 @@ class Control(enum.Enum):
|
|
58
59
|
EXIT = 'rich_exit'
|
59
60
|
UPDATE = 'rich_update'
|
60
61
|
HEARTBEAT = 'heartbeat'
|
62
|
+
RETRY = 'retry'
|
61
63
|
|
62
64
|
def encode(self, msg: str) -> str:
|
63
65
|
return f'<{self.value}>{msg}</{self.value}>'
|
@@ -365,6 +367,10 @@ def decode_rich_status(
|
|
365
367
|
yield line
|
366
368
|
continue
|
367
369
|
|
370
|
+
if control == Control.RETRY:
|
371
|
+
raise exceptions.ServerTemporarilyUnavailableError(
|
372
|
+
'The server is temporarily unavailable. Please try '
|
373
|
+
'again.')
|
368
374
|
# control is not None, i.e. it is a rich status control message.
|
369
375
|
if threading.current_thread() is not threading.main_thread():
|
370
376
|
yield None
|
sky/utils/schemas.py
CHANGED
@@ -70,8 +70,36 @@ _AUTOSTOP_SCHEMA = {
|
|
70
70
|
}
|
71
71
|
|
72
72
|
|
73
|
-
|
74
|
-
|
73
|
+
# Note: This is similar to _get_infra_pattern()
|
74
|
+
# but without the wildcard patterns.
|
75
|
+
def _get_volume_infra_pattern():
    """Build the regex that validates the `infra` field of a volume.

    Accepted forms are ``cloud[/region[/zone]]`` for every cloud in
    `constants.ALL_CLOUDS` except 'kubernetes', and ``kubernetes`` / ``k8s``
    optionally followed by ``/<context>``. No wildcard form is included.
    """
    # Cloud names, matched case-insensitively. Kubernetes is handled by its
    # own alternative below, so it is taken out of the generic list.
    non_k8s_clouds = list(constants.ALL_CLOUDS)
    non_k8s_clouds.remove('kubernetes')
    cloud_alternatives = '|'.join(non_k8s_clouds)
    cloud_pattern = f'(?i:({cloud_alternatives}))'

    # Optional "/region", itself optionally followed by "/zone". Each
    # component is a slash plus one or more non-slash characters.
    region_zone_pattern = '(?:/[^/]+(?:/[^/]+)?)?'

    # "kubernetes" or "k8s" alone, or followed by "/" and a context name,
    # which may itself contain slashes.
    kubernetes_pattern = '(?i:kubernetes|k8s)(?:/.+)?'

    # Anchor the alternation so the whole string has to match.
    return ('^(?:' + cloud_pattern + region_zone_pattern + '|' +
            kubernetes_pattern + ')$')
|
100
|
+
|
101
|
+
|
102
|
+
def _get_infra_pattern():
|
75
103
|
# Building the regex pattern for the infra field
|
76
104
|
# Format: cloud[/region[/zone]] or wildcards or kubernetes context
|
77
105
|
# Match any cloud name (case insensitive)
|
@@ -103,7 +131,11 @@ def _get_single_resources_schema():
|
|
103
131
|
infra_pattern = (f'^(?:{cloud_pattern}{region_zone_pattern}|'
|
104
132
|
f'{wildcard_cloud}{wildcard_with_region}|'
|
105
133
|
f'{kubernetes_pattern})$')
|
134
|
+
return infra_pattern
|
106
135
|
|
136
|
+
|
137
|
+
def _get_single_resources_schema():
|
138
|
+
"""Schema for a single resource in a resources list."""
|
107
139
|
return {
|
108
140
|
'$schema': 'https://json-schema.org/draft/2020-12/schema',
|
109
141
|
'type': 'object',
|
@@ -133,7 +165,7 @@ def _get_single_resources_schema():
|
|
133
165
|
# 3. Kubernetes patterns - e.g. "kubernetes/my-context",
|
134
166
|
# "k8s/context-name",
|
135
167
|
# "k8s/aws:eks:us-east-1:123456789012:cluster/my-cluster"
|
136
|
-
'pattern':
|
168
|
+
'pattern': _get_infra_pattern(),
|
137
169
|
},
|
138
170
|
'cpus': {
|
139
171
|
'anyOf': [{
|
@@ -383,6 +415,66 @@ def get_resources_schema():
|
|
383
415
|
}
|
384
416
|
|
385
417
|
|
418
|
+
def get_volume_schema():
    """Return the JSON schema for a volume specification.

    `name`, `type` and `infra` are required; unknown top-level properties
    are rejected.
    """
    # Imported here rather than at module level (presumably to avoid a
    # circular import -- TODO confirm).
    # pylint: disable=import-outside-toplevel
    from sky.volumes import volume

    properties = {
        'name': {
            'type': 'string',
        },
        'type': {
            'type': 'string',
            'case_sensitive_enum': [
                member.value for member in volume.VolumeType
            ],
        },
        'infra': {
            'type': 'string',
            'description': ('Infrastructure specification in format: '
                            'cloud[/region[/zone]].'),
            # The pattern accepts:
            # 1. cloud[/region[/zone]] - e.g. "aws", "aws/us-east-1",
            #    "aws/us-east-1/us-east-1a"
            # 2. Kubernetes forms - e.g. "kubernetes/my-context",
            #    "k8s/context-name",
            #    "k8s/aws:eks:us-east-1:123456789012:cluster/my-cluster"
            'pattern': _get_volume_infra_pattern(),
        },
        'size': {
            'type': 'string',
            'pattern': constants.MEMORY_SIZE_PATTERN,
        },
        'resource_name': {
            'type': 'string',
        },
        'config': {
            'type': 'object',
            'required': [],
            'properties': {
                'storage_class_name': {
                    'type': 'string',
                },
                'access_mode': {
                    'type': 'string',
                    'case_sensitive_enum': [
                        member.value for member in volume.VolumeAccessMode
                    ],
                },
                'namespace': {
                    'type': 'string',
                },
            },
        },
    }

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': ['name', 'type', 'infra'],
        'additionalProperties': False,
        'properties': properties,
    }
|
476
|
+
|
477
|
+
|
386
478
|
def get_storage_schema():
|
387
479
|
# pylint: disable=import-outside-toplevel
|
388
480
|
from sky.data import storage
|
@@ -457,6 +549,49 @@ def get_storage_schema():
|
|
457
549
|
}
|
458
550
|
|
459
551
|
|
552
|
+
def get_volume_mount_schema():
    """Schema for volume mount object in task config (internal use only).

    A mount may carry `path`, `volume_name` and `volume_config`; none of them
    is required and no other top-level property is allowed. `volume_config`
    itself accepts additional properties beyond `cloud`, `region` and `zone`.
    """

    def _string_or_null():
        # A fresh dict per call so the two users never share one object.
        return {'anyOf': [{'type': 'string'}, {'type': 'null'}]}

    volume_config_schema = {
        'type': 'object',
        'required': [],
        'additionalProperties': True,
        'properties': {
            'cloud': {
                'type': 'string',
                'case_insensitive_enum': list(constants.ALL_CLOUDS)
            },
            'region': _string_or_null(),
            'zone': _string_or_null(),
        },
    }

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': {
            'path': {
                'type': 'string',
            },
            'volume_name': {
                'type': 'string',
            },
            'volume_config': volume_config_schema,
        },
    }
|
593
|
+
|
594
|
+
|
460
595
|
def get_service_schema():
|
461
596
|
"""Schema for top-level `service:` field (for SkyServe)."""
|
462
597
|
# To avoid circular imports, only import when needed.
|
@@ -723,6 +858,14 @@ def get_task_schema():
|
|
723
858
|
'config': _filter_schema(
|
724
859
|
get_config_schema(),
|
725
860
|
constants.OVERRIDEABLE_CONFIG_KEYS_IN_TASK),
|
861
|
+
# volumes config is validated separately using get_volume_schema
|
862
|
+
'volumes': {
|
863
|
+
'type': 'object',
|
864
|
+
},
|
865
|
+
'volume_mounts': {
|
866
|
+
'type': 'array',
|
867
|
+
'items': get_volume_mount_schema(),
|
868
|
+
},
|
726
869
|
**_experimental_task_schema(),
|
727
870
|
}
|
728
871
|
}
|
sky/utils/status_lib.py
CHANGED
@@ -54,3 +54,13 @@ class StorageStatus(enum.Enum):
|
|
54
54
|
|
55
55
|
# Finished uploading, in terminal state
|
56
56
|
READY = 'READY'
|
57
|
+
|
58
|
+
|
59
|
+
class VolumeStatus(enum.Enum):
    """Status of a volume, as stored in the 'volumes' table."""

    # The volume is ready to be used.
    READY = 'READY'

    # The volume is currently being used.
    IN_USE = 'IN_USE'
|
sky/utils/validator.py
CHANGED
@@ -14,9 +14,19 @@ def case_insensitive_enum(validator, enums, instance, schema):
|
|
14
14
|
f'{instance!r} is not one of {enums!r}')
|
15
15
|
|
16
16
|
|
17
|
+
def case_sensitive_enum(validator, enums, instance, schema):
    """Validator callback: `instance` must be a member of `enums` exactly.

    Membership is tested with `in`, so strings are compared case-sensitively.
    Yields one `jsonschema.ValidationError` when the value is not allowed and
    nothing otherwise.
    """
    del validator, schema  # Unused.
    if instance in enums:
        return
    yield jsonschema.ValidationError(f'{instance!r} is not one of {enums!r}')
|
22
|
+
|
23
|
+
|
17
24
|
# Move this to a function to delay initialization
|
18
25
|
def get_schema_validator():
|
19
26
|
"""Get the schema validator class, initializing it only when needed."""
|
20
27
|
return jsonschema.validators.extend(
|
21
28
|
jsonschema.Draft7Validator,
|
22
|
-
validators={
|
29
|
+
validators={
|
30
|
+
'case_insensitive_enum': case_insensitive_enum,
|
31
|
+
'case_sensitive_enum': case_sensitive_enum
|
32
|
+
})
|
sky/volumes/__init__.py
ADDED
File without changes
|
File without changes
|
@@ -0,0 +1,64 @@
|
|
1
|
+
"""SDK functions for volumes."""
|
2
|
+
import json
|
3
|
+
import typing
|
4
|
+
from typing import List
|
5
|
+
|
6
|
+
from sky import sky_logging
|
7
|
+
from sky.adaptors import common as adaptors_common
|
8
|
+
from sky.server import common as server_common
|
9
|
+
from sky.server.requests import payloads
|
10
|
+
from sky.usage import usage_lib
|
11
|
+
from sky.utils import annotations
|
12
|
+
from sky.utils import context
|
13
|
+
from sky.volumes import volume as volume_lib
|
14
|
+
|
15
|
+
if typing.TYPE_CHECKING:
|
16
|
+
import requests
|
17
|
+
else:
|
18
|
+
requests = adaptors_common.LazyImport('requests')
|
19
|
+
|
20
|
+
logger = sky_logging.init_logger(__name__)
|
21
|
+
|
22
|
+
|
23
|
+
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def apply(volume: volume_lib.Volume) -> server_common.RequestId:
    """Creates or registers a volume.

    Sends the volume's name, type, cloud, region, zone, size and config to
    the API server's ``/volumes/apply`` endpoint.

    Args:
        volume: The volume to create or register.

    Returns:
        The request ID extracted from the server response.
    """
    request_body = payloads.VolumeApplyBody(name=volume.name,
                                            volume_type=volume.type,
                                            cloud=volume.cloud,
                                            region=volume.region,
                                            zone=volume.zone,
                                            size=volume.size,
                                            config=volume.config)
    endpoint = f'{server_common.get_server_url()}/volumes/apply'
    # Round-trip through JSON so the payload only holds JSON-native values.
    payload = json.loads(request_body.model_dump_json())
    response = requests.post(endpoint,
                             json=payload,
                             cookies=server_common.get_api_cookie_jar())
    return server_common.get_request_id(response)
|
41
|
+
|
42
|
+
|
43
|
+
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def ls() -> server_common.RequestId:
    """Lists all volumes.

    Issues a GET to the API server's ``/volumes`` endpoint.

    Returns:
        The request ID extracted from the server response.
    """
    endpoint = f'{server_common.get_server_url()}/volumes'
    response = requests.get(endpoint,
                            cookies=server_common.get_api_cookie_jar())
    return server_common.get_request_id(response)
|
52
|
+
|
53
|
+
|
54
|
+
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def delete(names: List[str]) -> server_common.RequestId:
    """Deletes the volumes with the given names.

    Posts the names to the API server's ``/volumes/delete`` endpoint.

    Args:
        names: Names of the volumes to delete.

    Returns:
        The request ID extracted from the server response.
    """
    request_body = payloads.VolumeDeleteBody(names=names)
    endpoint = f'{server_common.get_server_url()}/volumes/delete'
    # Round-trip through JSON so the payload only holds JSON-native values.
    payload = json.loads(request_body.model_dump_json())
    response = requests.post(endpoint,
                             json=payload,
                             cookies=server_common.get_api_cookie_jar())
    return server_common.get_request_id(response)
|
File without changes
|