skypilot-nightly 1.0.0.dev20250624__py3-none-any.whl → 1.0.0.dev20250626__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +1 -6
  3. sky/backends/backend_utils.py +26 -11
  4. sky/backends/cloud_vm_ray_backend.py +16 -5
  5. sky/client/cli/command.py +232 -9
  6. sky/client/sdk.py +195 -91
  7. sky/clouds/aws.py +10 -7
  8. sky/clouds/azure.py +10 -7
  9. sky/clouds/cloud.py +2 -0
  10. sky/clouds/cudo.py +2 -0
  11. sky/clouds/do.py +10 -7
  12. sky/clouds/fluidstack.py +2 -0
  13. sky/clouds/gcp.py +10 -7
  14. sky/clouds/hyperbolic.py +10 -7
  15. sky/clouds/ibm.py +2 -0
  16. sky/clouds/kubernetes.py +26 -9
  17. sky/clouds/lambda_cloud.py +10 -7
  18. sky/clouds/nebius.py +10 -7
  19. sky/clouds/oci.py +10 -7
  20. sky/clouds/paperspace.py +10 -7
  21. sky/clouds/runpod.py +10 -7
  22. sky/clouds/scp.py +10 -7
  23. sky/clouds/ssh.py +36 -0
  24. sky/clouds/vast.py +10 -7
  25. sky/clouds/vsphere.py +2 -0
  26. sky/core.py +21 -0
  27. sky/dag.py +14 -0
  28. sky/dashboard/out/404.html +1 -1
  29. sky/dashboard/out/_next/static/bs6UB9V4Jq10TIZ5x-kBK/_buildManifest.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/141-fa5a20cbf401b351.js +11 -0
  31. sky/dashboard/out/_next/static/chunks/230-d6e363362017ff3a.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/25.76c246239df93d50.js +6 -0
  33. sky/dashboard/out/_next/static/chunks/43-36177d00f6956ab2.js +1 -0
  34. sky/dashboard/out/_next/static/chunks/430.ed51037d1a4a438b.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/470-92dd1614396389be.js +1 -0
  36. sky/dashboard/out/_next/static/chunks/544.110e53813fb98e2e.js +1 -0
  37. sky/dashboard/out/_next/static/chunks/645.961f08e39b8ce447.js +1 -0
  38. sky/dashboard/out/_next/static/chunks/690.55f9eed3be903f56.js +16 -0
  39. sky/dashboard/out/_next/static/chunks/697.6460bf72e760addd.js +20 -0
  40. sky/dashboard/out/_next/static/chunks/785.dc2686c3c1235554.js +1 -0
  41. sky/dashboard/out/_next/static/chunks/871-3db673be3ee3750b.js +6 -0
  42. sky/dashboard/out/_next/static/chunks/875.52c962183328b3f2.js +25 -0
  43. sky/dashboard/out/_next/static/chunks/973-81b2d057178adb76.js +1 -0
  44. sky/dashboard/out/_next/static/chunks/982.1b61658204416b0f.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/984.e8bac186a24e5178.js +1 -0
  46. sky/dashboard/out/_next/static/chunks/990-0ad5ea1699e03ee8.js +1 -0
  47. sky/dashboard/out/_next/static/chunks/pages/{_app-ce31493da9747ef4.js → _app-9a3ce3170d2edcec.js} +1 -1
  48. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js +6 -0
  49. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-8040f2483897ed0c.js +6 -0
  50. sky/dashboard/out/_next/static/chunks/pages/{clusters-7e9736af1c6345a6.js → clusters-f119a5630a1efd61.js} +1 -1
  51. sky/dashboard/out/_next/static/chunks/pages/config-6b255eae088da6a3.js +1 -0
  52. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-b302aea4d65766bf.js +1 -0
  53. sky/dashboard/out/_next/static/chunks/pages/infra-ee8cc4d449945d19.js +1 -0
  54. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-e4b23128db0774cd.js +16 -0
  55. sky/dashboard/out/_next/static/chunks/pages/jobs-0a5695ff3075d94a.js +1 -0
  56. sky/dashboard/out/_next/static/chunks/pages/users-4978cbb093e141e7.js +1 -0
  57. sky/dashboard/out/_next/static/chunks/pages/volumes-476b670ef33d1ecd.js +1 -0
  58. sky/dashboard/out/_next/static/chunks/pages/workspace/{new-31aa8bdcb7592635.js → new-5b59bce9eb208d84.js} +1 -1
  59. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-cb7e720b739de53a.js +1 -0
  60. sky/dashboard/out/_next/static/chunks/pages/workspaces-50e230828730cfb3.js +1 -0
  61. sky/dashboard/out/_next/static/chunks/webpack-08fdb9e6070127fc.js +1 -0
  62. sky/dashboard/out/_next/static/css/52082cf558ec9705.css +3 -0
  63. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  64. sky/dashboard/out/clusters/[cluster].html +1 -1
  65. sky/dashboard/out/clusters.html +1 -1
  66. sky/dashboard/out/config.html +1 -1
  67. sky/dashboard/out/index.html +1 -1
  68. sky/dashboard/out/infra/[context].html +1 -1
  69. sky/dashboard/out/infra.html +1 -1
  70. sky/dashboard/out/jobs/[job].html +1 -1
  71. sky/dashboard/out/jobs.html +1 -1
  72. sky/dashboard/out/users.html +1 -1
  73. sky/dashboard/out/volumes.html +1 -0
  74. sky/dashboard/out/workspace/new.html +1 -1
  75. sky/dashboard/out/workspaces/[name].html +1 -1
  76. sky/dashboard/out/workspaces.html +1 -1
  77. sky/data/storage_utils.py +2 -4
  78. sky/exceptions.py +15 -0
  79. sky/execution.py +5 -0
  80. sky/global_user_state.py +129 -0
  81. sky/jobs/client/sdk.py +13 -11
  82. sky/jobs/server/core.py +4 -0
  83. sky/models.py +16 -0
  84. sky/provision/__init__.py +26 -0
  85. sky/provision/kubernetes/__init__.py +3 -0
  86. sky/provision/kubernetes/instance.py +38 -77
  87. sky/provision/kubernetes/utils.py +70 -4
  88. sky/provision/kubernetes/volume.py +147 -0
  89. sky/resources.py +20 -76
  90. sky/serve/client/sdk.py +13 -13
  91. sky/serve/server/core.py +5 -1
  92. sky/server/common.py +40 -5
  93. sky/server/constants.py +5 -1
  94. sky/server/metrics.py +105 -0
  95. sky/server/requests/executor.py +30 -14
  96. sky/server/requests/payloads.py +16 -0
  97. sky/server/requests/requests.py +35 -1
  98. sky/server/rest.py +153 -0
  99. sky/server/server.py +70 -43
  100. sky/server/state.py +20 -0
  101. sky/server/stream_utils.py +8 -3
  102. sky/server/uvicorn.py +153 -13
  103. sky/setup_files/dependencies.py +2 -0
  104. sky/skylet/constants.py +19 -3
  105. sky/skypilot_config.py +3 -0
  106. sky/ssh_node_pools/__init__.py +1 -0
  107. sky/ssh_node_pools/core.py +133 -0
  108. sky/ssh_node_pools/server.py +232 -0
  109. sky/task.py +141 -18
  110. sky/templates/kubernetes-ray.yml.j2 +30 -1
  111. sky/users/permission.py +2 -0
  112. sky/utils/context.py +3 -1
  113. sky/utils/kubernetes/deploy_remote_cluster.py +12 -185
  114. sky/utils/kubernetes/ssh_utils.py +221 -0
  115. sky/utils/resources_utils.py +66 -0
  116. sky/utils/rich_utils.py +6 -0
  117. sky/utils/schemas.py +146 -3
  118. sky/utils/status_lib.py +10 -0
  119. sky/utils/validator.py +11 -1
  120. sky/volumes/__init__.py +0 -0
  121. sky/volumes/client/__init__.py +0 -0
  122. sky/volumes/client/sdk.py +64 -0
  123. sky/volumes/server/__init__.py +0 -0
  124. sky/volumes/server/core.py +199 -0
  125. sky/volumes/server/server.py +85 -0
  126. sky/volumes/utils.py +158 -0
  127. sky/volumes/volume.py +198 -0
  128. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/METADATA +2 -1
  129. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/RECORD +135 -115
  130. sky/dashboard/out/_next/static/chunks/211.692afc57e812ae1a.js +0 -1
  131. sky/dashboard/out/_next/static/chunks/350.9e123a4551f68b0d.js +0 -1
  132. sky/dashboard/out/_next/static/chunks/37-4650f214e2119168.js +0 -6
  133. sky/dashboard/out/_next/static/chunks/42.2273cc2415291ceb.js +0 -6
  134. sky/dashboard/out/_next/static/chunks/443.b2242d0efcdf5f47.js +0 -1
  135. sky/dashboard/out/_next/static/chunks/470-1494c899266cf5c9.js +0 -1
  136. sky/dashboard/out/_next/static/chunks/513.309df9e18a9ff005.js +0 -1
  137. sky/dashboard/out/_next/static/chunks/641.c8e452bc5070a630.js +0 -1
  138. sky/dashboard/out/_next/static/chunks/682.4dd5dc116f740b5f.js +0 -6
  139. sky/dashboard/out/_next/static/chunks/760-a89d354797ce7af5.js +0 -1
  140. sky/dashboard/out/_next/static/chunks/843-bde186946d353355.js +0 -11
  141. sky/dashboard/out/_next/static/chunks/856-bfddc18e16f3873c.js +0 -1
  142. sky/dashboard/out/_next/static/chunks/901-b424d293275e1fd7.js +0 -1
  143. sky/dashboard/out/_next/static/chunks/973-56412c7976b4655b.js +0 -1
  144. sky/dashboard/out/_next/static/chunks/984.ae8c08791d274ca0.js +0 -50
  145. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-4e065c812a52460b.js +0 -6
  146. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-520ec1ab65e2f2a4.js +0 -6
  147. sky/dashboard/out/_next/static/chunks/pages/config-e4f473661889e7cd.js +0 -1
  148. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-00fd23b9577492ca.js +0 -1
  149. sky/dashboard/out/_next/static/chunks/pages/infra-8a4bf7370d4d9bb7.js +0 -1
  150. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-171c27f4ca94861c.js +0 -16
  151. sky/dashboard/out/_next/static/chunks/pages/jobs-55e5bcb16d563231.js +0 -1
  152. sky/dashboard/out/_next/static/chunks/pages/users-c9f4d785cdaa52d8.js +0 -1
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-ecc5a7003776cfa7.js +0 -1
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-f00cba35691483b1.js +0 -1
  155. sky/dashboard/out/_next/static/chunks/webpack-c85998e6a5722f21.js +0 -1
  156. sky/dashboard/out/_next/static/css/6ab927686b492a4a.css +0 -3
  157. sky/dashboard/out/_next/static/zsALxITkbP8J8NVwSDwMo/_buildManifest.js +0 -1
  158. /sky/dashboard/out/_next/static/{zsALxITkbP8J8NVwSDwMo → bs6UB9V4Jq10TIZ5x-kBK}/_ssgManifest.js +0 -0
  159. /sky/dashboard/out/_next/static/chunks/{938-ce7991c156584b06.js → 938-068520cc11738deb.js} +0 -0
  160. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/WHEEL +0 -0
  161. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/entry_points.txt +0 -0
  162. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/licenses/LICENSE +0 -0
  163. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250626.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,221 @@
1
+ """Utility functions for managing SSH node pools."""
2
+ import os
3
+ import re
4
+ import subprocess
5
+ from typing import Any, Callable, Dict, List, Optional
6
+ import uuid
7
+
8
+ import yaml
9
+
10
+ from sky.utils import ux_utils
11
+
12
+ DEFAULT_SSH_NODE_POOLS_PATH = os.path.expanduser('~/.sky/ssh_node_pools.yaml')
13
+ RED = '\033[0;31m'
14
+ NC = '\033[0m' # No color
15
+
16
+
17
def check_host_in_ssh_config(hostname: str) -> bool:
    """Tell whether the user's OpenSSH client config has a stanza for a host.

    The function shells out to ``ssh -vvG <hostname> -o ConnectTimeout=0``
    and scans the combined stdout/stderr for debug lines of the form
    ``debugN: ... Applying options for <pattern>``:

      * ``-G`` asks ssh to print the effective configuration instead of
        connecting.
      * ``-vv`` enables the debug lines that name the stanzas being applied.
      * ``ConnectTimeout=0`` is passed with the intent of avoiding a DNS
        lookup when *hostname* is a FQDN/IP.

    The config files themselves are never opened or parsed here; any
    ``Include`` handling is left to ssh.

    Args:
        hostname: The alias/IP/FQDN to test.

    Returns:
        True if at least one "Applying options for" line names a pattern
        that does not start with ``*``; False if only the global defaults
        (``Host *``) applied or no such line was printed.
    """
    # NOTE(review): `hostname` is passed positionally, so a value starting
    # with '-' would be read by ssh as an option - confirm callers validate.
    ssh_cmd = ['ssh', '-vvG', hostname, '-o', 'ConnectTimeout=0']

    # Debug output goes to stderr, so fold stderr into stdout. A non-zero
    # exit status is not an error for our purposes: only the text matters.
    completed = subprocess.run(
        ssh_cmd,
        check=False,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )

    # Example of a line we are looking for:
    #   debug1: ~/.ssh/config line 42: Applying options for <hostname>
    # `[^*]` rejects the catch-all `*` stanza.
    applying_options_re = re.compile(
        r'^debug\d+: .*Applying options for ([^*].*)$', re.MULTILINE)

    return applying_options_re.search(completed.stdout) is not None
57
+
58
+
59
class UniqueKeySafeLoader(yaml.SafeLoader):
    """``yaml.SafeLoader`` variant that rejects mappings with repeated keys."""

    def construct_mapping(self, node, deep=False):
        """Build the mapping for *node*, failing on the first repeated key.

        Raises:
            yaml.constructor.ConstructorError: If the same key appears more
                than once in the mapping node. The human-readable message
                is stored in the error's ``note`` attribute.
        """
        seen_keys = set()
        for key_node, _value_node in node.value:
            key = self.construct_object(key_node, deep=deep)
            if key not in seen_keys:
                seen_keys.add(key)
                continue
            raise yaml.constructor.ConstructorError(
                note=(f'Duplicate key found: {key!r}.\n'
                      'Please remove one of them from the YAML file.'))
        # All keys are unique; let the stock implementation do the real work.
        return super().construct_mapping(node, deep)
72
+
73
+
74
def load_ssh_targets(file_path: str) -> Dict[str, Any]:
    """Load SSH targets from YAML file.

    The file is parsed with ``UniqueKeySafeLoader``, so duplicate keys in a
    mapping are reported as an error instead of being silently overwritten.

    Args:
        file_path: Path to the SSH Node Pools YAML file.

    Returns:
        The parsed YAML document, returned as-is (no schema validation is
        done here).

    Raises:
        ValueError: If the file does not exist, cannot be read, or is not
            valid YAML (including duplicate keys).
    """
    if not os.path.exists(file_path):
        with ux_utils.print_exception_no_traceback():
            raise ValueError(f'SSH Node Pools file not found: {file_path}')

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return yaml.load(f, Loader=UniqueKeySafeLoader)
    except yaml.constructor.ConstructorError as e:
        # The duplicate-key check stores its message in `note`; other
        # constructor errors (e.g. unknown tags) typically leave `note` unset,
        # so fall back to the full error text rather than raising
        # ValueError(None).
        message = e.note or f'Error loading SSH Node Pools file: {e}'
        with ux_utils.print_exception_no_traceback():
            raise ValueError(message) from e
    except (yaml.YAMLError, OSError) as e:
        with ux_utils.print_exception_no_traceback():
            raise ValueError(f'Error loading SSH Node Pools file: {e}') from e
90
+
91
+
92
def get_cluster_config(
        targets: Dict[str, Any],
        cluster_name: Optional[str] = None,
        file_path: str = DEFAULT_SSH_NODE_POOLS_PATH) -> Dict[str, Any]:
    """Select the configuration of one cluster, or of every cluster.

    Args:
        targets: Mapping of cluster name to cluster configuration.
        cluster_name: Name of the cluster to select. When empty or None,
            all clusters are returned.
        file_path: Path of the SSH Node Pools file; only used in error
            messages.

    Returns:
        ``{cluster_name: targets[cluster_name]}`` when a cluster name is
        given, otherwise *targets* itself.

    Raises:
        ValueError: If *targets* is empty, or *cluster_name* is given but
            not present in *targets*.
    """
    if not targets:
        with ux_utils.print_exception_no_traceback():
            raise ValueError(
                f'No clusters defined in SSH Node Pools file {file_path}')

    # No specific cluster requested: hand back everything.
    if not cluster_name:
        return targets

    if cluster_name in targets:
        return {cluster_name: targets[cluster_name]}

    with ux_utils.print_exception_no_traceback():
        raise ValueError(f'Cluster {cluster_name!r} not found in '
                         f'SSH Node Pools file {file_path}')
111
+
112
+
113
def prepare_hosts_info(
    cluster_name: str,
    cluster_config: Dict[str, Any],
    upload_ssh_key_func: Optional[Callable[[str, str], str]] = None
) -> List[Dict[str, Any]]:
    """Prepare list of hosts with resolved user, identity_file, and password.

    Each entry of ``cluster_config['hosts']`` is either a string (an IP or an
    SSH config hostname) or a dict with an ``ip`` field and optional ``user``,
    ``identity_file`` and ``password`` overrides. Values missing on a host fall
    back to the cluster-level ones. Hosts that match a stanza in the user's
    SSH config get an empty ``user`` and ``identity_file`` so that the SSH
    config is used instead.

    Args:
        cluster_name: The name of the cluster.
        cluster_config: The configuration for the cluster.
        upload_ssh_key_func: A function to upload the SSH key to the remote
            server and wait for the key to be uploaded. This function will
            take the key name and the local key file path as input, and
            return the path for the remote SSH key file on the API server.
            This function will only be set in `sky ssh up -f` mode, and if
            this function is set, any ssh config will not be allowed as we
            don't support uploading any ssh config to the API server.

    Returns:
        A list of dicts with keys ``ip``, ``user``, ``identity_file``,
        ``password`` and ``use_ssh_config`` (bool). Dict hosts without an
        ``ip`` field are skipped with a printed warning.

    Raises:
        ValueError: If no hosts are defined, an identity file does not exist
            locally, or an SSH config hostname is used together with
            ``upload_ssh_key_func``.
    """
    if not cluster_config.get('hosts'):
        with ux_utils.print_exception_no_traceback():
            raise ValueError(
                f'No hosts defined in cluster {cluster_name} configuration')

    # Get cluster-level defaults
    cluster_user = cluster_config.get('user', '')
    cluster_identity_file = os.path.expanduser(
        cluster_config.get('identity_file', ''))
    cluster_password = cluster_config.get('password', '')

    def _check_local_identity_file(path: str) -> None:
        # An empty path means "no key configured" and is always accepted.
        if path and not os.path.isfile(path):
            with ux_utils.print_exception_no_traceback():
                raise ValueError(f'SSH Identity File Missing: {path}')

    # Check if cluster identity file exists
    _check_local_identity_file(cluster_identity_file)

    use_cluster_config_msg = (f'Cluster {cluster_name} uses SSH config '
                              'for hostname {host}, which is not '
                              'supported by the -f flag. Please use a '
                              'dict with `ip` field instead.')

    def _maybe_hardcode_identity_file(i: int, identity_file: str) -> str:
        # Nothing to upload when no upload function is given, or when the
        # host has no key at all (e.g. password authentication).
        if upload_ssh_key_func is None or not identity_file:
            return identity_file
        if not os.path.exists(os.path.expanduser(identity_file)):
            with ux_utils.print_exception_no_traceback():
                raise ValueError(
                    f'Identity file {identity_file} does not exist.')
        # The short random suffix keeps key names distinct across uploads.
        key_name = f'{cluster_name}-{i}-{str(uuid.uuid4())[:4]}'
        return upload_ssh_key_func(key_name, identity_file)

    hosts_info = []
    for i, host in enumerate(cluster_config['hosts']):
        # Host can be a string (IP or SSH config hostname) or a dict
        if isinstance(host, str):
            # Check if this is an SSH config hostname
            is_ssh_config_host = check_host_in_ssh_config(host)
            if upload_ssh_key_func is not None and is_ssh_config_host:
                with ux_utils.print_exception_no_traceback():
                    raise ValueError(use_cluster_config_msg.format(host=host))

            hosts_info.append({
                'ip': host,
                'user': '' if is_ssh_config_host else cluster_user,
                'identity_file': '' if is_ssh_config_host else
                                 _maybe_hardcode_identity_file(
                                     i, cluster_identity_file),
                'password': cluster_password,
                'use_ssh_config': is_ssh_config_host
            })
            continue

        # It's a dict with potential overrides
        if 'ip' not in host:
            print(f'{RED}Warning: Host missing \'ip\' field, '
                  f'skipping: {host}{NC}')
            continue

        host_ip = host['ip']
        # Check if this is an SSH config hostname
        is_ssh_config_host = check_host_in_ssh_config(host_ip)
        if upload_ssh_key_func is not None and is_ssh_config_host:
            with ux_utils.print_exception_no_traceback():
                # Report the host name, not the whole host dict.
                raise ValueError(use_cluster_config_msg.format(host=host_ip))

        # Use host-specific values or fall back to cluster defaults
        if is_ssh_config_host:
            host_user = ''
            host_identity_file = ''
        else:
            host_user = host.get('user', cluster_user)
            local_identity_file = os.path.expanduser(
                host.get('identity_file', cluster_identity_file))
            # Validate the LOCAL key before uploading. After the upload the
            # path refers to the API server and must not be checked against
            # the local filesystem.
            _check_local_identity_file(local_identity_file)
            host_identity_file = _maybe_hardcode_identity_file(
                i, local_identity_file)
        host_password = host.get('password', cluster_password)

        hosts_info.append({
            'ip': host_ip,
            'user': host_user,
            'identity_file': host_identity_file,
            'password': host_password,
            'use_ssh_config': is_ssh_config_host
        })

    return hosts_info
@@ -8,6 +8,7 @@ import typing
8
8
  from typing import Dict, List, Optional, Set, Union
9
9
 
10
10
  from sky import skypilot_config
11
+ from sky.skylet import constants
11
12
  from sky.utils import common_utils
12
13
  from sky.utils import registry
13
14
  from sky.utils import ux_utils
@@ -331,3 +332,68 @@ def make_launchables_for_valid_region_zones(
331
332
  # Batch the requests at the granularity of a single region.
332
333
  launchables.append(launchable_resources.copy(region=region.name))
333
334
  return launchables
335
+
336
+
337
def parse_memory_resource(resource_qty_str: Union[str, int, float],
                          field_name: str,
                          ret_type: type = int,
                          unit: str = 'gb',
                          allow_plus: bool = False,
                          allow_x: bool = False,
                          allow_rounding: bool = False) -> str:
    """Normalize a memory quantity string to the requested unit.

    A value that is already a plain number is assumed to be in the wanted
    unit and is returned unchanged. A value with a unit suffix found in
    ``constants.MEMORY_SIZE_UNITS`` is converted to ``unit``.

    Args:
        resource_qty_str: Resource quantity, e.g. a number or a string with
            a unit suffix.
        field_name: Name of the field being parsed; only used in the error
            message.
        ret_type: Numeric type used to parse and render the value.
        unit: Unit to convert to; must be a key of
            ``constants.MEMORY_SIZE_UNITS``.
        allow_plus: Whether a trailing '+' is accepted.
        allow_x: Whether a trailing 'x' is accepted.
        allow_rounding: Whether a converted value that is not exactly
            representable as ``ret_type`` may be truncated by ``ret_type``.

    Returns:
        The quantity as a string, followed by '+' and/or 'x' when those
        suffixes were present and allowed.

    Raises:
        ValueError: If the value cannot be parsed, uses a disallowed suffix,
            or would need rounding while ``allow_rounding`` is False.
    """
    assert unit in constants.MEMORY_SIZE_UNITS, f'Invalid unit: {unit}'

    error_msg = (f'"{field_name}" field should be a '
                 f'{constants.MEMORY_SIZE_PATTERN}+?,'
                 f' got {resource_qty_str}')

    quantity = str(resource_qty_str)

    # Peel off the optional trailing markers, '+' first and then 'x'.
    # 'x' is only used internally for the jobs controller.
    trailing = ''
    for marker, allowed in (('+', allow_plus), ('x', allow_x)):
        if not quantity.endswith(marker):
            continue
        if not allowed:
            raise ValueError(error_msg)
        quantity = quantity[:-1]
        trailing += marker

    # A bare number is taken to be in the wanted unit already, which keeps
    # backwards compatibility with unit-less values.
    try:
        ret_type(quantity)
    except ValueError:
        pass
    else:
        return f'{quantity}{trailing}'

    quantity = quantity.lower()
    for unit_suffix, factor in constants.MEMORY_SIZE_UNITS.items():
        if not quantity.endswith(unit_suffix):
            continue
        try:
            amount = ret_type(quantity[:-len(unit_suffix)])
            scaled = amount * factor / constants.MEMORY_SIZE_UNITS[unit]
            if not allow_rounding and ret_type(scaled) != scaled:
                # Caught just below: the remaining units are still tried
                # before the final error is raised.
                raise ValueError(error_msg)
            return f'{ret_type(scaled)}{trailing}'
        except ValueError:
            # Either the numeric part did not parse with this suffix or
            # rounding was refused; try the next unit.
            continue

    raise ValueError(error_msg)
sky/utils/rich_utils.py CHANGED
@@ -7,6 +7,7 @@ import threading
7
7
  import typing
8
8
  from typing import Callable, Iterator, Optional, Tuple, Union
9
9
 
10
+ from sky import exceptions
10
11
  from sky.adaptors import common as adaptors_common
11
12
  from sky.utils import annotations
12
13
  from sky.utils import context
@@ -58,6 +59,7 @@ class Control(enum.Enum):
58
59
  EXIT = 'rich_exit'
59
60
  UPDATE = 'rich_update'
60
61
  HEARTBEAT = 'heartbeat'
62
+ RETRY = 'retry'
61
63
 
62
64
  def encode(self, msg: str) -> str:
63
65
  return f'<{self.value}>{msg}</{self.value}>'
@@ -365,6 +367,10 @@ def decode_rich_status(
365
367
  yield line
366
368
  continue
367
369
 
370
+ if control == Control.RETRY:
371
+ raise exceptions.ServerTemporarilyUnavailableError(
372
+ 'The server is temporarily unavailable. Please try '
373
+ 'again.')
368
374
  # control is not None, i.e. it is a rich status control message.
369
375
  if threading.current_thread() is not threading.main_thread():
370
376
  yield None
sky/utils/schemas.py CHANGED
@@ -70,8 +70,36 @@ _AUTOSTOP_SCHEMA = {
70
70
  }
71
71
 
72
72
 
73
- def _get_single_resources_schema():
74
- """Schema for a single resource in a resources list."""
73
+ # Note: This is similar to _get_infra_pattern()
74
+ # but without the wildcard patterns.
75
def _get_volume_infra_pattern():
    """Build the regex that validates the ``infra`` field of a volume.

    Accepted forms are ``cloud[/region[/zone]]`` for every cloud in
    ``constants.ALL_CLOUDS`` other than kubernetes, or a Kubernetes
    reference (``kubernetes``/``k8s``, optionally followed by ``/<context>``).
    """
    # Any known cloud name except kubernetes, matched case-insensitively.
    cloud_names = list(constants.ALL_CLOUDS)
    cloud_names.remove('kubernetes')
    cloud_alternation = '|'.join(cloud_names)
    cloud_re = f'(?i:({cloud_alternation}))'

    # Optional "/region", itself optionally followed by "/zone". Each part is
    # a slash followed by one or more non-slash characters.
    region_zone_re = '(?:/[^/]+(?:/[^/]+)?)?'

    # "kubernetes" or "k8s" alone, or followed by "/<context>" where the
    # context name may itself contain slashes.
    k8s_re = '(?i:kubernetes|k8s)(?:/.+)?'

    # Anchor at both ends so the whole string has to match one alternative.
    return f'^(?:{cloud_re}{region_zone_re}|{k8s_re})$'
100
+
101
+
102
+ def _get_infra_pattern():
75
103
  # Building the regex pattern for the infra field
76
104
  # Format: cloud[/region[/zone]] or wildcards or kubernetes context
77
105
  # Match any cloud name (case insensitive)
@@ -103,7 +131,11 @@ def _get_single_resources_schema():
103
131
  infra_pattern = (f'^(?:{cloud_pattern}{region_zone_pattern}|'
104
132
  f'{wildcard_cloud}{wildcard_with_region}|'
105
133
  f'{kubernetes_pattern})$')
134
+ return infra_pattern
106
135
 
136
+
137
+ def _get_single_resources_schema():
138
+ """Schema for a single resource in a resources list."""
107
139
  return {
108
140
  '$schema': 'https://json-schema.org/draft/2020-12/schema',
109
141
  'type': 'object',
@@ -133,7 +165,7 @@ def _get_single_resources_schema():
133
165
  # 3. Kubernetes patterns - e.g. "kubernetes/my-context",
134
166
  # "k8s/context-name",
135
167
  # "k8s/aws:eks:us-east-1:123456789012:cluster/my-cluster"
136
- 'pattern': infra_pattern,
168
+ 'pattern': _get_infra_pattern(),
137
169
  },
138
170
  'cpus': {
139
171
  'anyOf': [{
@@ -383,6 +415,66 @@ def get_resources_schema():
383
415
  }
384
416
 
385
417
 
418
def get_volume_schema():
    """Schema for a volume definition.

    ``name``, ``type`` and ``infra`` are required and no properties other
    than the ones listed here are accepted at the top level.
    """
    # pylint: disable=import-outside-toplevel
    from sky.volumes import volume

    plain_string = 'string'
    volume_type_values = [member.value for member in volume.VolumeType]
    access_mode_values = [
        member.value for member in volume.VolumeAccessMode
    ]

    # Pattern validates:
    # 1. cloud[/region[/zone]] - e.g. "aws", "aws/us-east-1",
    #    "aws/us-east-1/us-east-1a"
    # 2. Kubernetes patterns - e.g. "kubernetes/my-context",
    #    "k8s/context-name",
    #    "k8s/aws:eks:us-east-1:123456789012:cluster/my-cluster"
    infra_schema = {
        'type': plain_string,
        'description': ('Infrastructure specification in format: '
                        'cloud[/region[/zone]].'),
        'pattern': _get_volume_infra_pattern(),
    }

    config_schema = {
        'type': 'object',
        'required': [],
        'properties': {
            'storage_class_name': {
                'type': plain_string,
            },
            'access_mode': {
                'type': plain_string,
                'case_sensitive_enum': access_mode_values,
            },
            'namespace': {
                'type': plain_string,
            },
        },
    }

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': ['name', 'type', 'infra'],
        'additionalProperties': False,
        'properties': {
            'name': {
                'type': plain_string,
            },
            'type': {
                'type': plain_string,
                'case_sensitive_enum': volume_type_values,
            },
            'infra': infra_schema,
            'size': {
                'type': plain_string,
                'pattern': constants.MEMORY_SIZE_PATTERN,
            },
            'resource_name': {
                'type': plain_string,
            },
            'config': config_schema,
        }
    }
476
+
477
+
386
478
  def get_storage_schema():
387
479
  # pylint: disable=import-outside-toplevel
388
480
  from sky.data import storage
@@ -457,6 +549,49 @@ def get_storage_schema():
457
549
  }
458
550
 
459
551
 
552
def get_volume_mount_schema():
    """Schema for volume mount object in task config (internal use only).

    No field is required. ``volume_config`` accepts extra properties, while
    the mount object itself does not.
    """

    def _string_or_null():
        # Return a fresh dict on every call so the two users below do not
        # share one object.
        return {'anyOf': [{'type': 'string'}, {'type': 'null'}]}

    volume_config_schema = {
        'type': 'object',
        'required': [],
        'additionalProperties': True,
        'properties': {
            'cloud': {
                'type': 'string',
                'case_insensitive_enum': list(constants.ALL_CLOUDS)
            },
            'region': _string_or_null(),
            'zone': _string_or_null(),
        },
    }

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': {
            'path': {
                'type': 'string',
            },
            'volume_name': {
                'type': 'string',
            },
            'volume_config': volume_config_schema,
        }
    }
593
+
594
+
460
595
  def get_service_schema():
461
596
  """Schema for top-level `service:` field (for SkyServe)."""
462
597
  # To avoid circular imports, only import when needed.
@@ -723,6 +858,14 @@ def get_task_schema():
723
858
  'config': _filter_schema(
724
859
  get_config_schema(),
725
860
  constants.OVERRIDEABLE_CONFIG_KEYS_IN_TASK),
861
+ # volumes config is validated separately using get_volume_schema
862
+ 'volumes': {
863
+ 'type': 'object',
864
+ },
865
+ 'volume_mounts': {
866
+ 'type': 'array',
867
+ 'items': get_volume_mount_schema(),
868
+ },
726
869
  **_experimental_task_schema(),
727
870
  }
728
871
  }
sky/utils/status_lib.py CHANGED
@@ -54,3 +54,13 @@ class StorageStatus(enum.Enum):
54
54
 
55
55
  # Finished uploading, in terminal state
56
56
  READY = 'READY'
57
+
58
+
59
class VolumeStatus(enum.Enum):
    """Status values stored for a volume in the 'volumes' table."""

    # The volume can be used.
    READY = 'READY'

    # The volume is currently being used.
    IN_USE = 'IN_USE'
sky/utils/validator.py CHANGED
@@ -14,9 +14,19 @@ def case_insensitive_enum(validator, enums, instance, schema):
14
14
  f'{instance!r} is not one of {enums!r}')
15
15
 
16
16
 
17
def case_sensitive_enum(validator, enums, instance, schema):
    """Validator callback for the ``case_sensitive_enum`` schema keyword.

    Yields a ``jsonschema.ValidationError`` when *instance* is not a member
    of *enums* (compared exactly, with no case folding); yields nothing
    otherwise.
    """
    del validator, schema  # Unused.
    if instance in enums:
        return
    yield jsonschema.ValidationError(f'{instance!r} is not one of {enums!r}')
22
+
23
+
17
24
  # Move this to a function to delay initialization
18
25
  def get_schema_validator():
19
26
  """Get the schema validator class, initializing it only when needed."""
20
27
  return jsonschema.validators.extend(
21
28
  jsonschema.Draft7Validator,
22
- validators={'case_insensitive_enum': case_insensitive_enum})
29
+ validators={
30
+ 'case_insensitive_enum': case_insensitive_enum,
31
+ 'case_sensitive_enum': case_sensitive_enum
32
+ })
File without changes
File without changes
@@ -0,0 +1,64 @@
1
+ """SDK functions for volumes."""
2
+ import json
3
+ import typing
4
+ from typing import List
5
+
6
+ from sky import sky_logging
7
+ from sky.adaptors import common as adaptors_common
8
+ from sky.server import common as server_common
9
+ from sky.server.requests import payloads
10
+ from sky.usage import usage_lib
11
+ from sky.utils import annotations
12
+ from sky.utils import context
13
+ from sky.volumes import volume as volume_lib
14
+
15
+ if typing.TYPE_CHECKING:
16
+ import requests
17
+ else:
18
+ requests = adaptors_common.LazyImport('requests')
19
+
20
+ logger = sky_logging.init_logger(__name__)
21
+
22
+
23
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def apply(volume: volume_lib.Volume) -> server_common.RequestId:
    """Ask the API server to create or register a volume.

    Args:
        volume: The volume to apply. Its name, type, cloud, region, zone,
            size and config are sent to the server.

    Returns:
        The request ID extracted from the server's response.
    """
    request_body = payloads.VolumeApplyBody(
        name=volume.name,
        volume_type=volume.type,
        cloud=volume.cloud,
        region=volume.region,
        zone=volume.zone,
        size=volume.size,
        config=volume.config,
    )
    endpoint = f'{server_common.get_server_url()}/volumes/apply'
    # Serialize through the model's own JSON dump, then parse it back so the
    # payload handed to `requests` is built from plain JSON types.
    payload = json.loads(request_body.model_dump_json())
    response = requests.post(endpoint,
                             json=payload,
                             cookies=server_common.get_api_cookie_jar())
    return server_common.get_request_id(response)
41
+
42
+
43
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def ls() -> server_common.RequestId:
    """Ask the API server for the list of all volumes.

    Returns:
        The request ID extracted from the server's response.
    """
    endpoint = f'{server_common.get_server_url()}/volumes'
    cookie_jar = server_common.get_api_cookie_jar()
    response = requests.get(endpoint, cookies=cookie_jar)
    return server_common.get_request_id(response)
52
+
53
+
54
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def delete(names: List[str]) -> server_common.RequestId:
    """Ask the API server to delete the named volumes.

    Args:
        names: Names of the volumes to delete.

    Returns:
        The request ID extracted from the server's response.
    """
    request_body = payloads.VolumeDeleteBody(names=names)
    endpoint = f'{server_common.get_server_url()}/volumes/delete'
    # Serialize through the model's own JSON dump, then parse it back so the
    # payload handed to `requests` is built from plain JSON types.
    payload = json.loads(request_body.model_dump_json())
    response = requests.post(endpoint,
                             json=payload,
                             cookies=server_common.get_api_cookie_jar())
    return server_common.get_request_id(response)
File without changes