skypilot-nightly 1.0.0.dev20250520__py3-none-any.whl → 1.0.0.dev20250522__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +4 -1
- sky/backends/cloud_vm_ray_backend.py +56 -37
- sky/check.py +3 -3
- sky/cli.py +89 -16
- sky/client/cli.py +89 -16
- sky/client/sdk.py +92 -4
- sky/clouds/__init__.py +2 -0
- sky/clouds/cloud.py +6 -0
- sky/clouds/gcp.py +156 -21
- sky/clouds/service_catalog/__init__.py +3 -0
- sky/clouds/service_catalog/common.py +9 -2
- sky/clouds/service_catalog/constants.py +1 -0
- sky/core.py +6 -8
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/CzOVV6JpRQBRt5GhZuhyK/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/236-1a3a9440417720eb.js +6 -0
- sky/dashboard/out/_next/static/chunks/37-d584022b0da4ac3b.js +6 -0
- sky/dashboard/out/_next/static/chunks/393-e1eaa440481337ec.js +1 -0
- sky/dashboard/out/_next/static/chunks/480-f28cd152a98997de.js +1 -0
- sky/dashboard/out/_next/static/chunks/{678-206dddca808e6d16.js → 582-683f4f27b81996dc.js} +2 -2
- sky/dashboard/out/_next/static/chunks/pages/_app-8cfab319f9fb3ae8.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33bc2bec322249b1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e2fc2dd1955e6c36.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-3a748bd76e5c2984.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-9180cd91cee64b96.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-70756c2dad850a7e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-ecd804b9272f4a7c.js +1 -0
- sky/dashboard/out/_next/static/css/7e7ce4ff31d3977b.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/data/storage.py +1 -0
- sky/execution.py +57 -8
- sky/jobs/server/core.py +5 -3
- sky/jobs/utils.py +38 -7
- sky/optimizer.py +41 -39
- sky/provision/gcp/constants.py +147 -4
- sky/provision/gcp/instance_utils.py +10 -0
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/provisioner.py +16 -7
- sky/resources.py +233 -18
- sky/serve/serve_utils.py +5 -13
- sky/serve/server/core.py +2 -4
- sky/server/common.py +60 -14
- sky/server/constants.py +2 -0
- sky/server/html/token_page.html +154 -0
- sky/server/requests/executor.py +3 -6
- sky/server/requests/payloads.py +3 -3
- sky/server/server.py +40 -8
- sky/skypilot_config.py +117 -31
- sky/task.py +24 -1
- sky/templates/gcp-ray.yml.j2 +44 -1
- sky/templates/nebius-ray.yml.j2 +0 -2
- sky/utils/admin_policy_utils.py +26 -22
- sky/utils/cli_utils/status_utils.py +95 -56
- sky/utils/common_utils.py +35 -2
- sky/utils/context.py +36 -6
- sky/utils/context_utils.py +15 -0
- sky/utils/infra_utils.py +175 -0
- sky/utils/resources_utils.py +55 -21
- sky/utils/schemas.py +111 -5
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/RECORD +73 -68
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/WHEEL +1 -1
- sky/dashboard/out/_next/static/8hlc2dkbIDDBOkxtEW7X6/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/845-0ca6f2c1ba667c3b.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- /sky/dashboard/out/_next/static/{8hlc2dkbIDDBOkxtEW7X6 → CzOVV6JpRQBRt5GhZuhyK}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/top_level.txt +0 -0
@@ -33,17 +33,15 @@ class StatusColumn:
|
|
33
33
|
def __init__(self,
|
34
34
|
name: str,
|
35
35
|
calc_func: Callable,
|
36
|
-
|
36
|
+
truncate: bool = True,
|
37
37
|
show_by_default: bool = True):
|
38
38
|
self.name = name
|
39
39
|
self.calc_func = calc_func
|
40
|
-
self.
|
40
|
+
self.truncate: bool = truncate
|
41
41
|
self.show_by_default = show_by_default
|
42
42
|
|
43
43
|
def calc(self, record):
|
44
|
-
val = self.calc_func(record)
|
45
|
-
if self.trunc_length != 0:
|
46
|
-
val = common_utils.truncate_long_string(str(val), self.trunc_length)
|
44
|
+
val = self.calc_func(record, self.truncate)
|
47
45
|
return val
|
48
46
|
|
49
47
|
|
@@ -68,19 +66,20 @@ def show_status_table(cluster_records: List[_ClusterRecord],
|
|
68
66
|
StatusColumn('USER_ID', _get_user_hash, show_by_default=False))
|
69
67
|
|
70
68
|
status_columns += [
|
71
|
-
StatusColumn('
|
72
|
-
StatusColumn('RESOURCES',
|
73
|
-
_get_resources,
|
74
|
-
trunc_length=70 if not show_all else 0),
|
75
|
-
StatusColumn('REGION', _get_region, show_by_default=False),
|
76
|
-
StatusColumn('ZONE', _get_zone, show_by_default=False),
|
69
|
+
StatusColumn('INFRA', _get_infra, truncate=not show_all),
|
70
|
+
StatusColumn('RESOURCES', _get_resources, truncate=not show_all),
|
77
71
|
StatusColumn('STATUS', _get_status_colored),
|
78
72
|
StatusColumn('AUTOSTOP', _get_autostop),
|
79
|
-
StatusColumn('
|
80
|
-
StatusColumn('COMMAND',
|
81
|
-
_get_command,
|
82
|
-
trunc_length=COMMAND_TRUNC_LENGTH if not show_all else 0),
|
73
|
+
StatusColumn('LAUNCHED', _get_launched),
|
83
74
|
]
|
75
|
+
if show_all:
|
76
|
+
status_columns += [
|
77
|
+
StatusColumn('HEAD_IP', _get_head_ip, show_by_default=False),
|
78
|
+
StatusColumn('COMMAND',
|
79
|
+
_get_command,
|
80
|
+
truncate=not show_all,
|
81
|
+
show_by_default=False),
|
82
|
+
]
|
84
83
|
|
85
84
|
columns = []
|
86
85
|
for status_column in status_columns:
|
@@ -160,10 +159,10 @@ def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
|
|
160
159
|
status_columns = [
|
161
160
|
StatusColumn('NAME', _get_name),
|
162
161
|
StatusColumn('LAUNCHED', _get_launched),
|
163
|
-
StatusColumn('DURATION', _get_duration,
|
162
|
+
StatusColumn('DURATION', _get_duration, truncate=False),
|
164
163
|
StatusColumn('RESOURCES',
|
165
164
|
_get_resources_for_cost_report,
|
166
|
-
|
165
|
+
truncate=False),
|
167
166
|
StatusColumn('STATUS',
|
168
167
|
_get_status_for_cost_report,
|
169
168
|
show_by_default=True),
|
@@ -221,47 +220,68 @@ def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
|
|
221
220
|
# Some of these lambdas are invoked on both _ClusterRecord and
|
222
221
|
# _ClusterCostReportRecord, which is okay as we guarantee the queried fields
|
223
222
|
# exist in those cases.
|
224
|
-
_get_name = (lambda cluster_record: cluster_record['name'])
|
225
|
-
_get_user_hash = (lambda cluster_record: cluster_record['user_hash'])
|
226
|
-
_get_user_name = (
|
227
|
-
|
223
|
+
_get_name = (lambda cluster_record, _: cluster_record['name'])
|
224
|
+
_get_user_hash = (lambda cluster_record, _: cluster_record['user_hash'])
|
225
|
+
_get_user_name = (
|
226
|
+
lambda cluster_record, _: cluster_record.get('user_name', '-'))
|
227
|
+
_get_launched = (lambda cluster_record, _: log_utils.readable_time_duration(
|
228
228
|
cluster_record['launched_at']))
|
229
|
-
|
230
|
-
lambda clusters_status: clusters_status['handle'].launched_resources.region)
|
231
|
-
_get_command = (lambda cluster_record: cluster_record['last_use'])
|
232
|
-
_get_duration = (lambda cluster_record: log_utils.readable_time_duration(
|
229
|
+
_get_duration = (lambda cluster_record, _: log_utils.readable_time_duration(
|
233
230
|
0, cluster_record['duration'], absolute=True))
|
234
231
|
|
235
232
|
|
236
|
-
def
|
237
|
-
|
238
|
-
|
233
|
+
def _get_command(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
    """Return the value shown in the COMMAND column for a cluster.

    Args:
        cluster_record: Cluster record; its 'last_use' entry is displayed,
            falling back to '-' when the key is absent.
        truncate: If True, shorten the command to COMMAND_TRUNC_LENGTH via
            common_utils.truncate_long_string.

    Returns:
        The (possibly truncated) last-use command string.
    """
    last_command = cluster_record.get('last_use', '-')
    if not truncate:
        return last_command
    return common_utils.truncate_long_string(last_command,
                                             COMMAND_TRUNC_LENGTH)
|
239
238
|
|
240
|
-
def _get_status_colored(cluster_record: _ClusterRecord) -> str:
|
241
|
-
return _get_status(cluster_record).colored_str()
|
242
239
|
|
240
|
+
def _get_status(cluster_record: _ClusterRecord,
|
241
|
+
truncate: bool = True) -> status_lib.ClusterStatus:
|
242
|
+
del truncate
|
243
|
+
return cluster_record['status']
|
243
244
|
|
244
|
-
def _get_resources(cluster_record: _ClusterRecord) -> str:
|
245
|
-
if 'resources_str' in cluster_record:
|
246
|
-
return cluster_record['resources_str']
|
247
|
-
handle = cluster_record['handle']
|
248
|
-
if isinstance(handle, backends.LocalDockerResourceHandle):
|
249
|
-
resources_str = 'docker'
|
250
|
-
elif isinstance(handle, backends.CloudVmRayResourceHandle):
|
251
|
-
resources_str = resources_utils.get_readable_resources_repr(handle)
|
252
|
-
else:
|
253
|
-
raise ValueError(f'Unknown handle type {type(handle)} encountered.')
|
254
|
-
return resources_str
|
255
245
|
|
246
|
+
def _get_status_colored(cluster_record: _ClusterRecord,
|
247
|
+
truncate: bool = True) -> str:
|
248
|
+
del truncate
|
249
|
+
return _get_status(cluster_record).colored_str()
|
256
250
|
|
257
|
-
def _get_zone(cluster_record: _ClusterRecord) -> str:
|
258
|
-
zone_str = cluster_record['handle'].launched_resources.zone
|
259
|
-
if zone_str is None:
|
260
|
-
zone_str = '-'
|
261
|
-
return zone_str
|
262
251
|
|
252
|
+
def _get_resources(cluster_record: _ClusterRecord,
                   truncate: bool = True) -> str:
    """Return the value shown in the RESOURCES column for a cluster.

    Args:
        cluster_record: Cluster record. Must contain 'handle'; may contain
            the precomputed strings 'resources_str' and 'resources_str_full'.
        truncate: If True, prefer the short form. If False, prefer
            'resources_str_full' when the record provides it.

    Returns:
        '-' when the handle is not a CloudVmRayResourceHandle or has no
        launched resources. Otherwise the precomputed string from the record,
        or, when the record has none, the result of
        resources_utils.get_readable_resources_repr(handle, simplify=truncate).
    """
    handle = cluster_record['handle']
    if not isinstance(handle, backends.CloudVmRayResourceHandle):
        return '-'
    if handle.launched_resources is None:
        return '-'

    # Pick the first available representation, from most to least preferred.
    display = None
    if not truncate:
        display = cluster_record.get('resources_str_full', None)
    if display is None:
        display = cluster_record.get('resources_str', None)
    if display is None:
        display = resources_utils.get_readable_resources_repr(
            handle, simplify=truncate)
    return display
|
281
|
+
|
282
|
+
|
283
|
+
def _get_autostop(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
|
284
|
+
del truncate
|
265
285
|
autostop_str = ''
|
266
286
|
separation = ''
|
267
287
|
if cluster_record['autostop'] >= 0:
|
@@ -276,7 +296,8 @@ def _get_autostop(cluster_record: _ClusterRecord) -> str:
|
|
276
296
|
return autostop_str
|
277
297
|
|
278
298
|
|
279
|
-
def _get_head_ip(cluster_record: _ClusterRecord) -> str:
|
299
|
+
def _get_head_ip(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
|
300
|
+
del truncate # Unused
|
280
301
|
handle = cluster_record['handle']
|
281
302
|
if not isinstance(handle, backends.CloudVmRayResourceHandle):
|
282
303
|
return '-'
|
@@ -291,6 +312,25 @@ def _is_pending_autostop(cluster_record: _ClusterRecord) -> bool:
|
|
291
312
|
cluster_record) != status_lib.ClusterStatus.STOPPED
|
292
313
|
|
293
314
|
|
315
|
+
def _get_infra(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
    """Return the value shown in the INFRA column for a cluster.

    Args:
        cluster_record: Cluster record. Must contain 'handle'; its optional
            'infra' entry is used when the handle has no launched resources.
        truncate: Forwarded to `formatted_str` of the launched resources'
            infra object to request a shortened region/zone.

    Returns:
        - '-' if the handle is not a CloudVmRayResourceHandle.
        - The record's 'infra' entry (default '-') if the handle has no
          launched resources yet.
        - Otherwise `handle.launched_resources.infra.formatted_str(truncate)`.
          NOTE(review): `infra` is presumably a
          sky.utils.infra_utils.InfraInfo, whose formatted_str renders
          "cloud (zone-or-region)", e.g. "aws (us-east-1)", rather than a
          slash-separated "cloud/region" string -- confirm against
          Resources.infra.
    """
    handle = cluster_record['handle']
    if not isinstance(handle, backends.CloudVmRayResourceHandle):
        return '-'

    launched_resources = handle.launched_resources
    if launched_resources is None:
        # Nothing launched yet: fall back to whatever the record carries.
        return cluster_record.get('infra', '-')
    return launched_resources.infra.formatted_str(truncate)
|
332
|
+
|
333
|
+
|
294
334
|
# ---- 'sky cost-report' helper functions below ----
|
295
335
|
|
296
336
|
|
@@ -347,14 +387,13 @@ def show_kubernetes_cluster_status_table(
|
|
347
387
|
show_all: bool) -> None:
|
348
388
|
"""Compute cluster table values and display for Kubernetes clusters."""
|
349
389
|
status_columns = [
|
350
|
-
StatusColumn('USER', lambda c: c.user),
|
351
|
-
StatusColumn('NAME', lambda c: c.cluster_name),
|
352
|
-
StatusColumn('
|
353
|
-
|
354
|
-
StatusColumn(
|
355
|
-
|
356
|
-
|
357
|
-
StatusColumn('STATUS', lambda c: c.status.colored_str()),
|
390
|
+
StatusColumn('USER', lambda c, _: c.user),
|
391
|
+
StatusColumn('NAME', lambda c, _: c.cluster_name),
|
392
|
+
StatusColumn('RESOURCES', lambda c, _: c.resources_str, truncate=False),
|
393
|
+
StatusColumn('STATUS', lambda c, _: c.status.colored_str()),
|
394
|
+
StatusColumn(
|
395
|
+
'LAUNCHED',
|
396
|
+
lambda c, _: log_utils.readable_time_duration(c.launched_at)),
|
358
397
|
# TODO(romilb): We should consider adding POD_NAME field here when --all
|
359
398
|
# is passed to help users fetch pod name programmatically.
|
360
399
|
]
|
sky/utils/common_utils.py
CHANGED
@@ -723,10 +723,43 @@ def deprecated_function(
|
|
723
723
|
return new_func
|
724
724
|
|
725
725
|
|
726
|
-
def truncate_long_string(s: str,
|
727
|
-
|
726
|
+
def truncate_long_string(s: str,
|
727
|
+
max_length: int = 35,
|
728
|
+
truncate_middle: bool = False) -> str:
|
729
|
+
"""Truncate a string to a maximum length.
|
730
|
+
|
731
|
+
Args:
|
732
|
+
s: String to truncate.
|
733
|
+
max_length: Maximum length of the truncated string.
|
734
|
+
truncate_middle: Whether to truncate in the middle of the string.
|
735
|
+
If True, the middle part of the string is replaced with '...'.
|
736
|
+
If False, truncation happens at the end preserving whole words.
|
737
|
+
|
738
|
+
Returns:
|
739
|
+
Truncated string.
|
740
|
+
"""
|
728
741
|
if len(s) <= max_length:
|
729
742
|
return s
|
743
|
+
|
744
|
+
if truncate_middle:
|
745
|
+
# Reserve 3 characters for '...'
|
746
|
+
if max_length <= 3:
|
747
|
+
return '...'
|
748
|
+
|
749
|
+
# Calculate how many characters to keep from beginning and end
|
750
|
+
half_length = (max_length - 3) // 2
|
751
|
+
remainder = (max_length - 3) % 2
|
752
|
+
|
753
|
+
# Keep one more character at the beginning if max_length - 3 is odd
|
754
|
+
start_length = half_length + remainder
|
755
|
+
end_length = half_length
|
756
|
+
|
757
|
+
# When end_length is 0, just show the start part and '...'
|
758
|
+
if end_length == 0:
|
759
|
+
return s[:start_length] + '...'
|
760
|
+
return s[:start_length] + '...' + s[-end_length:]
|
761
|
+
|
762
|
+
# Original end-truncation logic
|
730
763
|
splits = s.split(' ')
|
731
764
|
if len(splits[0]) > max_length:
|
732
765
|
return splits[0][:max_length] + '...' # Use '…'?
|
sky/utils/context.py
CHANGED
@@ -57,6 +57,7 @@ class Context(object):
|
|
57
57
|
self._log_file = None
|
58
58
|
self._log_file_handle = None
|
59
59
|
self.env_overrides = {}
|
60
|
+
self.config_context = None
|
60
61
|
|
61
62
|
def cancel(self):
|
62
63
|
"""Cancel the context."""
|
@@ -159,17 +160,25 @@ class ContextualEnviron(MutableMapping):
|
|
159
160
|
ctx = get()
|
160
161
|
if ctx is not None:
|
161
162
|
if key in ctx.env_overrides:
|
162
|
-
|
163
|
+
value = ctx.env_overrides[key]
|
164
|
+
# None is used to indicate that the key is deleted in the
|
165
|
+
# context.
|
166
|
+
if value is None:
|
167
|
+
raise KeyError(key)
|
168
|
+
return value
|
163
169
|
return self._environ[key]
|
164
170
|
|
165
171
|
def __iter__(self):
|
166
172
|
ctx = get()
|
173
|
+
deleted_keys = set()
|
167
174
|
if ctx is not None:
|
168
|
-
for key in ctx.env_overrides:
|
175
|
+
for key, value in ctx.env_overrides.items():
|
176
|
+
if value is None:
|
177
|
+
deleted_keys.add(key)
|
169
178
|
yield key
|
170
179
|
for key in self._environ:
|
171
180
|
# Deduplicate the keys
|
172
|
-
if key not in ctx.env_overrides:
|
181
|
+
if key not in ctx.env_overrides and key not in deleted_keys:
|
173
182
|
yield key
|
174
183
|
else:
|
175
184
|
return self._environ.__iter__()
|
@@ -178,10 +187,27 @@ class ContextualEnviron(MutableMapping):
|
|
178
187
|
return len(dict(self))
|
179
188
|
|
180
189
|
def __setitem__(self, key, value):
|
181
|
-
|
190
|
+
ctx = get()
|
191
|
+
if ctx is not None:
|
192
|
+
ctx.env_overrides[key] = value
|
193
|
+
else:
|
194
|
+
self._environ.__setitem__(key, value)
|
182
195
|
|
183
196
|
def __delitem__(self, key):
|
184
|
-
|
197
|
+
ctx = get()
|
198
|
+
if ctx is not None:
|
199
|
+
if key in ctx.env_overrides:
|
200
|
+
del ctx.env_overrides[key]
|
201
|
+
elif key in self._environ:
|
202
|
+
# If the key is not set in the context but set in the environ
|
203
|
+
# of the process, we mark it as deleted in the context by
|
204
|
+
# setting the value to None.
|
205
|
+
ctx.env_overrides[key] = None
|
206
|
+
else:
|
207
|
+
# The key is not set in the context nor the process.
|
208
|
+
raise KeyError(key)
|
209
|
+
else:
|
210
|
+
self._environ.__delitem__(key)
|
185
211
|
|
186
212
|
def __repr__(self):
|
187
213
|
return self._environ.__repr__()
|
@@ -190,7 +216,11 @@ class ContextualEnviron(MutableMapping):
|
|
190
216
|
copied = self._environ.copy()
|
191
217
|
ctx = get()
|
192
218
|
if ctx is not None:
|
193
|
-
|
219
|
+
for key in ctx.env_overrides:
|
220
|
+
if ctx.env_overrides[key] is None:
|
221
|
+
copied.pop(key)
|
222
|
+
else:
|
223
|
+
copied[key] = ctx.env_overrides[key]
|
194
224
|
return copied
|
195
225
|
|
196
226
|
def setdefault(self, key, default=None):
|
sky/utils/context_utils.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
"""Utilities for SkyPilot context."""
|
2
2
|
import asyncio
|
3
|
+
import contextvars
|
3
4
|
import functools
|
4
5
|
import io
|
5
6
|
import multiprocessing
|
@@ -170,3 +171,17 @@ def cancellation_guard(func: F) -> F:
|
|
170
171
|
return func(*args, **kwargs)
|
171
172
|
|
172
173
|
return typing.cast(F, wrapper)
|
174
|
+
|
175
|
+
|
176
|
+
# TODO(aylei): replace this with asyncio.to_thread once we drop support for
|
177
|
+
# python 3.8
|
178
|
+
def to_thread(func, /, *args, **kwargs):
    """Run *func* in the running loop's default executor.

    Mirrors asyncio.to_thread (added in Python 3.9): the caller's contextvars
    context is copied and *func* is executed inside that copy, so values set
    in the calling coroutine are visible in the worker thread.

    Args:
        func: Callable to execute in the executor thread.
        *args: Positional arguments forwarded to *func*.
        **kwargs: Keyword arguments forwarded to *func*.

    Returns:
        The awaitable returned by loop.run_in_executor; awaiting it yields
        the return value of *func*.

    Raises:
        RuntimeError: If there is no running event loop.
    """
    running_loop = asyncio.get_running_loop()
    # Snapshot the current context; without this the worker thread would not
    # see the context variables of the calling coroutine.
    snapshot = contextvars.copy_context()
    bound_call = functools.partial(snapshot.run, func, *args, **kwargs)
    return running_loop.run_in_executor(None, bound_call)
|
sky/utils/infra_utils.py
ADDED
@@ -0,0 +1,175 @@
|
|
1
|
+
"""Utility functions for handling infrastructure specifications."""
|
2
|
+
import dataclasses
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from sky.utils import common_utils
|
6
|
+
from sky.utils import ux_utils
|
7
|
+
|
8
|
+
_REGION_OR_ZONE_TRUNCATION_LENGTH = 25
|
9
|
+
|
10
|
+
|
11
|
+
@dataclasses.dataclass
class InfraInfo:
    """Infrastructure information parsed from an infra string.

    An infra string has the form `cloud`, `cloud/region` or
    `cloud/region/zone`. A field that is None is unspecified; empty values
    and the wildcard `*` are normalized to None by the constructor.
    """
    cloud: Optional[str] = None
    region: Optional[str] = None
    zone: Optional[str] = None

    def __init__(self,
                 cloud: Optional[str] = None,
                 region: Optional[str] = None,
                 zone: Optional[str] = None):
        """Store the fields, mapping empty values and `*` to None.

        Because the class defines __init__ itself, the dataclass decorator
        does not generate one; it still supplies __repr__ and __eq__.

        Raises:
            AssertionError: If `cloud` is the literal string 'none', 'None'
                or 'NONE' (presumably a stringified None -- verify against
                callers).
        """
        assert cloud not in ['none', 'None', 'NONE'], 'cloud must be specified'

        def _or_none(value: Optional[str]) -> Optional[str]:
            # Empty strings and the wildcard both mean "not specified".
            return None if not value or value == '*' else value

        self.cloud = _or_none(cloud)
        self.region = _or_none(region)
        self.zone = _or_none(zone)

    @staticmethod
    def from_str(infra: Optional[str]) -> 'InfraInfo':
        """Parse the infra string into cloud, region, and zone components.

        The format of the infra string is `cloud`, `cloud/region`, or
        `cloud/region/zone`. Examples: `aws`, `aws/us-east-1`,
        `aws/us-east-1/us-east-1a`. For any field, `*` means that any value
        is acceptable; such a field is None in the returned InfraInfo.

        The cloud name is lower-cased. For `k8s`/`kubernetes`, the cloud is
        normalized to `kubernetes` and everything after the first slash is
        taken as the region (the context name), which may itself contain
        slashes; the zone is always None.

        Args:
            infra: The infra string, or None. Surrounding whitespace and
                leading/trailing slashes are ignored.

        Returns:
            An InfraInfo object containing cloud, region, and zone
            information. None or a blank string yields an InfraInfo with all
            fields None.

        Raises:
            ValueError: If the infra string contains an empty component or,
                for non-Kubernetes clouds, more than three components.
        """
        if infra is None or not infra.strip():
            return InfraInfo()

        infra = infra.strip().strip('/')

        # Split on / to get cloud, region, zone
        parts = [p.strip() for p in infra.split('/')]

        # str.split always yields at least one element, so this single check
        # also covers a missing cloud component (e.g. when infra was "/").
        if '' in parts:
            with ux_utils.print_exception_no_traceback():
                raise ValueError(
                    f'Invalid infra format: {infra}. Format should not contain '
                    'empty parts (e.g., double slashes "//").')

        cloud_name = parts[0].lower()
        rest = parts[1:]
        region: Optional[str] = None
        zone: Optional[str] = None

        if cloud_name in ['k8s', 'kubernetes']:
            # Kubernetes context names can contain slashes, so the entire
            # string after "k8s/" is the context name (region).
            cloud_name = 'kubernetes'  # Normalize k8s to kubernetes
            if rest:
                region = '/'.join(rest)
        else:
            if len(rest) > 2:
                with ux_utils.print_exception_no_traceback():
                    raise ValueError(
                        f'Invalid infra format: {infra}. Expected format '
                        'is "cloud", "cloud/region", or '
                        '"cloud/region/zone".')
            if rest:
                region = rest[0]
            if len(rest) > 1:
                zone = rest[1]

        # Wildcards ('*') are mapped to None by the constructor.
        return InfraInfo(cloud=cloud_name, region=region, zone=zone)

    def to_str(self) -> Optional[str]:
        """Formats cloud, region, and zone into an infra string.

        Unspecified fields are written as `*`, the cloud is lower-cased, and
        trailing wildcards are dropped, e.g. `aws`, `aws/us-east-1` or
        `*/us-east-1`.

        Returns:
            The slash-separated infra string, or None if no field is
            specified.
        """
        cloud = '*' if self.cloud is None else self.cloud.lower()
        region = '*' if self.region is None else self.region
        zone = '*' if self.zone is None else self.zone

        # Build the parts list and filter out trailing wildcards
        parts = [cloud, region, zone]
        while parts and parts[-1] == '*':
            parts.pop()

        if not parts:
            return None

        # Join the parts with '/'
        return '/'.join(parts)

    def formatted_str(self, truncate: bool = True) -> str:
        """Formats the infra for display as `cloud (zone-or-region)`.

        The zone is shown when set, otherwise the region; when neither is
        set only the cloud is shown.

        Args:
            truncate: Whether to shorten the region or zone to
                _REGION_OR_ZONE_TRUNCATION_LENGTH characters by eliding its
                middle.

        Returns:
            The display string, or '-' if the cloud is unspecified.
        """
        if self.cloud is None or self.cloud == '*':
            return '-'

        region_or_zone = None
        if self.zone is not None and self.zone != '*':
            region_or_zone = self.zone
        elif self.region is not None and self.region != '*':
            region_or_zone = self.region

        if region_or_zone is None:
            return f'{self.cloud}'

        if truncate:
            region_or_zone = common_utils.truncate_long_string(
                region_or_zone,
                _REGION_OR_ZONE_TRUNCATION_LENGTH,
                truncate_middle=True)
        return f'{self.cloud} ({region_or_zone})'
|
sky/utils/resources_utils.py
CHANGED
@@ -4,11 +4,11 @@ import enum
|
|
4
4
|
import itertools
|
5
5
|
import json
|
6
6
|
import math
|
7
|
-
import re
|
8
7
|
import typing
|
9
8
|
from typing import Dict, List, Optional, Set, Union
|
10
9
|
|
11
10
|
from sky import skypilot_config
|
11
|
+
from sky.utils import common_utils
|
12
12
|
from sky.utils import registry
|
13
13
|
from sky.utils import ux_utils
|
14
14
|
|
@@ -50,6 +50,20 @@ class DiskTier(enum.Enum):
|
|
50
50
|
return types.index(self) <= types.index(other)
|
51
51
|
|
52
52
|
|
53
|
+
class StorageType(enum.Enum):
|
54
|
+
"""Storage type."""
|
55
|
+
# Durable network storage, e.g. GCP persistent disks
|
56
|
+
NETWORK = 'network'
|
57
|
+
# Local instance storage, e.g. GCP local SSDs
|
58
|
+
INSTANCE = 'instance'
|
59
|
+
|
60
|
+
|
61
|
+
class DiskAttachMode(enum.Enum):
|
62
|
+
"""Disk attach mode."""
|
63
|
+
READ_ONLY = 'read_only'
|
64
|
+
READ_WRITE = 'read_write'
|
65
|
+
|
66
|
+
|
53
67
|
@dataclasses.dataclass
|
54
68
|
class ClusterName:
|
55
69
|
display_name: str
|
@@ -139,34 +153,54 @@ def simplify_ports(ports: List[str]) -> List[str]:
|
|
139
153
|
|
140
154
|
def format_resource(resource: 'resources_lib.Resources',
|
141
155
|
simplify: bool = False) -> str:
|
156
|
+
resource = resource.assert_launchable()
|
157
|
+
vcpu, mem = resource.cloud.get_vcpus_mem_from_instance_type(
|
158
|
+
resource.instance_type)
|
159
|
+
|
160
|
+
components = []
|
161
|
+
|
162
|
+
if resource.accelerators is not None:
|
163
|
+
acc, count = list(resource.accelerators.items())[0]
|
164
|
+
components.append(f'gpus={acc}:{count}')
|
165
|
+
|
166
|
+
is_k8s = str(resource.cloud).lower() == 'kubernetes'
|
167
|
+
if (resource.accelerators is None or is_k8s or not simplify):
|
168
|
+
if vcpu is not None:
|
169
|
+
components.append(f'cpus={int(vcpu)}')
|
170
|
+
if mem is not None:
|
171
|
+
components.append(f'mem={int(mem)}')
|
172
|
+
|
173
|
+
instance_type = resource.instance_type
|
142
174
|
if simplify:
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
assert vcpu is not None, 'vCPU must be specified'
|
149
|
-
hardware = f'vCPU={int(vcpu)}'
|
150
|
-
else:
|
151
|
-
hardware = f'{resource.accelerators}'
|
152
|
-
spot = '[Spot]' if resource.use_spot else ''
|
153
|
-
return f'{cloud}({spot}{hardware})'
|
175
|
+
instance_type = common_utils.truncate_long_string(instance_type, 15)
|
176
|
+
if not is_k8s:
|
177
|
+
components.append(instance_type)
|
178
|
+
if simplify:
|
179
|
+
components.append('...')
|
154
180
|
else:
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
181
|
+
image_id = resource.image_id
|
182
|
+
if image_id is not None:
|
183
|
+
if None in image_id:
|
184
|
+
components.append(f'image_id={image_id[None]}')
|
185
|
+
else:
|
186
|
+
components.append(f'image_id={image_id}')
|
187
|
+
components.append(f'disk={resource.disk_size}')
|
188
|
+
disk_tier = resource.disk_tier
|
189
|
+
if disk_tier is not None:
|
190
|
+
components.append(f'disk_tier={disk_tier.value}')
|
191
|
+
ports = resource.ports
|
192
|
+
if ports is not None:
|
193
|
+
components.append(f'ports={ports}')
|
194
|
+
|
195
|
+
spot = '[spot]' if resource.use_spot else ''
|
196
|
+
return f'{spot}({"" if not components else ", ".join(components)})'
|
163
197
|
|
164
198
|
|
165
199
|
def get_readable_resources_repr(handle: 'backends.CloudVmRayResourceHandle',
|
166
200
|
simplify: bool = False) -> str:
|
167
201
|
if (handle.launched_nodes is not None and
|
168
202
|
handle.launched_resources is not None):
|
169
|
-
return (f'{handle.launched_nodes}x
|
203
|
+
return (f'{handle.launched_nodes}x'
|
170
204
|
f'{format_resource(handle.launched_resources, simplify)}')
|
171
205
|
return _DEFAULT_MESSAGE_HANDLE_INITIALIZING
|
172
206
|
|