skypilot-nightly 1.0.0.dev20250520__py3-none-any.whl → 1.0.0.dev20250522__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +4 -1
  3. sky/backends/cloud_vm_ray_backend.py +56 -37
  4. sky/check.py +3 -3
  5. sky/cli.py +89 -16
  6. sky/client/cli.py +89 -16
  7. sky/client/sdk.py +92 -4
  8. sky/clouds/__init__.py +2 -0
  9. sky/clouds/cloud.py +6 -0
  10. sky/clouds/gcp.py +156 -21
  11. sky/clouds/service_catalog/__init__.py +3 -0
  12. sky/clouds/service_catalog/common.py +9 -2
  13. sky/clouds/service_catalog/constants.py +1 -0
  14. sky/core.py +6 -8
  15. sky/dashboard/out/404.html +1 -1
  16. sky/dashboard/out/_next/static/CzOVV6JpRQBRt5GhZuhyK/_buildManifest.js +1 -0
  17. sky/dashboard/out/_next/static/chunks/236-1a3a9440417720eb.js +6 -0
  18. sky/dashboard/out/_next/static/chunks/37-d584022b0da4ac3b.js +6 -0
  19. sky/dashboard/out/_next/static/chunks/393-e1eaa440481337ec.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/480-f28cd152a98997de.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/{678-206dddca808e6d16.js → 582-683f4f27b81996dc.js} +2 -2
  22. sky/dashboard/out/_next/static/chunks/pages/_app-8cfab319f9fb3ae8.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33bc2bec322249b1.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-e2fc2dd1955e6c36.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/pages/clusters-3a748bd76e5c2984.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/pages/infra-9180cd91cee64b96.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-70756c2dad850a7e.js +1 -0
  28. sky/dashboard/out/_next/static/chunks/pages/jobs-ecd804b9272f4a7c.js +1 -0
  29. sky/dashboard/out/_next/static/css/7e7ce4ff31d3977b.css +3 -0
  30. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  31. sky/dashboard/out/clusters/[cluster].html +1 -1
  32. sky/dashboard/out/clusters.html +1 -1
  33. sky/dashboard/out/index.html +1 -1
  34. sky/dashboard/out/infra.html +1 -0
  35. sky/dashboard/out/jobs/[job].html +1 -1
  36. sky/dashboard/out/jobs.html +1 -1
  37. sky/data/storage.py +1 -0
  38. sky/execution.py +57 -8
  39. sky/jobs/server/core.py +5 -3
  40. sky/jobs/utils.py +38 -7
  41. sky/optimizer.py +41 -39
  42. sky/provision/gcp/constants.py +147 -4
  43. sky/provision/gcp/instance_utils.py +10 -0
  44. sky/provision/gcp/volume_utils.py +247 -0
  45. sky/provision/provisioner.py +16 -7
  46. sky/resources.py +233 -18
  47. sky/serve/serve_utils.py +5 -13
  48. sky/serve/server/core.py +2 -4
  49. sky/server/common.py +60 -14
  50. sky/server/constants.py +2 -0
  51. sky/server/html/token_page.html +154 -0
  52. sky/server/requests/executor.py +3 -6
  53. sky/server/requests/payloads.py +3 -3
  54. sky/server/server.py +40 -8
  55. sky/skypilot_config.py +117 -31
  56. sky/task.py +24 -1
  57. sky/templates/gcp-ray.yml.j2 +44 -1
  58. sky/templates/nebius-ray.yml.j2 +0 -2
  59. sky/utils/admin_policy_utils.py +26 -22
  60. sky/utils/cli_utils/status_utils.py +95 -56
  61. sky/utils/common_utils.py +35 -2
  62. sky/utils/context.py +36 -6
  63. sky/utils/context_utils.py +15 -0
  64. sky/utils/infra_utils.py +175 -0
  65. sky/utils/resources_utils.py +55 -21
  66. sky/utils/schemas.py +111 -5
  67. {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/METADATA +1 -1
  68. {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/RECORD +73 -68
  69. {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/WHEEL +1 -1
  70. sky/dashboard/out/_next/static/8hlc2dkbIDDBOkxtEW7X6/_buildManifest.js +0 -1
  71. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  72. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  73. sky/dashboard/out/_next/static/chunks/845-0ca6f2c1ba667c3b.js +0 -1
  74. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  75. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  76. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  77. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  78. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  79. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  80. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  81. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  82. /sky/dashboard/out/_next/static/{8hlc2dkbIDDBOkxtEW7X6 → CzOVV6JpRQBRt5GhZuhyK}/_ssgManifest.js +0 -0
  83. {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/entry_points.txt +0 -0
  84. {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/licenses/LICENSE +0 -0
  85. {skypilot_nightly-1.0.0.dev20250520.dist-info → skypilot_nightly-1.0.0.dev20250522.dist-info}/top_level.txt +0 -0
@@ -33,17 +33,15 @@ class StatusColumn:
33
33
  def __init__(self,
34
34
  name: str,
35
35
  calc_func: Callable,
36
- trunc_length: int = 0,
36
+ truncate: bool = True,
37
37
  show_by_default: bool = True):
38
38
  self.name = name
39
39
  self.calc_func = calc_func
40
- self.trunc_length = trunc_length
40
+ self.truncate: bool = truncate
41
41
  self.show_by_default = show_by_default
42
42
 
43
43
  def calc(self, record):
44
- val = self.calc_func(record)
45
- if self.trunc_length != 0:
46
- val = common_utils.truncate_long_string(str(val), self.trunc_length)
44
+ val = self.calc_func(record, self.truncate)
47
45
  return val
48
46
 
49
47
 
@@ -68,19 +66,20 @@ def show_status_table(cluster_records: List[_ClusterRecord],
68
66
  StatusColumn('USER_ID', _get_user_hash, show_by_default=False))
69
67
 
70
68
  status_columns += [
71
- StatusColumn('LAUNCHED', _get_launched),
72
- StatusColumn('RESOURCES',
73
- _get_resources,
74
- trunc_length=70 if not show_all else 0),
75
- StatusColumn('REGION', _get_region, show_by_default=False),
76
- StatusColumn('ZONE', _get_zone, show_by_default=False),
69
+ StatusColumn('INFRA', _get_infra, truncate=not show_all),
70
+ StatusColumn('RESOURCES', _get_resources, truncate=not show_all),
77
71
  StatusColumn('STATUS', _get_status_colored),
78
72
  StatusColumn('AUTOSTOP', _get_autostop),
79
- StatusColumn('HEAD_IP', _get_head_ip, show_by_default=False),
80
- StatusColumn('COMMAND',
81
- _get_command,
82
- trunc_length=COMMAND_TRUNC_LENGTH if not show_all else 0),
73
+ StatusColumn('LAUNCHED', _get_launched),
83
74
  ]
75
+ if show_all:
76
+ status_columns += [
77
+ StatusColumn('HEAD_IP', _get_head_ip, show_by_default=False),
78
+ StatusColumn('COMMAND',
79
+ _get_command,
80
+ truncate=not show_all,
81
+ show_by_default=False),
82
+ ]
84
83
 
85
84
  columns = []
86
85
  for status_column in status_columns:
@@ -160,10 +159,10 @@ def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
160
159
  status_columns = [
161
160
  StatusColumn('NAME', _get_name),
162
161
  StatusColumn('LAUNCHED', _get_launched),
163
- StatusColumn('DURATION', _get_duration, trunc_length=20),
162
+ StatusColumn('DURATION', _get_duration, truncate=False),
164
163
  StatusColumn('RESOURCES',
165
164
  _get_resources_for_cost_report,
166
- trunc_length=70 if not show_all else 0),
165
+ truncate=False),
167
166
  StatusColumn('STATUS',
168
167
  _get_status_for_cost_report,
169
168
  show_by_default=True),
@@ -221,47 +220,68 @@ def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
221
220
  # Some of these lambdas are invoked on both _ClusterRecord and
222
221
  # _ClusterCostReportRecord, which is okay as we guarantee the queried fields
223
222
  # exist in those cases.
224
- _get_name = (lambda cluster_record: cluster_record['name'])
225
- _get_user_hash = (lambda cluster_record: cluster_record['user_hash'])
226
- _get_user_name = (lambda cluster_record: cluster_record.get('user_name', '-'))
227
- _get_launched = (lambda cluster_record: log_utils.readable_time_duration(
223
+ _get_name = (lambda cluster_record, _: cluster_record['name'])
224
+ _get_user_hash = (lambda cluster_record, _: cluster_record['user_hash'])
225
+ _get_user_name = (
226
+ lambda cluster_record, _: cluster_record.get('user_name', '-'))
227
+ _get_launched = (lambda cluster_record, _: log_utils.readable_time_duration(
228
228
  cluster_record['launched_at']))
229
- _get_region = (
230
- lambda clusters_status: clusters_status['handle'].launched_resources.region)
231
- _get_command = (lambda cluster_record: cluster_record['last_use'])
232
- _get_duration = (lambda cluster_record: log_utils.readable_time_duration(
229
+ _get_duration = (lambda cluster_record, _: log_utils.readable_time_duration(
233
230
  0, cluster_record['duration'], absolute=True))
234
231
 
235
232
 
236
- def _get_status(cluster_record: _ClusterRecord) -> status_lib.ClusterStatus:
237
- return cluster_record['status']
238
-
233
+ def _get_command(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
234
+ command = cluster_record.get('last_use', '-')
235
+ if truncate:
236
+ return common_utils.truncate_long_string(command, COMMAND_TRUNC_LENGTH)
237
+ return command
239
238
 
240
- def _get_status_colored(cluster_record: _ClusterRecord) -> str:
241
- return _get_status(cluster_record).colored_str()
242
239
 
240
+ def _get_status(cluster_record: _ClusterRecord,
241
+ truncate: bool = True) -> status_lib.ClusterStatus:
242
+ del truncate
243
+ return cluster_record['status']
243
244
 
244
- def _get_resources(cluster_record: _ClusterRecord) -> str:
245
- if 'resources_str' in cluster_record:
246
- return cluster_record['resources_str']
247
- handle = cluster_record['handle']
248
- if isinstance(handle, backends.LocalDockerResourceHandle):
249
- resources_str = 'docker'
250
- elif isinstance(handle, backends.CloudVmRayResourceHandle):
251
- resources_str = resources_utils.get_readable_resources_repr(handle)
252
- else:
253
- raise ValueError(f'Unknown handle type {type(handle)} encountered.')
254
- return resources_str
255
245
 
246
+ def _get_status_colored(cluster_record: _ClusterRecord,
247
+ truncate: bool = True) -> str:
248
+ del truncate
249
+ return _get_status(cluster_record).colored_str()
256
250
 
257
- def _get_zone(cluster_record: _ClusterRecord) -> str:
258
- zone_str = cluster_record['handle'].launched_resources.zone
259
- if zone_str is None:
260
- zone_str = '-'
261
- return zone_str
262
251
 
252
+ def _get_resources(cluster_record: _ClusterRecord,
253
+ truncate: bool = True) -> str:
254
+ """Get the resources information for a cluster.
263
255
 
264
- def _get_autostop(cluster_record: _ClusterRecord) -> str:
256
+ Returns:
257
+ A string in one of the following formats:
258
+ - For cloud VMs: "Nx instance_type" (e.g., "1x m6i.2xlarge")
259
+ - For K8S/SSH: "Nx (...)"
260
+ - "-" if no resource information is available
261
+ """
262
+ handle = cluster_record['handle']
263
+ if isinstance(handle, backends.CloudVmRayResourceHandle):
264
+ launched_resources = handle.launched_resources
265
+ if launched_resources is None:
266
+ return '-'
267
+
268
+ # For cloud VMs, show instance type directly
269
+ # For K8S/SSH, show (...) as the resource type
270
+ resources_str = cluster_record.get('resources_str', None)
271
+ if not truncate:
272
+ resources_str_full = cluster_record.get('resources_str_full', None)
273
+ if resources_str_full is not None:
274
+ resources_str = resources_str_full
275
+ if resources_str is None:
276
+ resources_str = resources_utils.get_readable_resources_repr(
277
+ handle, simplify=truncate)
278
+
279
+ return resources_str
280
+ return '-'
281
+
282
+
283
+ def _get_autostop(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
284
+ del truncate
265
285
  autostop_str = ''
266
286
  separation = ''
267
287
  if cluster_record['autostop'] >= 0:
@@ -276,7 +296,8 @@ def _get_autostop(cluster_record: _ClusterRecord) -> str:
276
296
  return autostop_str
277
297
 
278
298
 
279
- def _get_head_ip(cluster_record: _ClusterRecord) -> str:
299
+ def _get_head_ip(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
300
+ del truncate # Unused
280
301
  handle = cluster_record['handle']
281
302
  if not isinstance(handle, backends.CloudVmRayResourceHandle):
282
303
  return '-'
@@ -291,6 +312,25 @@ def _is_pending_autostop(cluster_record: _ClusterRecord) -> bool:
291
312
  cluster_record) != status_lib.ClusterStatus.STOPPED
292
313
 
293
314
 
315
+ def _get_infra(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
316
+ """Get the infrastructure information for a cluster.
317
+
318
+ Returns:
319
+ A string in one of the following formats:
320
+ - Cloud (region or zone), e.g., "AWS (us-east-1)"
321
+ - K8S (context), e.g., "K8S (my-ctx)"
322
+ - SSH (hostname), e.g., "SSH (my-tobi-box)"
323
+ - "-" if no infrastructure information is available
324
+ """
325
+ handle = cluster_record['handle']
326
+ if isinstance(handle, backends.CloudVmRayResourceHandle):
327
+ if handle.launched_resources is None:
328
+ # If launched_resources is None, try to get infra from the record
329
+ return cluster_record.get('infra', '-')
330
+ return handle.launched_resources.infra.formatted_str(truncate)
331
+ return '-'
332
+
333
+
294
334
  # ---- 'sky cost-report' helper functions below ----
295
335
 
296
336
 
@@ -347,14 +387,13 @@ def show_kubernetes_cluster_status_table(
347
387
  show_all: bool) -> None:
348
388
  """Compute cluster table values and display for Kubernetes clusters."""
349
389
  status_columns = [
350
- StatusColumn('USER', lambda c: c.user),
351
- StatusColumn('NAME', lambda c: c.cluster_name),
352
- StatusColumn('LAUNCHED',
353
- lambda c: log_utils.readable_time_duration(c.launched_at)),
354
- StatusColumn('RESOURCES',
355
- lambda c: c.resources_str,
356
- trunc_length=70 if not show_all else 0),
357
- StatusColumn('STATUS', lambda c: c.status.colored_str()),
390
+ StatusColumn('USER', lambda c, _: c.user),
391
+ StatusColumn('NAME', lambda c, _: c.cluster_name),
392
+ StatusColumn('RESOURCES', lambda c, _: c.resources_str, truncate=False),
393
+ StatusColumn('STATUS', lambda c, _: c.status.colored_str()),
394
+ StatusColumn(
395
+ 'LAUNCHED',
396
+ lambda c, _: log_utils.readable_time_duration(c.launched_at)),
358
397
  # TODO(romilb): We should consider adding POD_NAME field here when --all
359
398
  # is passed to help users fetch pod name programmatically.
360
399
  ]
sky/utils/common_utils.py CHANGED
@@ -723,10 +723,43 @@ def deprecated_function(
723
723
  return new_func
724
724
 
725
725
 
726
- def truncate_long_string(s: str, max_length: int = 35) -> str:
727
- """Truncate a string to a maximum length, preserving whole words."""
726
+ def truncate_long_string(s: str,
727
+ max_length: int = 35,
728
+ truncate_middle: bool = False) -> str:
729
+ """Truncate a string to a maximum length.
730
+
731
+ Args:
732
+ s: String to truncate.
733
+ max_length: Maximum length of the truncated string.
734
+ truncate_middle: Whether to truncate in the middle of the string.
735
+ If True, the middle part of the string is replaced with '...'.
736
+ If False, truncation happens at the end preserving whole words.
737
+
738
+ Returns:
739
+ Truncated string.
740
+ """
728
741
  if len(s) <= max_length:
729
742
  return s
743
+
744
+ if truncate_middle:
745
+ # Reserve 3 characters for '...'
746
+ if max_length <= 3:
747
+ return '...'
748
+
749
+ # Calculate how many characters to keep from beginning and end
750
+ half_length = (max_length - 3) // 2
751
+ remainder = (max_length - 3) % 2
752
+
753
+ # Keep one more character at the beginning if max_length - 3 is odd
754
+ start_length = half_length + remainder
755
+ end_length = half_length
756
+
757
+ # When end_length is 0, just show the start part and '...'
758
+ if end_length == 0:
759
+ return s[:start_length] + '...'
760
+ return s[:start_length] + '...' + s[-end_length:]
761
+
762
+ # Original end-truncation logic
730
763
  splits = s.split(' ')
731
764
  if len(splits[0]) > max_length:
732
765
  return splits[0][:max_length] + '...' # Use '…'?
sky/utils/context.py CHANGED
@@ -57,6 +57,7 @@ class Context(object):
57
57
  self._log_file = None
58
58
  self._log_file_handle = None
59
59
  self.env_overrides = {}
60
+ self.config_context = None
60
61
 
61
62
  def cancel(self):
62
63
  """Cancel the context."""
@@ -159,17 +160,25 @@ class ContextualEnviron(MutableMapping):
159
160
  ctx = get()
160
161
  if ctx is not None:
161
162
  if key in ctx.env_overrides:
162
- return ctx.env_overrides[key]
163
+ value = ctx.env_overrides[key]
164
+ # None is used to indicate that the key is deleted in the
165
+ # context.
166
+ if value is None:
167
+ raise KeyError(key)
168
+ return value
163
169
  return self._environ[key]
164
170
 
165
171
  def __iter__(self):
166
172
  ctx = get()
173
+ deleted_keys = set()
167
174
  if ctx is not None:
168
- for key in ctx.env_overrides:
175
+ for key, value in ctx.env_overrides.items():
176
+ if value is None:
177
+ deleted_keys.add(key)
169
178
  yield key
170
179
  for key in self._environ:
171
180
  # Deduplicate the keys
172
- if key not in ctx.env_overrides:
181
+ if key not in ctx.env_overrides and key not in deleted_keys:
173
182
  yield key
174
183
  else:
175
184
  return self._environ.__iter__()
@@ -178,10 +187,27 @@ class ContextualEnviron(MutableMapping):
178
187
  return len(dict(self))
179
188
 
180
189
  def __setitem__(self, key, value):
181
- return self._environ.__setitem__(key, value)
190
+ ctx = get()
191
+ if ctx is not None:
192
+ ctx.env_overrides[key] = value
193
+ else:
194
+ self._environ.__setitem__(key, value)
182
195
 
183
196
  def __delitem__(self, key):
184
- return self._environ.__delitem__(key)
197
+ ctx = get()
198
+ if ctx is not None:
199
+ if key in ctx.env_overrides:
200
+ del ctx.env_overrides[key]
201
+ elif key in self._environ:
202
+ # If the key is not set in the context but set in the environ
203
+ # of the process, we mark it as deleted in the context by
204
+ # setting the value to None.
205
+ ctx.env_overrides[key] = None
206
+ else:
207
+ # The key is not set in the context nor the process.
208
+ raise KeyError(key)
209
+ else:
210
+ self._environ.__delitem__(key)
185
211
 
186
212
  def __repr__(self):
187
213
  return self._environ.__repr__()
@@ -190,7 +216,11 @@ class ContextualEnviron(MutableMapping):
190
216
  copied = self._environ.copy()
191
217
  ctx = get()
192
218
  if ctx is not None:
193
- copied.update(ctx.env_overrides)
219
+ for key in ctx.env_overrides:
220
+ if ctx.env_overrides[key] is None:
221
+ copied.pop(key)
222
+ else:
223
+ copied[key] = ctx.env_overrides[key]
194
224
  return copied
195
225
 
196
226
  def setdefault(self, key, default=None):
@@ -1,5 +1,6 @@
1
1
  """Utilities for SkyPilot context."""
2
2
  import asyncio
3
+ import contextvars
3
4
  import functools
4
5
  import io
5
6
  import multiprocessing
@@ -170,3 +171,17 @@ def cancellation_guard(func: F) -> F:
170
171
  return func(*args, **kwargs)
171
172
 
172
173
  return typing.cast(F, wrapper)
174
+
175
+
176
+ # TODO(aylei): replace this with asyncio.to_thread once we drop support for
177
+ # python 3.8
178
+ def to_thread(func, /, *args, **kwargs):
179
+ """Asynchronously run function *func* in a separate thread.
180
+
181
+ This is the same as asyncio.to_thread, which was added in Python 3.9.
182
+ """
183
+ loop = asyncio.get_running_loop()
184
+ # Critical: copy the caller's contextvars context so func runs within it
185
+ pyctx = contextvars.copy_context()
186
+ func_call = functools.partial(pyctx.run, func, *args, **kwargs)
187
+ return loop.run_in_executor(None, func_call)
@@ -0,0 +1,175 @@
1
+ """Utility functions for handling infrastructure specifications."""
2
+ import dataclasses
3
+ from typing import Optional
4
+
5
+ from sky.utils import common_utils
6
+ from sky.utils import ux_utils
7
+
8
+ _REGION_OR_ZONE_TRUNCATION_LENGTH = 25
9
+
10
+
11
+ @dataclasses.dataclass
12
+ class InfraInfo:
13
+ """Infrastructure information parsed from infra string.
14
+
15
+ When a field is None, it means the field is not specified.
16
+ """
17
+ cloud: Optional[str] = None
18
+ region: Optional[str] = None
19
+ zone: Optional[str] = None
20
+
21
+ def __init__(self,
22
+ cloud: Optional[str] = None,
23
+ region: Optional[str] = None,
24
+ zone: Optional[str] = None):
25
+ assert cloud not in ['none', 'None', 'NONE'], 'cloud must be specified'
26
+ if not cloud or cloud == '*':
27
+ cloud = None
28
+ if not region or region == '*':
29
+ region = None
30
+ if not zone or zone == '*':
31
+ zone = None
32
+
33
+ self.cloud = cloud
34
+ self.region = region
35
+ self.zone = zone
36
+
37
+ @staticmethod
38
+ def from_str(infra: Optional[str]) -> 'InfraInfo':
39
+ """Parse the infra string into cloud, region, and zone components.
40
+
41
+ The format of the infra string is `cloud`, `cloud/region`, or
42
+ `cloud/region/zone`. Examples: `aws`, `aws/us-east-1`,
43
+ `aws/us-east-1/us-east-1a`. For any field, you can use `*` to indicate
44
+ that any value is acceptable.
45
+
46
+ If `*` is used for any field, the InfraInfo will have None for that
47
+ field.
48
+
49
+ Args:
50
+ infra: A string in the format of `cloud`, `cloud/region`, or
51
+ `cloud/region/zone`. Examples: `aws`, `aws/us-east-1`,
52
+ `aws/us-east-1/us-east-1a`.
53
+
54
+ Returns:
55
+ An InfraInfo object containing cloud, region, and zone information.
56
+
57
+ Raises:
58
+ ValueError: If the infra string is malformed.
59
+ """
60
+ if infra is None or not infra.strip():
61
+ return InfraInfo()
62
+
63
+ infra = infra.strip().strip('/')
64
+
65
+ # Split on / to get cloud, region, zone
66
+ parts = [p.strip() for p in infra.strip().split('/')]
67
+
68
+ if '' in parts:
69
+ with ux_utils.print_exception_no_traceback():
70
+ raise ValueError(
71
+ f'Invalid infra format: {infra}. Format should not contain '
72
+ 'empty parts (e.g., double slashes "//").')
73
+
74
+ if not parts or not parts[0]:
75
+ with ux_utils.print_exception_no_traceback():
76
+ raise ValueError(
77
+ f'Invalid infra format: {infra}. Expected format is '
78
+ '"cloud", "cloud/region", or "cloud/region/zone".')
79
+
80
+ cloud_name: Optional[str] = parts[0].lower()
81
+
82
+ # Handle Kubernetes contexts specially, as they can contain slashes
83
+ if cloud_name in ['k8s', 'kubernetes']:
84
+ # For Kubernetes, the entire string after "k8s/" is the
85
+ # context name (region)
86
+ cloud_name = 'kubernetes' # Normalize k8s to kubernetes
87
+ region = '/'.join(parts[1:]) if len(parts) >= 2 else None
88
+ zone = None
89
+ else:
90
+ # For non-Kubernetes clouds, continue with regular parsing
91
+ # but be careful to only split into max 3 parts
92
+ region_zone_parts = parts[1:]
93
+ region = None
94
+ zone = None
95
+ if region_zone_parts:
96
+ region = region_zone_parts[0]
97
+ if len(region_zone_parts) > 1:
98
+ zone = region_zone_parts[1]
99
+ if len(region_zone_parts) > 2:
100
+ with ux_utils.print_exception_no_traceback():
101
+ raise ValueError(
102
+ f'Invalid infra format: {infra}. Expected format '
103
+ 'is "cloud", "cloud/region", or '
104
+ '"cloud/region/zone".')
105
+
106
+ if cloud_name == '*':
107
+ cloud_name = None
108
+ if region == '*':
109
+ region = None
110
+ if zone == '*':
111
+ zone = None
112
+ return InfraInfo(cloud=cloud_name, region=region, zone=zone)
113
+
114
+ def to_str(self) -> Optional[str]:
115
+ """Formats cloud, region, and zone into an infra string.
116
+
117
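+ The cloud, region, and zone are read from this object's fields.
118
+ A field that is None is written as '*', and trailing '*' parts are
119
+ dropped: cloud='aws' with only zone='us-east-1a' set gives
120
+ 'aws/*/us-east-1a', while cloud='aws' alone gives 'aws'.
121
+
122
+ Returns:
123
+ A formatted infra string, or None if no field is specified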
124
+ """
125
+ cloud = self.cloud
126
+ region = self.region
127
+ zone = self.zone
128
+
129
+ if cloud is None:
130
+ cloud = '*'
131
+ if region is None:
132
+ region = '*'
133
+ if zone is None:
134
+ zone = '*'
135
+
136
+ # Build the parts list and filter out trailing wildcards
137
+ parts = [cloud.lower(), region, zone]
138
+ while parts and parts[-1] == '*':
139
+ parts.pop()
140
+
141
+ if not parts:
142
+ return None
143
+
144
+ # Join the parts with '/'
145
+ return '/'.join(parts)
146
+
147
+ def formatted_str(self, truncate: bool = True) -> str:
148
+ Formats the infra as a display string: "cloud (zone-or-region)".
149
+
150
+ Args:
151
+ truncate: Whether to truncate the region or zone
152
+
153
+ Returns:
154
+ A formatted infra string, or '-' if cloud is None or '*'
155
+ """
156
+ if self.cloud is None or self.cloud == '*':
157
+ return '-'
158
+
159
+ region_or_zone = None
160
+ if self.zone is not None and self.zone != '*':
161
+ region_or_zone = self.zone
162
+ elif self.region is not None and self.region != '*':
163
+ region_or_zone = self.region
164
+
165
+ if region_or_zone is not None and truncate:
166
+ region_or_zone = common_utils.truncate_long_string(
167
+ region_or_zone,
168
+ _REGION_OR_ZONE_TRUNCATION_LENGTH,
169
+ truncate_middle=True)
170
+
171
+ formatted_str = f'{self.cloud}'
172
+ if region_or_zone is not None:
173
+ formatted_str += f' ({region_or_zone})'
174
+
175
+ return formatted_str
@@ -4,11 +4,11 @@ import enum
4
4
  import itertools
5
5
  import json
6
6
  import math
7
- import re
8
7
  import typing
9
8
  from typing import Dict, List, Optional, Set, Union
10
9
 
11
10
  from sky import skypilot_config
11
+ from sky.utils import common_utils
12
12
  from sky.utils import registry
13
13
  from sky.utils import ux_utils
14
14
 
@@ -50,6 +50,20 @@ class DiskTier(enum.Enum):
50
50
  return types.index(self) <= types.index(other)
51
51
 
52
52
 
53
+ class StorageType(enum.Enum):
54
+ """Storage type."""
55
+ # Durable network storage, e.g. GCP persistent disks
56
+ NETWORK = 'network'
57
+ # Local instance storage, e.g. GCP local SSDs
58
+ INSTANCE = 'instance'
59
+
60
+
61
+ class DiskAttachMode(enum.Enum):
62
+ """Disk attach mode."""
63
+ READ_ONLY = 'read_only'
64
+ READ_WRITE = 'read_write'
65
+
66
+
53
67
  @dataclasses.dataclass
54
68
  class ClusterName:
55
69
  display_name: str
@@ -139,34 +153,54 @@ def simplify_ports(ports: List[str]) -> List[str]:
139
153
 
140
154
  def format_resource(resource: 'resources_lib.Resources',
141
155
  simplify: bool = False) -> str:
156
+ resource = resource.assert_launchable()
157
+ vcpu, mem = resource.cloud.get_vcpus_mem_from_instance_type(
158
+ resource.instance_type)
159
+
160
+ components = []
161
+
162
+ if resource.accelerators is not None:
163
+ acc, count = list(resource.accelerators.items())[0]
164
+ components.append(f'gpus={acc}:{count}')
165
+
166
+ is_k8s = str(resource.cloud).lower() == 'kubernetes'
167
+ if (resource.accelerators is None or is_k8s or not simplify):
168
+ if vcpu is not None:
169
+ components.append(f'cpus={int(vcpu)}')
170
+ if mem is not None:
171
+ components.append(f'mem={int(mem)}')
172
+
173
+ instance_type = resource.instance_type
142
174
  if simplify:
143
- resource = resource.assert_launchable()
144
- cloud = resource.cloud
145
- if resource.accelerators is None:
146
- vcpu, _ = cloud.get_vcpus_mem_from_instance_type(
147
- resource.instance_type)
148
- assert vcpu is not None, 'vCPU must be specified'
149
- hardware = f'vCPU={int(vcpu)}'
150
- else:
151
- hardware = f'{resource.accelerators}'
152
- spot = '[Spot]' if resource.use_spot else ''
153
- return f'{cloud}({spot}{hardware})'
175
+ instance_type = common_utils.truncate_long_string(instance_type, 15)
176
+ if not is_k8s:
177
+ components.append(instance_type)
178
+ if simplify:
179
+ components.append('...')
154
180
  else:
155
- # accelerator_args is way too long.
156
- # Convert from:
157
- # GCP(n1-highmem-8, {'tpu-v2-8': 1}, accelerator_args={'runtime_version': '2.12.0'} # pylint: disable=line-too-long
158
- # to:
159
- # GCP(n1-highmem-8, {'tpu-v2-8': 1}...)
160
- pattern = ', accelerator_args={.*}'
161
- launched_resource_str = re.sub(pattern, '...', str(resource))
162
- return launched_resource_str
181
+ image_id = resource.image_id
182
+ if image_id is not None:
183
+ if None in image_id:
184
+ components.append(f'image_id={image_id[None]}')
185
+ else:
186
+ components.append(f'image_id={image_id}')
187
+ components.append(f'disk={resource.disk_size}')
188
+ disk_tier = resource.disk_tier
189
+ if disk_tier is not None:
190
+ components.append(f'disk_tier={disk_tier.value}')
191
+ ports = resource.ports
192
+ if ports is not None:
193
+ components.append(f'ports={ports}')
194
+
195
+ spot = '[spot]' if resource.use_spot else ''
196
+ return f'{spot}({"" if not components else ", ".join(components)})'
163
197
 
164
198
 
165
199
  def get_readable_resources_repr(handle: 'backends.CloudVmRayResourceHandle',
166
200
  simplify: bool = False) -> str:
167
201
  if (handle.launched_nodes is not None and
168
202
  handle.launched_resources is not None):
169
- return (f'{handle.launched_nodes}x '
203
+ return (f'{handle.launched_nodes}x'
170
204
  f'{format_resource(handle.launched_resources, simplify)}')
171
205
  return _DEFAULT_MESSAGE_HANDLE_INITIALIZING
172
206