skypilot-nightly 1.0.0.dev20250814__py3-none-any.whl → 1.0.0.dev20250815__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (127)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/nebius.py +43 -1
  3. sky/backends/backend_utils.py +6 -2
  4. sky/backends/cloud_vm_ray_backend.py +13 -4
  5. sky/client/cli/command.py +22 -8
  6. sky/client/sdk.py +50 -0
  7. sky/clouds/kubernetes.py +2 -6
  8. sky/clouds/nebius.py +3 -1
  9. sky/dashboard/out/404.html +1 -1
  10. sky/dashboard/out/_next/static/I-djf3wB8zZl_bI67BOyZ/_buildManifest.js +1 -0
  11. sky/dashboard/out/_next/static/chunks/1141-a96678fed5043c12.js +1 -0
  12. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  13. sky/dashboard/out/_next/static/chunks/3015-77d22ae2fad4071c.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/3785.8ce85b31e5c602e9.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +21 -0
  16. sky/dashboard/out/_next/static/chunks/4509-fa63866741388427.js +1 -0
  17. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +10 -0
  18. sky/dashboard/out/_next/static/chunks/4725.68d5ce4d6bcb7991.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/6014.d466a44b73af8348.js +6 -0
  20. sky/dashboard/out/_next/static/chunks/{6135-85426374db04811e.js → 6135-4b4d5e824b7f9d3c.js} +1 -1
  21. sky/dashboard/out/_next/static/chunks/6633-efe924b9b8136699.js +40 -0
  22. sky/dashboard/out/_next/static/chunks/6856-58370d8c9a79f72b.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +6 -0
  25. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +18 -0
  26. sky/dashboard/out/_next/static/chunks/7557-5855617d0421ed55.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/8310.4ae62d5937045bf3.js +31 -0
  28. sky/dashboard/out/_next/static/chunks/8838.e7953f42af2b0544.js +45 -0
  29. sky/dashboard/out/_next/static/chunks/8969-6d493b1e2fa45826.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/{1871-980a395e92633a5c.js → 9037-f71c3c42670a4be0.js} +2 -2
  31. sky/dashboard/out/_next/static/chunks/9277.71481d5b2e606e33.js +51 -0
  32. sky/dashboard/out/_next/static/chunks/pages/{_app-c2ea34fda4f1f8c8.js → _app-ce361c6959bc2001.js} +1 -1
  33. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-078751bad714c017.js → [job]-6d43d6a6bd1d4c77.js} +2 -2
  34. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-30c5954a7b1f67d7.js +16 -0
  35. sky/dashboard/out/_next/static/chunks/pages/clusters-fa94c3548b5834aa.js +1 -0
  36. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-13d53fffc03ccb52.js → [context]-5264c5645299cde9.js} +1 -1
  37. sky/dashboard/out/_next/static/chunks/pages/{infra-fc9222e26c8e2f0d.js → infra-83991650ae4bd083.js} +1 -1
  38. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-ad2cd5aab787bc15.js +6 -0
  39. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-664c36eda967b1ba.js → [pool]-7d4182df6625fe10.js} +2 -7
  40. sky/dashboard/out/_next/static/chunks/pages/jobs-c6a6a8a737ad7e2d.js +1 -0
  41. sky/dashboard/out/_next/static/chunks/pages/users-d112a9b3d854abb2.js +1 -0
  42. sky/dashboard/out/_next/static/chunks/pages/volumes-b87fec189298a0c0.js +1 -0
  43. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-f72f73bcef9541dc.js → [name]-8a86ca4c98812df9.js} +1 -1
  44. sky/dashboard/out/_next/static/chunks/pages/workspaces-74ef46fc370f7c71.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/webpack-aba778a6d6eb496d.js +1 -0
  46. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  47. sky/dashboard/out/clusters/[cluster].html +1 -1
  48. sky/dashboard/out/clusters.html +1 -1
  49. sky/dashboard/out/config.html +1 -1
  50. sky/dashboard/out/index.html +1 -1
  51. sky/dashboard/out/infra/[context].html +1 -1
  52. sky/dashboard/out/infra.html +1 -1
  53. sky/dashboard/out/jobs/[job].html +1 -1
  54. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  55. sky/dashboard/out/jobs.html +1 -1
  56. sky/dashboard/out/users.html +1 -1
  57. sky/dashboard/out/volumes.html +1 -1
  58. sky/dashboard/out/workspace/new.html +1 -1
  59. sky/dashboard/out/workspaces/[name].html +1 -1
  60. sky/dashboard/out/workspaces.html +1 -1
  61. sky/execution.py +13 -10
  62. sky/global_user_state.py +128 -1
  63. sky/jobs/constants.py +1 -1
  64. sky/jobs/scheduler.py +14 -21
  65. sky/jobs/server/core.py +64 -10
  66. sky/jobs/server/utils.py +1 -1
  67. sky/jobs/state.py +1 -3
  68. sky/jobs/utils.py +159 -8
  69. sky/provision/aws/config.py +19 -3
  70. sky/provision/aws/instance.py +2 -1
  71. sky/provision/nebius/utils.py +101 -86
  72. sky/provision/provisioner.py +13 -8
  73. sky/resources.py +5 -5
  74. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  75. sky/serve/replica_managers.py +123 -101
  76. sky/serve/serve_state.py +32 -0
  77. sky/serve/serve_utils.py +37 -16
  78. sky/serve/service.py +51 -17
  79. sky/server/constants.py +1 -1
  80. sky/server/requests/payloads.py +6 -0
  81. sky/server/requests/serializers/decoders.py +12 -2
  82. sky/server/requests/serializers/encoders.py +10 -2
  83. sky/server/server.py +44 -2
  84. sky/templates/kubernetes-ray.yml.j2 +1 -0
  85. sky/utils/common_utils.py +20 -0
  86. sky/utils/controller_utils.py +17 -4
  87. sky/utils/db/migration_utils.py +1 -1
  88. sky/utils/log_utils.py +14 -5
  89. sky/utils/resources_utils.py +25 -1
  90. sky/utils/schemas.py +3 -0
  91. sky/utils/ux_utils.py +36 -5
  92. {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250815.dist-info}/METADATA +1 -1
  93. {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250815.dist-info}/RECORD +99 -98
  94. sky/dashboard/out/_next/static/Y0eNlwi85qGRecLTin11y/_buildManifest.js +0 -1
  95. sky/dashboard/out/_next/static/chunks/1141-a8a8f1adba34c892.js +0 -11
  96. sky/dashboard/out/_next/static/chunks/1559-6c00e20454194859.js +0 -30
  97. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +0 -15
  98. sky/dashboard/out/_next/static/chunks/2641.142718b6b78a6f9b.js +0 -1
  99. sky/dashboard/out/_next/static/chunks/3785.6003d293cb83eab4.js +0 -1
  100. sky/dashboard/out/_next/static/chunks/4725.29550342bd53afd8.js +0 -1
  101. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +0 -15
  102. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +0 -13
  103. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +0 -1
  104. sky/dashboard/out/_next/static/chunks/691.5eeedf82cc243343.js +0 -55
  105. sky/dashboard/out/_next/static/chunks/6990-0f886f16e0d55ff8.js +0 -1
  106. sky/dashboard/out/_next/static/chunks/8056-5bdeda81199c0def.js +0 -1
  107. sky/dashboard/out/_next/static/chunks/8252.62b0d23aed618bb2.js +0 -16
  108. sky/dashboard/out/_next/static/chunks/8969-c9686994ddafcf01.js +0 -1
  109. sky/dashboard/out/_next/static/chunks/9159-11421c0f2909236f.js +0 -1
  110. sky/dashboard/out/_next/static/chunks/9360.85b0b1b4054574dd.js +0 -31
  111. sky/dashboard/out/_next/static/chunks/9666.cd4273f2a5c5802c.js +0 -1
  112. sky/dashboard/out/_next/static/chunks/9847.757720f3b40c0aa5.js +0 -30
  113. sky/dashboard/out/_next/static/chunks/9984.c5564679e467d245.js +0 -1
  114. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-da9cc0901349c2e9.js +0 -1
  115. sky/dashboard/out/_next/static/chunks/pages/clusters-b30460f683e6ba96.js +0 -1
  116. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-154f55cf8af55be5.js +0 -11
  117. sky/dashboard/out/_next/static/chunks/pages/jobs-cdc60fb5d371e16a.js +0 -1
  118. sky/dashboard/out/_next/static/chunks/pages/users-7ed36e44e779d5c7.js +0 -1
  119. sky/dashboard/out/_next/static/chunks/pages/volumes-c9695d657f78b5dc.js +0 -1
  120. sky/dashboard/out/_next/static/chunks/pages/workspaces-8f67be60165724cc.js +0 -1
  121. sky/dashboard/out/_next/static/chunks/webpack-00c0a51d21157453.js +0 -1
  122. /sky/dashboard/out/_next/static/{Y0eNlwi85qGRecLTin11y → I-djf3wB8zZl_bI67BOyZ}/_ssgManifest.js +0 -0
  123. /sky/dashboard/out/_next/static/chunks/{6989-37611fe6b86d274d.js → 6989-01359c57e018caa4.js} +0 -0
  124. {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250815.dist-info}/WHEEL +0 -0
  125. {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250815.dist-info}/entry_points.txt +0 -0
  126. {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250815.dist-info}/licenses/LICENSE +0 -0
  127. {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250815.dist-info}/top_level.txt +0 -0
sky/server/requests/serializers/decoders.py CHANGED
@@ -102,8 +102,18 @@ def decode_queue(return_value: List[dict],) -> List[Dict[str, Any]]:
102
102
 
103
103
 
104
104
  @register_decoders('jobs.queue')
105
- def decode_jobs_queue(return_value: List[dict],) -> List[Dict[str, Any]]:
106
- jobs = return_value
105
+ def decode_jobs_queue(return_value):
106
+ """Decode jobs queue response.
107
+
108
+ Supports legacy list, or a dict {jobs, total}.
109
+ - Returns list[job]
110
+ """
111
+ # Case 1: dict shape {jobs, total}
112
+ if isinstance(return_value, dict) and 'jobs' in return_value:
113
+ jobs = return_value.get('jobs', [])
114
+ else:
115
+ # Case 2: legacy list
116
+ jobs = return_value
107
117
  for job in jobs:
108
118
  job['status'] = managed_jobs.ManagedJobStatus(job['status'])
109
119
  return jobs
sky/server/requests/serializers/encoders.py CHANGED
@@ -106,10 +106,18 @@ def encode_status_kubernetes(
106
106
 
107
107
 
108
108
  @register_encoder('jobs.queue')
109
- def encode_jobs_queue(jobs: List[dict],) -> List[Dict[str, Any]]:
109
+ def encode_jobs_queue(jobs_or_tuple):
110
+ # Support returning either a plain jobs list or a (jobs, total) tuple
111
+ if isinstance(jobs_or_tuple, tuple) and len(jobs_or_tuple) == 2:
112
+ jobs, total = jobs_or_tuple
113
+ else:
114
+ jobs = jobs_or_tuple
115
+ total = None
110
116
  for job in jobs:
111
117
  job['status'] = job['status'].value
112
- return jobs
118
+ if total is None:
119
+ return jobs
120
+ return {'jobs': jobs, 'total': total}
113
121
 
114
122
 
115
123
  def _encode_serve_status(
sky/server/server.py CHANGED
@@ -792,8 +792,6 @@ async def validate(validate_body: payloads.ValidateBody) -> None:
792
792
  ctx.override_envs(validate_body.env_vars)
793
793
 
794
794
  def validate_dag(dag: dag_utils.dag_lib.Dag):
795
- # Resolve the volumes before admin policy and validation.
796
- dag.resolve_and_validate_volumes()
797
795
  # TODO: Admin policy may contain arbitrary code, which may be expensive
798
796
  # to run and may block the server thread. However, moving it into the
799
797
  # executor adds a ~150ms penalty on the local API server because of
@@ -802,6 +800,7 @@ async def validate(validate_body: payloads.ValidateBody) -> None:
802
800
  with admin_policy_utils.apply_and_use_config_in_current_request(
803
801
  dag,
804
802
  request_options=validate_body.get_request_options()) as dag:
803
+ dag.resolve_and_validate_volumes()
805
804
  # Skip validating workdir and file_mounts, as those need to be
806
805
  # validated after the files are uploaded to the SkyPilot API server
807
806
  # with `upload_mounts_to_api_server`.
@@ -1284,6 +1283,46 @@ async def download(download_body: payloads.DownloadBody) -> None:
1284
1283
  detail=f'Error creating zip file: {str(e)}')
1285
1284
 
1286
1285
 
1286
+ @app.post('/provision_logs')
1287
+ async def provision_logs(cluster_body: payloads.ClusterNameBody,
1288
+ follow: bool = True,
1289
+ tail: int = 0) -> fastapi.responses.StreamingResponse:
1290
+ """Streams the provision.log for the latest launch request of a cluster."""
1291
+ # Prefer clusters table first, then cluster_history as fallback.
1292
+ log_path_str = global_user_state.get_cluster_provision_log_path(
1293
+ cluster_body.cluster_name)
1294
+ if not log_path_str:
1295
+ log_path_str = global_user_state.get_cluster_history_provision_log_path(
1296
+ cluster_body.cluster_name)
1297
+ if not log_path_str:
1298
+ raise fastapi.HTTPException(
1299
+ status_code=404,
1300
+ detail=('Provision log path is not recorded for this cluster. '
1301
+ 'Please relaunch to generate provisioning logs.'))
1302
+
1303
+ log_path = pathlib.Path(log_path_str).expanduser().resolve()
1304
+ if not log_path.exists():
1305
+ raise fastapi.HTTPException(
1306
+ status_code=404,
1307
+ detail=f'Provision log path does not exist: {str(log_path)}')
1308
+
1309
+ # Tail semantics: 0 means print all lines. Convert 0 -> None for streamer.
1310
+ effective_tail = None if tail is None or tail <= 0 else tail
1311
+
1312
+ return fastapi.responses.StreamingResponse(
1313
+ content=stream_utils.log_streamer(None,
1314
+ log_path,
1315
+ tail=effective_tail,
1316
+ follow=follow),
1317
+ media_type='text/plain',
1318
+ headers={
1319
+ 'Cache-Control': 'no-cache, no-transform',
1320
+ 'X-Accel-Buffering': 'no',
1321
+ 'Transfer-Encoding': 'chunked',
1322
+ },
1323
+ )
1324
+
1325
+
1287
1326
  @app.post('/cost_report')
1288
1327
  async def cost_report(request: fastapi.Request,
1289
1328
  cost_report_body: payloads.CostReportBody) -> None:
@@ -1815,6 +1854,9 @@ if __name__ == '__main__':
1815
1854
  global_tasks.append(background.create_task(metrics_server.serve()))
1816
1855
  global_tasks.append(
1817
1856
  background.create_task(requests_lib.requests_gc_daemon()))
1857
+ global_tasks.append(
1858
+ background.create_task(
1859
+ global_user_state.cluster_event_retention_daemon()))
1818
1860
  threading.Thread(target=background.run_forever, daemon=True).start()
1819
1861
 
1820
1862
  queue_server, workers = executor.start(config)
sky/templates/kubernetes-ray.yml.j2 CHANGED
@@ -378,6 +378,7 @@ available_node_types:
378
378
  {% if volume_mounts %}
379
379
  securityContext:
380
380
  fsGroup: 1000
381
+ fsGroupChangePolicy: OnRootMismatch
381
382
  {% endif %}
382
383
 
383
384
  # Add node selector if GPU/TPUs are requested:
sky/utils/common_utils.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """Utils shared between all of sky"""
2
2
 
3
3
  import difflib
4
+ import enum
4
5
  import functools
5
6
  import getpass
6
7
  import hashlib
@@ -55,6 +56,25 @@ _VALID_ENV_VAR_REGEX = '[a-zA-Z_][a-zA-Z0-9_]*'
55
56
  logger = sky_logging.init_logger(__name__)
56
57
 
57
58
 
59
+ class ProcessStatus(enum.Enum):
60
+ """Process status."""
61
+
62
+ # The process is scheduled to run, but not started yet.
63
+ SCHEDULED = 'SCHEDULED'
64
+
65
+ # The process is running
66
+ RUNNING = 'RUNNING'
67
+
68
+ # The process is finished and succeeded
69
+ SUCCEEDED = 'SUCCEEDED'
70
+
71
+ # The process is interrupted
72
+ INTERRUPTED = 'INTERRUPTED'
73
+
74
+ # The process failed
75
+ FAILED = 'FAILED'
76
+
77
+
58
78
  @annotations.lru_cache(scope='request')
59
79
  def get_usage_run_id() -> str:
60
80
  """Returns a unique run id for each 'run'.
sky/utils/controller_utils.py CHANGED
@@ -1224,13 +1224,26 @@ def _get_launch_parallelism() -> int:
1224
1224
 
1225
1225
 
1226
1226
  def can_provision() -> bool:
1227
- num_provision = (
1228
- serve_state.total_number_provisioning_replicas() * SERVE_LAUNCH_RATIO +
1229
- managed_job_state.get_num_launching_jobs())
1230
- return num_provision < _get_launch_parallelism()
1227
+ # We always prioritize terminating over provisioning, to save the cost on
1228
+ # idle resources.
1229
+ if serve_state.total_number_scheduled_to_terminate_replicas() > 0:
1230
+ return False
1231
+ return can_terminate()
1231
1232
 
1232
1233
 
1233
1234
  def can_start_new_process() -> bool:
1234
1235
  num_procs = (serve_state.get_num_services() * SERVE_PROC_RATIO +
1235
1236
  managed_job_state.get_num_alive_jobs())
1236
1237
  return num_procs < _get_job_parallelism()
1238
+
1239
+
1240
+ # We limit the number of terminating replicas to the number of CPUs. This is
1241
+ # just a temporary solution to avoid overwhelming the controller. After one job
1242
+ # controller PR, we should use API server to handle resources management.
1243
+ def can_terminate() -> bool:
1244
+ num_terminating = (
1245
+ serve_state.total_number_provisioning_replicas() * SERVE_LAUNCH_RATIO +
1246
+ # Each terminate process will take roughly the same CPUs as job launch.
1247
+ serve_state.total_number_terminating_replicas() +
1248
+ managed_job_state.get_num_launching_jobs())
1249
+ return num_terminating < _get_launch_parallelism()
sky/utils/db/migration_utils.py CHANGED
@@ -19,7 +19,7 @@ logger = sky_logging.init_logger(__name__)
19
19
  DB_INIT_LOCK_TIMEOUT_SECONDS = 10
20
20
 
21
21
  GLOBAL_USER_STATE_DB_NAME = 'state_db'
22
- GLOBAL_USER_STATE_VERSION = '005'
22
+ GLOBAL_USER_STATE_VERSION = '006'
23
23
  GLOBAL_USER_STATE_LOCK_PATH = '~/.sky/locks/.state_db.lock'
24
24
 
25
25
  SPOT_JOBS_DB_NAME = 'spot_jobs_db'
sky/utils/log_utils.py CHANGED
@@ -47,13 +47,16 @@ class RayUpLineProcessor(LineProcessor):
47
47
  RUNTIME_SETUP = 1
48
48
  PULLING_DOCKER_IMAGES = 2
49
49
 
50
- def __init__(self, log_path: str):
50
+ def __init__(self, log_path: str, cluster_name: Optional[str] = None):
51
51
  self.log_path = log_path
52
+ self.cluster_name = cluster_name
52
53
 
53
54
  def __enter__(self) -> None:
54
55
  self.state = self.ProvisionStatus.LAUNCH
55
56
  self.status_display = rich_utils.safe_status(
56
- ux_utils.spinner_message('Launching', self.log_path))
57
+ ux_utils.spinner_message('Launching',
58
+ self.log_path,
59
+ cluster_name=self.cluster_name))
57
60
  self.status_display.start()
58
61
 
59
62
  def process_line(self, log_line: str) -> None:
@@ -62,19 +65,25 @@ class RayUpLineProcessor(LineProcessor):
62
65
  logger.info(' Head VM is up.')
63
66
  self.status_display.update(
64
67
  ux_utils.spinner_message(
65
- 'Launching - Preparing SkyPilot runtime', self.log_path))
68
+ 'Launching - Preparing SkyPilot runtime',
69
+ self.log_path,
70
+ cluster_name=self.cluster_name))
66
71
  self.state = self.ProvisionStatus.RUNTIME_SETUP
67
72
  if ('Pulling from' in log_line and
68
73
  self.state == self.ProvisionStatus.RUNTIME_SETUP):
69
74
  self.status_display.update(
70
75
  ux_utils.spinner_message(
71
- 'Launching - Initializing docker container', self.log_path))
76
+ 'Launching - Initializing docker container',
77
+ self.log_path,
78
+ cluster_name=self.cluster_name))
72
79
  self.state = self.ProvisionStatus.PULLING_DOCKER_IMAGES
73
80
  if ('Status: Downloaded newer image' in log_line and
74
81
  self.state == self.ProvisionStatus.PULLING_DOCKER_IMAGES):
75
82
  self.status_display.update(
76
83
  ux_utils.spinner_message(
77
- 'Launching - Preparing SkyPilot runtime', self.log_path))
84
+ 'Launching - Preparing SkyPilot runtime',
85
+ self.log_path,
86
+ cluster_name=self.cluster_name))
78
87
  self.state = self.ProvisionStatus.RUNTIME_SETUP
79
88
 
80
89
  def __exit__(self, except_type: Optional[Type[BaseException]],
sky/utils/resources_utils.py CHANGED
@@ -5,7 +5,7 @@ import itertools
5
5
  import json
6
6
  import math
7
7
  import typing
8
- from typing import Dict, List, Optional, Set, Union
8
+ from typing import Any, Dict, List, Optional, Set, Tuple, Union
9
9
 
10
10
  from sky import skypilot_config
11
11
  from sky.skylet import constants
@@ -435,3 +435,27 @@ def parse_time_minutes(time: str) -> int:
435
435
  continue
436
436
 
437
437
  raise ValueError(f'Invalid time format: {time}')
438
+
439
+
440
+ def normalize_any_of_resources_config(
441
+ any_of: List[Dict[str, Any]]) -> Tuple[str, ...]:
442
+ """Normalize a list of any_of resources config to a canonical form.
443
+
444
+ Args:
445
+ any_of: A list of any_of resources config.
446
+
447
+ Returns:
448
+ A normalized tuple representation that can be compared for equality.
449
+ Two lists with the same resource configurations in different orders
450
+ will produce the same normalized result.
451
+ """
452
+ if not any_of:
453
+ return tuple()
454
+
455
+ # Convert each config to JSON string with sorted keys, then sort the list
456
+ normalized_configs = [
457
+ json.dumps(config, sort_keys=True, separators=(',', ':'))
458
+ for config in any_of
459
+ ]
460
+
461
+ return tuple(sorted(normalized_configs))
sky/utils/schemas.py CHANGED
@@ -1535,6 +1535,9 @@ def get_config_schema():
1535
1535
  'requests_retention_hours': {
1536
1536
  'type': 'integer',
1537
1537
  },
1538
+ 'cluster_event_retention_hours': {
1539
+ 'type': 'number',
1540
+ },
1538
1541
  }
1539
1542
  }
1540
1543
 
sky/utils/ux_utils.py CHANGED
@@ -26,9 +26,16 @@ BOLD = '\033[1m'
26
26
  RESET_BOLD = '\033[0m'
27
27
 
28
28
  # Log path hint in the spinner during launching
29
+ # (old, kept for backward compatibility)
29
30
  _LOG_PATH_HINT = (f'{colorama.Style.DIM}View logs: sky api logs -l '
30
31
  '{log_path}'
31
32
  f'{colorama.Style.RESET_ALL}')
33
+ # Log hint: recommend sky logs --provision <cluster_name>
34
+ _PROVISION_LOG_HINT = (
35
+ f'{colorama.Style.DIM}View logs: '
36
+ f'{BOLD}sky logs --provision {{cluster_name}}{RESET_BOLD}'
37
+ f'{colorama.Style.RESET_ALL}')
38
+ # Legacy path hint retained for local-only cases where we don't have cluster
32
39
  _LOG_PATH_HINT_LOCAL = (f'{colorama.Style.DIM}View logs: '
33
40
  '{log_path}'
34
41
  f'{colorama.Style.RESET_ALL}')
@@ -126,7 +133,10 @@ class RedirectOutputForProcess:
126
133
 
127
134
  def log_path_hint(log_path: Union[str, 'pathlib.Path'],
128
135
  is_local: bool = False) -> str:
129
- """Gets the log path hint for the given log path."""
136
+ """Gets the log path hint for the given log path.
137
+
138
+ Kept for backward compatibility when only paths are available.
139
+ """
130
140
  log_path = str(log_path)
131
141
  expanded_home = os.path.expanduser('~')
132
142
  if log_path.startswith(expanded_home):
@@ -139,6 +149,12 @@ def log_path_hint(log_path: Union[str, 'pathlib.Path'],
139
149
  return _LOG_PATH_HINT.format(log_path=log_path)
140
150
 
141
151
 
152
+ def provision_hint(cluster_name: Optional[str]) -> Optional[str]:
153
+ if not cluster_name:
154
+ return None
155
+ return _PROVISION_LOG_HINT.format(cluster_name=cluster_name)
156
+
157
+
142
158
  def starting_message(message: str) -> str:
143
159
  """Gets the starting message for the given message."""
144
160
  # We have to reset the color before the message, because sometimes if a
@@ -150,7 +166,8 @@ def starting_message(message: str) -> str:
150
166
  def finishing_message(message: str,
151
167
  log_path: Optional[Union[str, 'pathlib.Path']] = None,
152
168
  is_local: bool = False,
153
- follow_up_message: Optional[str] = None) -> str:
169
+ follow_up_message: Optional[str] = None,
170
+ cluster_name: Optional[str] = None) -> str:
154
171
  """Gets the finishing message for the given message.
155
172
 
156
173
  Args:
@@ -168,6 +185,9 @@ def finishing_message(message: str,
168
185
  success_prefix = (f'{colorama.Style.RESET_ALL}{colorama.Fore.GREEN}✓ '
169
186
  f'{message}{colorama.Style.RESET_ALL}{follow_up_message}'
170
187
  f'{colorama.Style.RESET_ALL}')
188
+ hint = provision_hint(cluster_name)
189
+ if hint:
190
+ return f'{success_prefix} {hint}'
171
191
  if log_path is None:
172
192
  return success_prefix
173
193
  path_hint = log_path_hint(log_path, is_local)
@@ -176,13 +196,17 @@ def finishing_message(message: str,
176
196
 
177
197
  def error_message(message: str,
178
198
  log_path: Optional[Union[str, 'pathlib.Path']] = None,
179
- is_local: bool = False) -> str:
199
+ is_local: bool = False,
200
+ cluster_name: Optional[str] = None) -> str:
180
201
  """Gets the error message for the given message."""
181
202
  # We have to reset the color before the message, because sometimes if a
182
203
  # previous spinner with dimmed color overflows in a narrow terminal, the
183
204
  # color might be messed up.
184
205
  error_prefix = (f'{colorama.Style.RESET_ALL}{colorama.Fore.RED}⨯'
185
206
  f'{colorama.Style.RESET_ALL} {message}')
207
+ hint = provision_hint(cluster_name)
208
+ if hint:
209
+ return f'{error_prefix} {hint}'
186
210
  if log_path is None:
187
211
  return error_prefix
188
212
  path_hint = log_path_hint(log_path, is_local)
@@ -200,9 +224,16 @@ def retry_message(message: str) -> str:
200
224
 
201
225
  def spinner_message(message: str,
202
226
  log_path: Optional[Union[str, 'pathlib.Path']] = None,
203
- is_local: bool = False) -> str:
204
- """Gets the spinner message for the given message and log path."""
227
+ is_local: bool = False,
228
+ cluster_name: Optional[str] = None) -> str:
229
+ """Gets the spinner message for the given message and log path.
230
+
231
+ If cluster_name is provided, recommend `sky logs --provision <cluster>`.
232
+ """
205
233
  colored_spinner = f'[bold cyan]{message}[/]'
234
+ hint = provision_hint(cluster_name)
235
+ if hint:
236
+ return f'{colored_spinner} {hint}'
206
237
  if log_path is None:
207
238
  return colored_spinner
208
239
  path_hint = log_path_hint(log_path, is_local)
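{skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250815.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@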
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250814
3
+ Version: 1.0.0.dev20250815
4
4
  Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0