skypilot-nightly 1.0.0.dev20250814__py3-none-any.whl → 1.0.0.dev20250815__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +2 -2
- sky/adaptors/nebius.py +43 -1
- sky/backends/backend_utils.py +6 -2
- sky/backends/cloud_vm_ray_backend.py +13 -4
- sky/client/cli/command.py +22 -8
- sky/client/sdk.py +50 -0
- sky/clouds/kubernetes.py +2 -6
- sky/clouds/nebius.py +3 -1
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/I-djf3wB8zZl_bI67BOyZ/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-a96678fed5043c12.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/3015-77d22ae2fad4071c.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.8ce85b31e5c602e9.js +1 -0
- sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +21 -0
- sky/dashboard/out/_next/static/chunks/4509-fa63866741388427.js +1 -0
- sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +10 -0
- sky/dashboard/out/_next/static/chunks/4725.68d5ce4d6bcb7991.js +1 -0
- sky/dashboard/out/_next/static/chunks/6014.d466a44b73af8348.js +6 -0
- sky/dashboard/out/_next/static/chunks/{6135-85426374db04811e.js → 6135-4b4d5e824b7f9d3c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6633-efe924b9b8136699.js +40 -0
- sky/dashboard/out/_next/static/chunks/6856-58370d8c9a79f72b.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +1 -0
- sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +6 -0
- sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +18 -0
- sky/dashboard/out/_next/static/chunks/7557-5855617d0421ed55.js +1 -0
- sky/dashboard/out/_next/static/chunks/8310.4ae62d5937045bf3.js +31 -0
- sky/dashboard/out/_next/static/chunks/8838.e7953f42af2b0544.js +45 -0
- sky/dashboard/out/_next/static/chunks/8969-6d493b1e2fa45826.js +1 -0
- sky/dashboard/out/_next/static/chunks/{1871-980a395e92633a5c.js → 9037-f71c3c42670a4be0.js} +2 -2
- sky/dashboard/out/_next/static/chunks/9277.71481d5b2e606e33.js +51 -0
- sky/dashboard/out/_next/static/chunks/pages/{_app-c2ea34fda4f1f8c8.js → _app-ce361c6959bc2001.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-078751bad714c017.js → [job]-6d43d6a6bd1d4c77.js} +2 -2
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-30c5954a7b1f67d7.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-fa94c3548b5834aa.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-13d53fffc03ccb52.js → [context]-5264c5645299cde9.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-fc9222e26c8e2f0d.js → infra-83991650ae4bd083.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-ad2cd5aab787bc15.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-664c36eda967b1ba.js → [pool]-7d4182df6625fe10.js} +2 -7
- sky/dashboard/out/_next/static/chunks/pages/jobs-c6a6a8a737ad7e2d.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-d112a9b3d854abb2.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b87fec189298a0c0.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-f72f73bcef9541dc.js → [name]-8a86ca4c98812df9.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-74ef46fc370f7c71.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-aba778a6d6eb496d.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/execution.py +13 -10
- sky/global_user_state.py +128 -1
- sky/jobs/constants.py +1 -1
- sky/jobs/scheduler.py +14 -21
- sky/jobs/server/core.py +64 -10
- sky/jobs/server/utils.py +1 -1
- sky/jobs/state.py +1 -3
- sky/jobs/utils.py +159 -8
- sky/provision/aws/config.py +19 -3
- sky/provision/aws/instance.py +2 -1
- sky/provision/nebius/utils.py +101 -86
- sky/provision/provisioner.py +13 -8
- sky/resources.py +5 -5
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/serve/replica_managers.py +123 -101
- sky/serve/serve_state.py +32 -0
- sky/serve/serve_utils.py +37 -16
- sky/serve/service.py +51 -17
- sky/server/constants.py +1 -1
- sky/server/requests/payloads.py +6 -0
- sky/server/requests/serializers/decoders.py +12 -2
- sky/server/requests/serializers/encoders.py +10 -2
- sky/server/server.py +44 -2
- sky/templates/kubernetes-ray.yml.j2 +1 -0
- sky/utils/common_utils.py +20 -0
- sky/utils/controller_utils.py +17 -4
- sky/utils/db/migration_utils.py +1 -1
- sky/utils/log_utils.py +14 -5
- sky/utils/resources_utils.py +25 -1
- sky/utils/schemas.py +3 -0
- sky/utils/ux_utils.py +36 -5
- {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250815.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250815.dist-info}/RECORD +99 -98
- sky/dashboard/out/_next/static/Y0eNlwi85qGRecLTin11y/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-a8a8f1adba34c892.js +0 -11
- sky/dashboard/out/_next/static/chunks/1559-6c00e20454194859.js +0 -30
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +0 -15
- sky/dashboard/out/_next/static/chunks/2641.142718b6b78a6f9b.js +0 -1
- sky/dashboard/out/_next/static/chunks/3785.6003d293cb83eab4.js +0 -1
- sky/dashboard/out/_next/static/chunks/4725.29550342bd53afd8.js +0 -1
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +0 -15
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +0 -13
- sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +0 -1
- sky/dashboard/out/_next/static/chunks/691.5eeedf82cc243343.js +0 -55
- sky/dashboard/out/_next/static/chunks/6990-0f886f16e0d55ff8.js +0 -1
- sky/dashboard/out/_next/static/chunks/8056-5bdeda81199c0def.js +0 -1
- sky/dashboard/out/_next/static/chunks/8252.62b0d23aed618bb2.js +0 -16
- sky/dashboard/out/_next/static/chunks/8969-c9686994ddafcf01.js +0 -1
- sky/dashboard/out/_next/static/chunks/9159-11421c0f2909236f.js +0 -1
- sky/dashboard/out/_next/static/chunks/9360.85b0b1b4054574dd.js +0 -31
- sky/dashboard/out/_next/static/chunks/9666.cd4273f2a5c5802c.js +0 -1
- sky/dashboard/out/_next/static/chunks/9847.757720f3b40c0aa5.js +0 -30
- sky/dashboard/out/_next/static/chunks/9984.c5564679e467d245.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-da9cc0901349c2e9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-b30460f683e6ba96.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-154f55cf8af55be5.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/jobs-cdc60fb5d371e16a.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-7ed36e44e779d5c7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-c9695d657f78b5dc.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-8f67be60165724cc.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-00c0a51d21157453.js +0 -1
- /sky/dashboard/out/_next/static/{Y0eNlwi85qGRecLTin11y → I-djf3wB8zZl_bI67BOyZ}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{6989-37611fe6b86d274d.js → 6989-01359c57e018caa4.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250815.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250815.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250815.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250815.dist-info}/top_level.txt +0 -0
|
@@ -102,8 +102,18 @@ def decode_queue(return_value: List[dict],) -> List[Dict[str, Any]]:
|
|
|
102
102
|
|
|
103
103
|
|
|
104
104
|
@register_decoders('jobs.queue')
|
|
105
|
-
def decode_jobs_queue(return_value
|
|
106
|
-
jobs
|
|
105
|
+
def decode_jobs_queue(return_value):
|
|
106
|
+
"""Decode jobs queue response.
|
|
107
|
+
|
|
108
|
+
Supports legacy list, or a dict {jobs, total}.
|
|
109
|
+
- Returns list[job]
|
|
110
|
+
"""
|
|
111
|
+
# Case 1: dict shape {jobs, total}
|
|
112
|
+
if isinstance(return_value, dict) and 'jobs' in return_value:
|
|
113
|
+
jobs = return_value.get('jobs', [])
|
|
114
|
+
else:
|
|
115
|
+
# Case 2: legacy list
|
|
116
|
+
jobs = return_value
|
|
107
117
|
for job in jobs:
|
|
108
118
|
job['status'] = managed_jobs.ManagedJobStatus(job['status'])
|
|
109
119
|
return jobs
|
|
@@ -106,10 +106,18 @@ def encode_status_kubernetes(
|
|
|
106
106
|
|
|
107
107
|
|
|
108
108
|
@register_encoder('jobs.queue')
|
|
109
|
-
def encode_jobs_queue(
|
|
109
|
+
def encode_jobs_queue(jobs_or_tuple):
|
|
110
|
+
# Support returning either a plain jobs list or a (jobs, total) tuple
|
|
111
|
+
if isinstance(jobs_or_tuple, tuple) and len(jobs_or_tuple) == 2:
|
|
112
|
+
jobs, total = jobs_or_tuple
|
|
113
|
+
else:
|
|
114
|
+
jobs = jobs_or_tuple
|
|
115
|
+
total = None
|
|
110
116
|
for job in jobs:
|
|
111
117
|
job['status'] = job['status'].value
|
|
112
|
-
|
|
118
|
+
if total is None:
|
|
119
|
+
return jobs
|
|
120
|
+
return {'jobs': jobs, 'total': total}
|
|
113
121
|
|
|
114
122
|
|
|
115
123
|
def _encode_serve_status(
|
sky/server/server.py
CHANGED
|
@@ -792,8 +792,6 @@ async def validate(validate_body: payloads.ValidateBody) -> None:
|
|
|
792
792
|
ctx.override_envs(validate_body.env_vars)
|
|
793
793
|
|
|
794
794
|
def validate_dag(dag: dag_utils.dag_lib.Dag):
|
|
795
|
-
# Resolve the volumes before admin policy and validation.
|
|
796
|
-
dag.resolve_and_validate_volumes()
|
|
797
795
|
# TODO: Admin policy may contain arbitrary code, which may be expensive
|
|
798
796
|
# to run and may block the server thread. However, moving it into the
|
|
799
797
|
# executor adds a ~150ms penalty on the local API server because of
|
|
@@ -802,6 +800,7 @@ async def validate(validate_body: payloads.ValidateBody) -> None:
|
|
|
802
800
|
with admin_policy_utils.apply_and_use_config_in_current_request(
|
|
803
801
|
dag,
|
|
804
802
|
request_options=validate_body.get_request_options()) as dag:
|
|
803
|
+
dag.resolve_and_validate_volumes()
|
|
805
804
|
# Skip validating workdir and file_mounts, as those need to be
|
|
806
805
|
# validated after the files are uploaded to the SkyPilot API server
|
|
807
806
|
# with `upload_mounts_to_api_server`.
|
|
@@ -1284,6 +1283,46 @@ async def download(download_body: payloads.DownloadBody) -> None:
|
|
|
1284
1283
|
detail=f'Error creating zip file: {str(e)}')
|
|
1285
1284
|
|
|
1286
1285
|
|
|
1286
|
+
@app.post('/provision_logs')
|
|
1287
|
+
async def provision_logs(cluster_body: payloads.ClusterNameBody,
|
|
1288
|
+
follow: bool = True,
|
|
1289
|
+
tail: int = 0) -> fastapi.responses.StreamingResponse:
|
|
1290
|
+
"""Streams the provision.log for the latest launch request of a cluster."""
|
|
1291
|
+
# Prefer clusters table first, then cluster_history as fallback.
|
|
1292
|
+
log_path_str = global_user_state.get_cluster_provision_log_path(
|
|
1293
|
+
cluster_body.cluster_name)
|
|
1294
|
+
if not log_path_str:
|
|
1295
|
+
log_path_str = global_user_state.get_cluster_history_provision_log_path(
|
|
1296
|
+
cluster_body.cluster_name)
|
|
1297
|
+
if not log_path_str:
|
|
1298
|
+
raise fastapi.HTTPException(
|
|
1299
|
+
status_code=404,
|
|
1300
|
+
detail=('Provision log path is not recorded for this cluster. '
|
|
1301
|
+
'Please relaunch to generate provisioning logs.'))
|
|
1302
|
+
|
|
1303
|
+
log_path = pathlib.Path(log_path_str).expanduser().resolve()
|
|
1304
|
+
if not log_path.exists():
|
|
1305
|
+
raise fastapi.HTTPException(
|
|
1306
|
+
status_code=404,
|
|
1307
|
+
detail=f'Provision log path does not exist: {str(log_path)}')
|
|
1308
|
+
|
|
1309
|
+
# Tail semantics: 0 means print all lines. Convert 0 -> None for streamer.
|
|
1310
|
+
effective_tail = None if tail is None or tail <= 0 else tail
|
|
1311
|
+
|
|
1312
|
+
return fastapi.responses.StreamingResponse(
|
|
1313
|
+
content=stream_utils.log_streamer(None,
|
|
1314
|
+
log_path,
|
|
1315
|
+
tail=effective_tail,
|
|
1316
|
+
follow=follow),
|
|
1317
|
+
media_type='text/plain',
|
|
1318
|
+
headers={
|
|
1319
|
+
'Cache-Control': 'no-cache, no-transform',
|
|
1320
|
+
'X-Accel-Buffering': 'no',
|
|
1321
|
+
'Transfer-Encoding': 'chunked',
|
|
1322
|
+
},
|
|
1323
|
+
)
|
|
1324
|
+
|
|
1325
|
+
|
|
1287
1326
|
@app.post('/cost_report')
|
|
1288
1327
|
async def cost_report(request: fastapi.Request,
|
|
1289
1328
|
cost_report_body: payloads.CostReportBody) -> None:
|
|
@@ -1815,6 +1854,9 @@ if __name__ == '__main__':
|
|
|
1815
1854
|
global_tasks.append(background.create_task(metrics_server.serve()))
|
|
1816
1855
|
global_tasks.append(
|
|
1817
1856
|
background.create_task(requests_lib.requests_gc_daemon()))
|
|
1857
|
+
global_tasks.append(
|
|
1858
|
+
background.create_task(
|
|
1859
|
+
global_user_state.cluster_event_retention_daemon()))
|
|
1818
1860
|
threading.Thread(target=background.run_forever, daemon=True).start()
|
|
1819
1861
|
|
|
1820
1862
|
queue_server, workers = executor.start(config)
|
sky/utils/common_utils.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Utils shared between all of sky"""
|
|
2
2
|
|
|
3
3
|
import difflib
|
|
4
|
+
import enum
|
|
4
5
|
import functools
|
|
5
6
|
import getpass
|
|
6
7
|
import hashlib
|
|
@@ -55,6 +56,25 @@ _VALID_ENV_VAR_REGEX = '[a-zA-Z_][a-zA-Z0-9_]*'
|
|
|
55
56
|
logger = sky_logging.init_logger(__name__)
|
|
56
57
|
|
|
57
58
|
|
|
59
|
+
class ProcessStatus(enum.Enum):
|
|
60
|
+
"""Process status."""
|
|
61
|
+
|
|
62
|
+
# The process is scheduled to run, but not started yet.
|
|
63
|
+
SCHEDULED = 'SCHEDULED'
|
|
64
|
+
|
|
65
|
+
# The process is running
|
|
66
|
+
RUNNING = 'RUNNING'
|
|
67
|
+
|
|
68
|
+
# The process is finished and succeeded
|
|
69
|
+
SUCCEEDED = 'SUCCEEDED'
|
|
70
|
+
|
|
71
|
+
# The process is interrupted
|
|
72
|
+
INTERRUPTED = 'INTERRUPTED'
|
|
73
|
+
|
|
74
|
+
# The process failed
|
|
75
|
+
FAILED = 'FAILED'
|
|
76
|
+
|
|
77
|
+
|
|
58
78
|
@annotations.lru_cache(scope='request')
|
|
59
79
|
def get_usage_run_id() -> str:
|
|
60
80
|
"""Returns a unique run id for each 'run'.
|
sky/utils/controller_utils.py
CHANGED
|
@@ -1224,13 +1224,26 @@ def _get_launch_parallelism() -> int:
|
|
|
1224
1224
|
|
|
1225
1225
|
|
|
1226
1226
|
def can_provision() -> bool:
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1227
|
+
# We always prioritize terminating over provisioning, to save the cost on
|
|
1228
|
+
# idle resources.
|
|
1229
|
+
if serve_state.total_number_scheduled_to_terminate_replicas() > 0:
|
|
1230
|
+
return False
|
|
1231
|
+
return can_terminate()
|
|
1231
1232
|
|
|
1232
1233
|
|
|
1233
1234
|
def can_start_new_process() -> bool:
|
|
1234
1235
|
num_procs = (serve_state.get_num_services() * SERVE_PROC_RATIO +
|
|
1235
1236
|
managed_job_state.get_num_alive_jobs())
|
|
1236
1237
|
return num_procs < _get_job_parallelism()
|
|
1238
|
+
|
|
1239
|
+
|
|
1240
|
+
# We limit the number of terminating replicas to the number of CPUs. This is
|
|
1241
|
+
# just a temporary solution to avoid overwhelming the controller. After one job
|
|
1242
|
+
# controller PR, we should use API server to handle resources management.
|
|
1243
|
+
def can_terminate() -> bool:
|
|
1244
|
+
num_terminating = (
|
|
1245
|
+
serve_state.total_number_provisioning_replicas() * SERVE_LAUNCH_RATIO +
|
|
1246
|
+
# Each terminate process will take roughly the same CPUs as job launch.
|
|
1247
|
+
serve_state.total_number_terminating_replicas() +
|
|
1248
|
+
managed_job_state.get_num_launching_jobs())
|
|
1249
|
+
return num_terminating < _get_launch_parallelism()
|
sky/utils/db/migration_utils.py
CHANGED
|
@@ -19,7 +19,7 @@ logger = sky_logging.init_logger(__name__)
|
|
|
19
19
|
DB_INIT_LOCK_TIMEOUT_SECONDS = 10
|
|
20
20
|
|
|
21
21
|
GLOBAL_USER_STATE_DB_NAME = 'state_db'
|
|
22
|
-
GLOBAL_USER_STATE_VERSION = '
|
|
22
|
+
GLOBAL_USER_STATE_VERSION = '006'
|
|
23
23
|
GLOBAL_USER_STATE_LOCK_PATH = '~/.sky/locks/.state_db.lock'
|
|
24
24
|
|
|
25
25
|
SPOT_JOBS_DB_NAME = 'spot_jobs_db'
|
sky/utils/log_utils.py
CHANGED
|
@@ -47,13 +47,16 @@ class RayUpLineProcessor(LineProcessor):
|
|
|
47
47
|
RUNTIME_SETUP = 1
|
|
48
48
|
PULLING_DOCKER_IMAGES = 2
|
|
49
49
|
|
|
50
|
-
def __init__(self, log_path: str):
|
|
50
|
+
def __init__(self, log_path: str, cluster_name: Optional[str] = None):
|
|
51
51
|
self.log_path = log_path
|
|
52
|
+
self.cluster_name = cluster_name
|
|
52
53
|
|
|
53
54
|
def __enter__(self) -> None:
|
|
54
55
|
self.state = self.ProvisionStatus.LAUNCH
|
|
55
56
|
self.status_display = rich_utils.safe_status(
|
|
56
|
-
ux_utils.spinner_message('Launching',
|
|
57
|
+
ux_utils.spinner_message('Launching',
|
|
58
|
+
self.log_path,
|
|
59
|
+
cluster_name=self.cluster_name))
|
|
57
60
|
self.status_display.start()
|
|
58
61
|
|
|
59
62
|
def process_line(self, log_line: str) -> None:
|
|
@@ -62,19 +65,25 @@ class RayUpLineProcessor(LineProcessor):
|
|
|
62
65
|
logger.info(' Head VM is up.')
|
|
63
66
|
self.status_display.update(
|
|
64
67
|
ux_utils.spinner_message(
|
|
65
|
-
'Launching - Preparing SkyPilot runtime',
|
|
68
|
+
'Launching - Preparing SkyPilot runtime',
|
|
69
|
+
self.log_path,
|
|
70
|
+
cluster_name=self.cluster_name))
|
|
66
71
|
self.state = self.ProvisionStatus.RUNTIME_SETUP
|
|
67
72
|
if ('Pulling from' in log_line and
|
|
68
73
|
self.state == self.ProvisionStatus.RUNTIME_SETUP):
|
|
69
74
|
self.status_display.update(
|
|
70
75
|
ux_utils.spinner_message(
|
|
71
|
-
'Launching - Initializing docker container',
|
|
76
|
+
'Launching - Initializing docker container',
|
|
77
|
+
self.log_path,
|
|
78
|
+
cluster_name=self.cluster_name))
|
|
72
79
|
self.state = self.ProvisionStatus.PULLING_DOCKER_IMAGES
|
|
73
80
|
if ('Status: Downloaded newer image' in log_line and
|
|
74
81
|
self.state == self.ProvisionStatus.PULLING_DOCKER_IMAGES):
|
|
75
82
|
self.status_display.update(
|
|
76
83
|
ux_utils.spinner_message(
|
|
77
|
-
'Launching - Preparing SkyPilot runtime',
|
|
84
|
+
'Launching - Preparing SkyPilot runtime',
|
|
85
|
+
self.log_path,
|
|
86
|
+
cluster_name=self.cluster_name))
|
|
78
87
|
self.state = self.ProvisionStatus.RUNTIME_SETUP
|
|
79
88
|
|
|
80
89
|
def __exit__(self, except_type: Optional[Type[BaseException]],
|
sky/utils/resources_utils.py
CHANGED
|
@@ -5,7 +5,7 @@ import itertools
|
|
|
5
5
|
import json
|
|
6
6
|
import math
|
|
7
7
|
import typing
|
|
8
|
-
from typing import Dict, List, Optional, Set, Union
|
|
8
|
+
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
|
9
9
|
|
|
10
10
|
from sky import skypilot_config
|
|
11
11
|
from sky.skylet import constants
|
|
@@ -435,3 +435,27 @@ def parse_time_minutes(time: str) -> int:
|
|
|
435
435
|
continue
|
|
436
436
|
|
|
437
437
|
raise ValueError(f'Invalid time format: {time}')
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def normalize_any_of_resources_config(
|
|
441
|
+
any_of: List[Dict[str, Any]]) -> Tuple[str, ...]:
|
|
442
|
+
"""Normalize a list of any_of resources config to a canonical form.
|
|
443
|
+
|
|
444
|
+
Args:
|
|
445
|
+
any_of: A list of any_of resources config.
|
|
446
|
+
|
|
447
|
+
Returns:
|
|
448
|
+
A normalized tuple representation that can be compared for equality.
|
|
449
|
+
Two lists with the same resource configurations in different orders
|
|
450
|
+
will produce the same normalized result.
|
|
451
|
+
"""
|
|
452
|
+
if not any_of:
|
|
453
|
+
return tuple()
|
|
454
|
+
|
|
455
|
+
# Convert each config to JSON string with sorted keys, then sort the list
|
|
456
|
+
normalized_configs = [
|
|
457
|
+
json.dumps(config, sort_keys=True, separators=(',', ':'))
|
|
458
|
+
for config in any_of
|
|
459
|
+
]
|
|
460
|
+
|
|
461
|
+
return tuple(sorted(normalized_configs))
|
sky/utils/schemas.py
CHANGED
sky/utils/ux_utils.py
CHANGED
|
@@ -26,9 +26,16 @@ BOLD = '\033[1m'
|
|
|
26
26
|
RESET_BOLD = '\033[0m'
|
|
27
27
|
|
|
28
28
|
# Log path hint in the spinner during launching
|
|
29
|
+
# (old, kept for backward compatibility)
|
|
29
30
|
_LOG_PATH_HINT = (f'{colorama.Style.DIM}View logs: sky api logs -l '
|
|
30
31
|
'{log_path}'
|
|
31
32
|
f'{colorama.Style.RESET_ALL}')
|
|
33
|
+
# Log hint: recommend sky logs --provision <cluster_name>
|
|
34
|
+
_PROVISION_LOG_HINT = (
|
|
35
|
+
f'{colorama.Style.DIM}View logs: '
|
|
36
|
+
f'{BOLD}sky logs --provision {{cluster_name}}{RESET_BOLD}'
|
|
37
|
+
f'{colorama.Style.RESET_ALL}')
|
|
38
|
+
# Legacy path hint retained for local-only cases where we don't have cluster
|
|
32
39
|
_LOG_PATH_HINT_LOCAL = (f'{colorama.Style.DIM}View logs: '
|
|
33
40
|
'{log_path}'
|
|
34
41
|
f'{colorama.Style.RESET_ALL}')
|
|
@@ -126,7 +133,10 @@ class RedirectOutputForProcess:
|
|
|
126
133
|
|
|
127
134
|
def log_path_hint(log_path: Union[str, 'pathlib.Path'],
|
|
128
135
|
is_local: bool = False) -> str:
|
|
129
|
-
"""Gets the log path hint for the given log path.
|
|
136
|
+
"""Gets the log path hint for the given log path.
|
|
137
|
+
|
|
138
|
+
Kept for backward compatibility when only paths are available.
|
|
139
|
+
"""
|
|
130
140
|
log_path = str(log_path)
|
|
131
141
|
expanded_home = os.path.expanduser('~')
|
|
132
142
|
if log_path.startswith(expanded_home):
|
|
@@ -139,6 +149,12 @@ def log_path_hint(log_path: Union[str, 'pathlib.Path'],
|
|
|
139
149
|
return _LOG_PATH_HINT.format(log_path=log_path)
|
|
140
150
|
|
|
141
151
|
|
|
152
|
+
def provision_hint(cluster_name: Optional[str]) -> Optional[str]:
|
|
153
|
+
if not cluster_name:
|
|
154
|
+
return None
|
|
155
|
+
return _PROVISION_LOG_HINT.format(cluster_name=cluster_name)
|
|
156
|
+
|
|
157
|
+
|
|
142
158
|
def starting_message(message: str) -> str:
|
|
143
159
|
"""Gets the starting message for the given message."""
|
|
144
160
|
# We have to reset the color before the message, because sometimes if a
|
|
@@ -150,7 +166,8 @@ def starting_message(message: str) -> str:
|
|
|
150
166
|
def finishing_message(message: str,
|
|
151
167
|
log_path: Optional[Union[str, 'pathlib.Path']] = None,
|
|
152
168
|
is_local: bool = False,
|
|
153
|
-
follow_up_message: Optional[str] = None
|
|
169
|
+
follow_up_message: Optional[str] = None,
|
|
170
|
+
cluster_name: Optional[str] = None) -> str:
|
|
154
171
|
"""Gets the finishing message for the given message.
|
|
155
172
|
|
|
156
173
|
Args:
|
|
@@ -168,6 +185,9 @@ def finishing_message(message: str,
|
|
|
168
185
|
success_prefix = (f'{colorama.Style.RESET_ALL}{colorama.Fore.GREEN}✓ '
|
|
169
186
|
f'{message}{colorama.Style.RESET_ALL}{follow_up_message}'
|
|
170
187
|
f'{colorama.Style.RESET_ALL}')
|
|
188
|
+
hint = provision_hint(cluster_name)
|
|
189
|
+
if hint:
|
|
190
|
+
return f'{success_prefix} {hint}'
|
|
171
191
|
if log_path is None:
|
|
172
192
|
return success_prefix
|
|
173
193
|
path_hint = log_path_hint(log_path, is_local)
|
|
@@ -176,13 +196,17 @@ def finishing_message(message: str,
|
|
|
176
196
|
|
|
177
197
|
def error_message(message: str,
|
|
178
198
|
log_path: Optional[Union[str, 'pathlib.Path']] = None,
|
|
179
|
-
is_local: bool = False
|
|
199
|
+
is_local: bool = False,
|
|
200
|
+
cluster_name: Optional[str] = None) -> str:
|
|
180
201
|
"""Gets the error message for the given message."""
|
|
181
202
|
# We have to reset the color before the message, because sometimes if a
|
|
182
203
|
# previous spinner with dimmed color overflows in a narrow terminal, the
|
|
183
204
|
# color might be messed up.
|
|
184
205
|
error_prefix = (f'{colorama.Style.RESET_ALL}{colorama.Fore.RED}⨯'
|
|
185
206
|
f'{colorama.Style.RESET_ALL} {message}')
|
|
207
|
+
hint = provision_hint(cluster_name)
|
|
208
|
+
if hint:
|
|
209
|
+
return f'{error_prefix} {hint}'
|
|
186
210
|
if log_path is None:
|
|
187
211
|
return error_prefix
|
|
188
212
|
path_hint = log_path_hint(log_path, is_local)
|
|
@@ -200,9 +224,16 @@ def retry_message(message: str) -> str:
|
|
|
200
224
|
|
|
201
225
|
def spinner_message(message: str,
|
|
202
226
|
log_path: Optional[Union[str, 'pathlib.Path']] = None,
|
|
203
|
-
is_local: bool = False
|
|
204
|
-
|
|
227
|
+
is_local: bool = False,
|
|
228
|
+
cluster_name: Optional[str] = None) -> str:
|
|
229
|
+
"""Gets the spinner message for the given message and log path.
|
|
230
|
+
|
|
231
|
+
If cluster_name is provided, recommend `sky logs --provision <cluster>`.
|
|
232
|
+
"""
|
|
205
233
|
colored_spinner = f'[bold cyan]{message}[/]'
|
|
234
|
+
hint = provision_hint(cluster_name)
|
|
235
|
+
if hint:
|
|
236
|
+
return f'{colored_spinner} {hint}'
|
|
206
237
|
if log_path is None:
|
|
207
238
|
return colored_spinner
|
|
208
239
|
path_hint = log_path_hint(log_path, is_local)
|