skypilot-nightly 1.0.0.dev20250303__py3-none-any.whl → 1.0.0.dev20250304__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +16 -8
- sky/cli.py +35 -17
- sky/client/cli.py +35 -17
- sky/client/sdk.py +32 -9
- sky/core.py +55 -6
- sky/exceptions.py +80 -1
- sky/jobs/client/sdk.py +7 -2
- sky/jobs/constants.py +1 -1
- sky/jobs/server/core.py +11 -6
- sky/jobs/utils.py +51 -21
- sky/server/constants.py +1 -1
- sky/server/requests/payloads.py +16 -8
- sky/server/server.py +15 -2
- sky/skylet/constants.py +1 -1
- sky/skylet/job_lib.py +10 -1
- {skypilot_nightly-1.0.0.dev20250303.dist-info → skypilot_nightly-1.0.0.dev20250304.dist-info}/METADATA +28 -41
- {skypilot_nightly-1.0.0.dev20250303.dist-info → skypilot_nightly-1.0.0.dev20250304.dist-info}/RECORD +22 -22
- {skypilot_nightly-1.0.0.dev20250303.dist-info → skypilot_nightly-1.0.0.dev20250304.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250303.dist-info → skypilot_nightly-1.0.0.dev20250304.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250303.dist-info → skypilot_nightly-1.0.0.dev20250304.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250303.dist-info → skypilot_nightly-1.0.0.dev20250304.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = '5c126400a3a3791b34beebd5e7b5a3717efd9505'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.
|
38
|
+
__version__ = '1.0.0.dev20250304'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
@@ -3823,6 +3823,10 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
3823
3823
|
follow: Whether to follow the logs.
|
3824
3824
|
tail: The number of lines to display from the end of the
|
3825
3825
|
log file. If 0, print all lines.
|
3826
|
+
|
3827
|
+
Returns:
|
3828
|
+
The exit code of the tail command. Returns code 100 if the job has
|
3829
|
+
failed. See exceptions.JobExitCode for possible return codes.
|
3826
3830
|
"""
|
3827
3831
|
code = job_lib.JobLibCodeGen.tail_logs(job_id,
|
3828
3832
|
managed_job_id=managed_job_id,
|
@@ -3856,7 +3860,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
3856
3860
|
job_id: Optional[int] = None,
|
3857
3861
|
job_name: Optional[str] = None,
|
3858
3862
|
controller: bool = False,
|
3859
|
-
follow: bool = True) ->
|
3863
|
+
follow: bool = True) -> int:
|
3860
3864
|
# if job_name is not None, job_id should be None
|
3861
3865
|
assert job_name is None or job_id is None, (job_name, job_id)
|
3862
3866
|
code = managed_jobs.ManagedJobCodeGen.stream_logs(
|
@@ -3869,13 +3873,17 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
3869
3873
|
signal.signal(signal.SIGTSTP, backend_utils.stop_handler)
|
3870
3874
|
|
3871
3875
|
# Refer to the notes in tail_logs.
|
3872
|
-
|
3873
|
-
|
3874
|
-
|
3875
|
-
|
3876
|
-
|
3877
|
-
|
3878
|
-
|
3876
|
+
try:
|
3877
|
+
returncode = self.run_on_head(
|
3878
|
+
handle,
|
3879
|
+
code,
|
3880
|
+
stream_logs=True,
|
3881
|
+
process_stream=False,
|
3882
|
+
ssh_mode=command_runner.SshMode.INTERACTIVE,
|
3883
|
+
)
|
3884
|
+
except SystemExit as e:
|
3885
|
+
returncode = e.code
|
3886
|
+
return returncode
|
3879
3887
|
|
3880
3888
|
def sync_down_managed_job_logs(
|
3881
3889
|
self,
|
sky/cli.py
CHANGED
@@ -1227,11 +1227,15 @@ def launch(
|
|
1227
1227
|
clusters=[handle.get_cluster_name()])
|
1228
1228
|
# job_id will be None if no job was submitted (e.g. no entrypoint
|
1229
1229
|
# provided)
|
1230
|
+
returncode = 0
|
1230
1231
|
if not detach_run and job_id is not None:
|
1231
|
-
sdk.tail_logs(handle.get_cluster_name(),
|
1232
|
+
returncode = sdk.tail_logs(handle.get_cluster_name(),
|
1233
|
+
job_id,
|
1234
|
+
follow=True)
|
1232
1235
|
click.secho(
|
1233
1236
|
ux_utils.command_hint_messages(ux_utils.CommandHintType.CLUSTER_JOB,
|
1234
1237
|
job_id, handle.get_cluster_name()))
|
1238
|
+
sys.exit(returncode)
|
1235
1239
|
|
1236
1240
|
|
1237
1241
|
@cli.command(cls=_DocumentedCodeCommand)
|
@@ -1377,7 +1381,8 @@ def exec(cluster: Optional[str], cluster_option: Optional[str],
|
|
1377
1381
|
job_id_handle = _async_call_or_wait(request_id, async_call, 'sky.exec')
|
1378
1382
|
if not async_call and not detach_run:
|
1379
1383
|
job_id, _ = job_id_handle
|
1380
|
-
sdk.tail_logs(cluster, job_id, follow=True)
|
1384
|
+
returncode = sdk.tail_logs(cluster, job_id, follow=True)
|
1385
|
+
sys.exit(returncode)
|
1381
1386
|
|
1382
1387
|
|
1383
1388
|
def _handle_jobs_queue_request(
|
@@ -2121,12 +2126,20 @@ def logs(
|
|
2121
2126
|
one job_id can be provided.
|
2122
2127
|
|
2123
2128
|
2. If ``--status`` is specified, print the status of the job and exit with
|
2124
|
-
returncode 0 if the job succeeded
|
2125
|
-
be specified.
|
2129
|
+
returncode 0 if the job succeeded. At most one job_id can
|
2130
|
+
be specified. Other possible return codes:
|
2131
|
+
|
2132
|
+
- 100: job failed.
|
2133
|
+
- 101: job not finished.
|
2134
|
+
- 102: job not found.
|
2135
|
+
- 103: job was cancelled by the user.
|
2126
2136
|
|
2127
2137
|
3. If ``--sync-down`` is specified, the logs of the job will be downloaded
|
2128
2138
|
from the cluster and saved to the local machine under
|
2129
|
-
``~/sky_logs``.
|
2139
|
+
``~/sky_logs``. Multiple job_ids can be specified.
|
2140
|
+
|
2141
|
+
4. If the job fails or fetching the logs fails, the command will exit with
|
2142
|
+
a non-zero return code.
|
2130
2143
|
"""
|
2131
2144
|
if sync_down and status:
|
2132
2145
|
raise click.UsageError(
|
@@ -2174,17 +2187,18 @@ def logs(
|
|
2174
2187
|
# it will return {None: None}.
|
2175
2188
|
if job_id is None:
|
2176
2189
|
click.secho(f'No job found on cluster {cluster!r}.', fg='red')
|
2177
|
-
sys.exit(
|
2190
|
+
sys.exit(exceptions.JobExitCode.NOT_FOUND)
|
2178
2191
|
job_status = list(job_statuses.values())[0]
|
2179
2192
|
job_status_str = job_status.value if job_status is not None else 'None'
|
2180
2193
|
click.echo(f'Job {job_id}: {job_status_str}')
|
2181
2194
|
if job_status == job_lib.JobStatus.SUCCEEDED:
|
2182
2195
|
return
|
2183
2196
|
else:
|
2197
|
+
returncode = exceptions.JobExitCode.from_job_status(job_status)
|
2184
2198
|
if job_status is None:
|
2185
2199
|
id_str = '' if job_id is None else f'{job_id} '
|
2186
2200
|
click.secho(f'Job {id_str}not found', fg='red')
|
2187
|
-
sys.exit(
|
2201
|
+
sys.exit(returncode)
|
2188
2202
|
|
2189
2203
|
job_str = f'job {job_id}'
|
2190
2204
|
if job_id is None:
|
@@ -2194,7 +2208,8 @@ def logs(
|
|
2194
2208
|
f'{colorama.Style.RESET_ALL}')
|
2195
2209
|
|
2196
2210
|
# Stream logs from the server.
|
2197
|
-
sdk.tail_logs(cluster, job_id, follow, tail=tail)
|
2211
|
+
returncode = sdk.tail_logs(cluster, job_id, follow, tail=tail)
|
2212
|
+
sys.exit(returncode)
|
2198
2213
|
|
2199
2214
|
|
2200
2215
|
@cli.command()
|
@@ -3729,6 +3744,7 @@ def storage_delete(names: List[str], all: bool, yes: bool, async_call: bool): #
|
|
3729
3744
|
if not storages:
|
3730
3745
|
click.echo('No storage(s) to delete.')
|
3731
3746
|
return
|
3747
|
+
names = [storage['name'] for storage in storages]
|
3732
3748
|
else:
|
3733
3749
|
names = _get_glob_storages(names)
|
3734
3750
|
if names:
|
@@ -3893,10 +3909,11 @@ def jobs_launch(
|
|
3893
3909
|
'sky.jobs.launch')
|
3894
3910
|
if not async_call and not detach_run:
|
3895
3911
|
job_id = job_id_handle[0]
|
3896
|
-
managed_jobs.tail_logs(name=None,
|
3897
|
-
|
3898
|
-
|
3899
|
-
|
3912
|
+
returncode = managed_jobs.tail_logs(name=None,
|
3913
|
+
job_id=job_id,
|
3914
|
+
follow=True,
|
3915
|
+
controller=False)
|
3916
|
+
sys.exit(returncode)
|
3900
3917
|
|
3901
3918
|
|
3902
3919
|
@jobs.command('queue', cls=_DocumentedCodeCommand)
|
@@ -4127,11 +4144,12 @@ def jobs_logs(name: Optional[str], job_id: Optional[int], follow: bool,
|
|
4127
4144
|
logger.info(f'{fore.CYAN}Job {job} logs{controller_str}: '
|
4128
4145
|
f'{log_local_path}{style.RESET_ALL}')
|
4129
4146
|
else:
|
4130
|
-
managed_jobs.tail_logs(name=name,
|
4131
|
-
|
4132
|
-
|
4133
|
-
|
4134
|
-
|
4147
|
+
returncode = managed_jobs.tail_logs(name=name,
|
4148
|
+
job_id=job_id,
|
4149
|
+
follow=follow,
|
4150
|
+
controller=controller,
|
4151
|
+
refresh=refresh)
|
4152
|
+
sys.exit(returncode)
|
4135
4153
|
except exceptions.ClusterNotUpError:
|
4136
4154
|
with ux_utils.print_exception_no_traceback():
|
4137
4155
|
raise
|
sky/client/cli.py
CHANGED
@@ -1227,11 +1227,15 @@ def launch(
|
|
1227
1227
|
clusters=[handle.get_cluster_name()])
|
1228
1228
|
# job_id will be None if no job was submitted (e.g. no entrypoint
|
1229
1229
|
# provided)
|
1230
|
+
returncode = 0
|
1230
1231
|
if not detach_run and job_id is not None:
|
1231
|
-
sdk.tail_logs(handle.get_cluster_name(),
|
1232
|
+
returncode = sdk.tail_logs(handle.get_cluster_name(),
|
1233
|
+
job_id,
|
1234
|
+
follow=True)
|
1232
1235
|
click.secho(
|
1233
1236
|
ux_utils.command_hint_messages(ux_utils.CommandHintType.CLUSTER_JOB,
|
1234
1237
|
job_id, handle.get_cluster_name()))
|
1238
|
+
sys.exit(returncode)
|
1235
1239
|
|
1236
1240
|
|
1237
1241
|
@cli.command(cls=_DocumentedCodeCommand)
|
@@ -1377,7 +1381,8 @@ def exec(cluster: Optional[str], cluster_option: Optional[str],
|
|
1377
1381
|
job_id_handle = _async_call_or_wait(request_id, async_call, 'sky.exec')
|
1378
1382
|
if not async_call and not detach_run:
|
1379
1383
|
job_id, _ = job_id_handle
|
1380
|
-
sdk.tail_logs(cluster, job_id, follow=True)
|
1384
|
+
returncode = sdk.tail_logs(cluster, job_id, follow=True)
|
1385
|
+
sys.exit(returncode)
|
1381
1386
|
|
1382
1387
|
|
1383
1388
|
def _handle_jobs_queue_request(
|
@@ -2121,12 +2126,20 @@ def logs(
|
|
2121
2126
|
one job_id can be provided.
|
2122
2127
|
|
2123
2128
|
2. If ``--status`` is specified, print the status of the job and exit with
|
2124
|
-
returncode 0 if the job succeeded
|
2125
|
-
be specified.
|
2129
|
+
returncode 0 if the job succeeded. At most one job_id can
|
2130
|
+
be specified. Other possible return codes:
|
2131
|
+
|
2132
|
+
- 100: job failed.
|
2133
|
+
- 101: job not finished.
|
2134
|
+
- 102: job not found.
|
2135
|
+
- 103: job was cancelled by the user.
|
2126
2136
|
|
2127
2137
|
3. If ``--sync-down`` is specified, the logs of the job will be downloaded
|
2128
2138
|
from the cluster and saved to the local machine under
|
2129
|
-
``~/sky_logs``.
|
2139
|
+
``~/sky_logs``. Multiple job_ids can be specified.
|
2140
|
+
|
2141
|
+
4. If the job fails or fetching the logs fails, the command will exit with
|
2142
|
+
a non-zero return code.
|
2130
2143
|
"""
|
2131
2144
|
if sync_down and status:
|
2132
2145
|
raise click.UsageError(
|
@@ -2174,17 +2187,18 @@ def logs(
|
|
2174
2187
|
# it will return {None: None}.
|
2175
2188
|
if job_id is None:
|
2176
2189
|
click.secho(f'No job found on cluster {cluster!r}.', fg='red')
|
2177
|
-
sys.exit(
|
2190
|
+
sys.exit(exceptions.JobExitCode.NOT_FOUND)
|
2178
2191
|
job_status = list(job_statuses.values())[0]
|
2179
2192
|
job_status_str = job_status.value if job_status is not None else 'None'
|
2180
2193
|
click.echo(f'Job {job_id}: {job_status_str}')
|
2181
2194
|
if job_status == job_lib.JobStatus.SUCCEEDED:
|
2182
2195
|
return
|
2183
2196
|
else:
|
2197
|
+
returncode = exceptions.JobExitCode.from_job_status(job_status)
|
2184
2198
|
if job_status is None:
|
2185
2199
|
id_str = '' if job_id is None else f'{job_id} '
|
2186
2200
|
click.secho(f'Job {id_str}not found', fg='red')
|
2187
|
-
sys.exit(
|
2201
|
+
sys.exit(returncode)
|
2188
2202
|
|
2189
2203
|
job_str = f'job {job_id}'
|
2190
2204
|
if job_id is None:
|
@@ -2194,7 +2208,8 @@ def logs(
|
|
2194
2208
|
f'{colorama.Style.RESET_ALL}')
|
2195
2209
|
|
2196
2210
|
# Stream logs from the server.
|
2197
|
-
sdk.tail_logs(cluster, job_id, follow, tail=tail)
|
2211
|
+
returncode = sdk.tail_logs(cluster, job_id, follow, tail=tail)
|
2212
|
+
sys.exit(returncode)
|
2198
2213
|
|
2199
2214
|
|
2200
2215
|
@cli.command()
|
@@ -3729,6 +3744,7 @@ def storage_delete(names: List[str], all: bool, yes: bool, async_call: bool): #
|
|
3729
3744
|
if not storages:
|
3730
3745
|
click.echo('No storage(s) to delete.')
|
3731
3746
|
return
|
3747
|
+
names = [storage['name'] for storage in storages]
|
3732
3748
|
else:
|
3733
3749
|
names = _get_glob_storages(names)
|
3734
3750
|
if names:
|
@@ -3893,10 +3909,11 @@ def jobs_launch(
|
|
3893
3909
|
'sky.jobs.launch')
|
3894
3910
|
if not async_call and not detach_run:
|
3895
3911
|
job_id = job_id_handle[0]
|
3896
|
-
managed_jobs.tail_logs(name=None,
|
3897
|
-
|
3898
|
-
|
3899
|
-
|
3912
|
+
returncode = managed_jobs.tail_logs(name=None,
|
3913
|
+
job_id=job_id,
|
3914
|
+
follow=True,
|
3915
|
+
controller=False)
|
3916
|
+
sys.exit(returncode)
|
3900
3917
|
|
3901
3918
|
|
3902
3919
|
@jobs.command('queue', cls=_DocumentedCodeCommand)
|
@@ -4127,11 +4144,12 @@ def jobs_logs(name: Optional[str], job_id: Optional[int], follow: bool,
|
|
4127
4144
|
logger.info(f'{fore.CYAN}Job {job} logs{controller_str}: '
|
4128
4145
|
f'{log_local_path}{style.RESET_ALL}')
|
4129
4146
|
else:
|
4130
|
-
managed_jobs.tail_logs(name=name,
|
4131
|
-
|
4132
|
-
|
4133
|
-
|
4134
|
-
|
4147
|
+
returncode = managed_jobs.tail_logs(name=name,
|
4148
|
+
job_id=job_id,
|
4149
|
+
follow=follow,
|
4150
|
+
controller=controller,
|
4151
|
+
refresh=refresh)
|
4152
|
+
sys.exit(returncode)
|
4135
4153
|
except exceptions.ClusterNotUpError:
|
4136
4154
|
with ux_utils.print_exception_no_traceback():
|
4137
4155
|
raise
|
sky/client/sdk.py
CHANGED
@@ -25,6 +25,7 @@ import filelock
|
|
25
25
|
import psutil
|
26
26
|
import requests
|
27
27
|
|
28
|
+
from sky import admin_policy
|
28
29
|
from sky import backends
|
29
30
|
from sky import exceptions
|
30
31
|
from sky import sky_logging
|
@@ -212,13 +213,17 @@ def list_accelerator_counts(
|
|
212
213
|
@annotations.client_api
|
213
214
|
def optimize(
|
214
215
|
dag: 'sky.Dag',
|
215
|
-
minimize: common.OptimizeTarget = common.OptimizeTarget.COST
|
216
|
+
minimize: common.OptimizeTarget = common.OptimizeTarget.COST,
|
217
|
+
admin_policy_request_options: Optional[admin_policy.RequestOptions] = None
|
216
218
|
) -> server_common.RequestId:
|
217
219
|
"""Finds the best execution plan for the given DAG.
|
218
220
|
|
219
221
|
Args:
|
220
222
|
dag: the DAG to optimize.
|
221
223
|
minimize: whether to minimize cost or time.
|
224
|
+
admin_policy_request_options: Request options used for admin policy
|
225
|
+
validation. This is only required when a admin policy is in use,
|
226
|
+
see: https://docs.skypilot.co/en/latest/cloud-setup/policy.html
|
222
227
|
|
223
228
|
Returns:
|
224
229
|
The request ID of the optimize request.
|
@@ -233,7 +238,9 @@ def optimize(
|
|
233
238
|
"""
|
234
239
|
dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
|
235
240
|
|
236
|
-
body = payloads.OptimizeBody(dag=dag_str,
|
241
|
+
body = payloads.OptimizeBody(dag=dag_str,
|
242
|
+
minimize=minimize,
|
243
|
+
request_options=admin_policy_request_options)
|
237
244
|
response = requests.post(f'{server_common.get_server_url()}/optimize',
|
238
245
|
json=json.loads(body.model_dump_json()))
|
239
246
|
return server_common.get_request_id(response)
|
@@ -242,7 +249,11 @@ def optimize(
|
|
242
249
|
@usage_lib.entrypoint
|
243
250
|
@server_common.check_server_healthy_or_start
|
244
251
|
@annotations.client_api
|
245
|
-
def validate(
|
252
|
+
def validate(
|
253
|
+
dag: 'sky.Dag',
|
254
|
+
workdir_only: bool = False,
|
255
|
+
admin_policy_request_options: Optional[admin_policy.RequestOptions] = None
|
256
|
+
) -> None:
|
246
257
|
"""Validates the tasks.
|
247
258
|
|
248
259
|
The file paths (workdir and file_mounts) are validated on the client side
|
@@ -254,13 +265,17 @@ def validate(dag: 'sky.Dag', workdir_only: bool = False) -> None:
|
|
254
265
|
dag: the DAG to validate.
|
255
266
|
workdir_only: whether to only validate the workdir. This is used for
|
256
267
|
`exec` as it does not need other files/folders in file_mounts.
|
268
|
+
admin_policy_request_options: Request options used for admin policy
|
269
|
+
validation. This is only required when a admin policy is in use,
|
270
|
+
see: https://docs.skypilot.co/en/latest/cloud-setup/policy.html
|
257
271
|
"""
|
258
272
|
for task in dag.tasks:
|
259
273
|
task.expand_and_validate_workdir()
|
260
274
|
if not workdir_only:
|
261
275
|
task.expand_and_validate_file_mounts()
|
262
276
|
dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
|
263
|
-
body = payloads.ValidateBody(dag=dag_str
|
277
|
+
body = payloads.ValidateBody(dag=dag_str,
|
278
|
+
request_options=admin_policy_request_options)
|
264
279
|
response = requests.post(f'{server_common.get_server_url()}/validate',
|
265
280
|
json=json.loads(body.model_dump_json()))
|
266
281
|
if response.status_code == 400:
|
@@ -386,7 +401,12 @@ def launch(
|
|
386
401
|
'Please contact the SkyPilot team if you '
|
387
402
|
'need this feature at slack.skypilot.co.')
|
388
403
|
dag = dag_utils.convert_entrypoint_to_dag(task)
|
389
|
-
|
404
|
+
request_options = admin_policy.RequestOptions(
|
405
|
+
cluster_name=cluster_name,
|
406
|
+
idle_minutes_to_autostop=idle_minutes_to_autostop,
|
407
|
+
down=down,
|
408
|
+
dryrun=dryrun)
|
409
|
+
validate(dag, admin_policy_request_options=request_options)
|
390
410
|
|
391
411
|
confirm_shown = False
|
392
412
|
if _need_confirmation:
|
@@ -400,7 +420,8 @@ def launch(
|
|
400
420
|
if not clusters:
|
401
421
|
# Show the optimize log before the prompt if the cluster does not
|
402
422
|
# exist.
|
403
|
-
request_id = optimize(dag
|
423
|
+
request_id = optimize(dag,
|
424
|
+
admin_policy_request_options=request_options)
|
404
425
|
stream_and_get(request_id)
|
405
426
|
else:
|
406
427
|
cluster_record = clusters[0]
|
@@ -562,7 +583,7 @@ def tail_logs(cluster_name: str,
|
|
562
583
|
job_id: Optional[int],
|
563
584
|
follow: bool,
|
564
585
|
tail: int = 0,
|
565
|
-
output_stream: Optional['io.TextIOBase'] = None) ->
|
586
|
+
output_stream: Optional['io.TextIOBase'] = None) -> int:
|
566
587
|
"""Tails the logs of a job.
|
567
588
|
|
568
589
|
Args:
|
@@ -575,7 +596,9 @@ def tail_logs(cluster_name: str,
|
|
575
596
|
console.
|
576
597
|
|
577
598
|
Returns:
|
578
|
-
|
599
|
+
Exit code based on success or failure of the job. 0 if success,
|
600
|
+
100 if the job failed. See exceptions.JobExitCode for possible exit
|
601
|
+
codes.
|
579
602
|
|
580
603
|
Request Raises:
|
581
604
|
ValueError: if arguments are invalid or the cluster is not supported.
|
@@ -601,7 +624,7 @@ def tail_logs(cluster_name: str,
|
|
601
624
|
timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
|
602
625
|
None))
|
603
626
|
request_id = server_common.get_request_id(response)
|
604
|
-
stream_response(request_id, response, output_stream)
|
627
|
+
return stream_response(request_id, response, output_stream)
|
605
628
|
|
606
629
|
|
607
630
|
@usage_lib.entrypoint
|
sky/core.py
CHANGED
@@ -6,16 +6,18 @@ from typing import Any, Dict, List, Optional, Tuple, Union
|
|
6
6
|
|
7
7
|
import colorama
|
8
8
|
|
9
|
+
from sky import admin_policy
|
9
10
|
from sky import backends
|
10
11
|
from sky import check as sky_check
|
11
12
|
from sky import clouds
|
12
|
-
from sky import dag
|
13
|
+
from sky import dag as dag_lib
|
13
14
|
from sky import data
|
14
15
|
from sky import exceptions
|
15
16
|
from sky import global_user_state
|
16
17
|
from sky import models
|
18
|
+
from sky import optimizer
|
17
19
|
from sky import sky_logging
|
18
|
-
from sky import task
|
20
|
+
from sky import task as task_lib
|
19
21
|
from sky.backends import backend_utils
|
20
22
|
from sky.clouds import service_catalog
|
21
23
|
from sky.jobs.server import core as managed_jobs_core
|
@@ -25,6 +27,7 @@ from sky.skylet import constants
|
|
25
27
|
from sky.skylet import job_lib
|
26
28
|
from sky.skylet import log_lib
|
27
29
|
from sky.usage import usage_lib
|
30
|
+
from sky.utils import admin_policy_utils
|
28
31
|
from sky.utils import common
|
29
32
|
from sky.utils import common_utils
|
30
33
|
from sky.utils import controller_utils
|
@@ -44,6 +47,46 @@ logger = sky_logging.init_logger(__name__)
|
|
44
47
|
# ======================
|
45
48
|
|
46
49
|
|
50
|
+
@usage_lib.entrypoint
|
51
|
+
def optimize(
|
52
|
+
dag: 'dag_lib.Dag',
|
53
|
+
minimize: common.OptimizeTarget = common.OptimizeTarget.COST,
|
54
|
+
blocked_resources: Optional[List['resources_lib.Resources']] = None,
|
55
|
+
quiet: bool = False,
|
56
|
+
request_options: Optional[admin_policy.RequestOptions] = None
|
57
|
+
) -> 'dag_lib.Dag':
|
58
|
+
"""Finds the best execution plan for the given DAG.
|
59
|
+
|
60
|
+
Args:
|
61
|
+
dag: the DAG to optimize.
|
62
|
+
minimize: whether to minimize cost or time.
|
63
|
+
blocked_resources: a list of resources that should not be used.
|
64
|
+
quiet: whether to suppress logging.
|
65
|
+
request_options: Request options used in enforcing admin policies.
|
66
|
+
This is only required when a admin policy is in use,
|
67
|
+
see: https://docs.skypilot.co/en/latest/cloud-setup/policy.html
|
68
|
+
Returns:
|
69
|
+
The optimized DAG.
|
70
|
+
|
71
|
+
Raises:
|
72
|
+
exceptions.ResourcesUnavailableError: if no resources are available
|
73
|
+
for a task.
|
74
|
+
exceptions.NoCloudAccessError: if no public clouds are enabled.
|
75
|
+
"""
|
76
|
+
# TODO: We apply the admin policy only on the first DAG optimization which
|
77
|
+
# is shown on `sky launch`. The optimizer is also invoked during failover,
|
78
|
+
# but we do not apply the admin policy there. We should apply the admin
|
79
|
+
# policy in the optimizer, but that will require some refactoring.
|
80
|
+
dag, _ = admin_policy_utils.apply(
|
81
|
+
dag,
|
82
|
+
use_mutated_config_in_current_request=True,
|
83
|
+
request_options=request_options)
|
84
|
+
return optimizer.Optimizer.optimize(dag=dag,
|
85
|
+
minimize=minimize,
|
86
|
+
blocked_resources=blocked_resources,
|
87
|
+
quiet=quiet)
|
88
|
+
|
89
|
+
|
47
90
|
@usage_lib.entrypoint
|
48
91
|
def status(
|
49
92
|
cluster_names: Optional[Union[str, List[str]]] = None,
|
@@ -325,8 +368,8 @@ def _start(
|
|
325
368
|
|
326
369
|
usage_lib.record_cluster_name_for_current_operation(cluster_name)
|
327
370
|
|
328
|
-
with
|
329
|
-
dummy_task =
|
371
|
+
with dag_lib.Dag():
|
372
|
+
dummy_task = task_lib.Task().set_resources(handle.launched_resources)
|
330
373
|
dummy_task.num_nodes = handle.launched_nodes
|
331
374
|
handle = backend.provision(dummy_task,
|
332
375
|
to_provision=handle.launched_resources,
|
@@ -783,7 +826,7 @@ def cancel(
|
|
783
826
|
def tail_logs(cluster_name: str,
|
784
827
|
job_id: Optional[int],
|
785
828
|
follow: bool = True,
|
786
|
-
tail: int = 0) ->
|
829
|
+
tail: int = 0) -> int:
|
787
830
|
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
|
788
831
|
"""Tails the logs of a job.
|
789
832
|
|
@@ -799,6 +842,12 @@ def tail_logs(cluster_name: str,
|
|
799
842
|
not the same as the user who created the cluster.
|
800
843
|
sky.exceptions.CloudUserIdentityError: if we fail to get the current
|
801
844
|
user identity.
|
845
|
+
|
846
|
+
Returns:
|
847
|
+
Return code based on success or failure of the job. 0 if success,
|
848
|
+
100 if the job failed. Note: This is not the return code of the job
|
849
|
+
script.
|
850
|
+
|
802
851
|
"""
|
803
852
|
# Check the status of the cluster.
|
804
853
|
handle = backend_utils.check_cluster_available(
|
@@ -808,7 +857,7 @@ def tail_logs(cluster_name: str,
|
|
808
857
|
backend = backend_utils.get_backend_from_handle(handle)
|
809
858
|
|
810
859
|
usage_lib.record_cluster_name_for_current_operation(cluster_name)
|
811
|
-
backend.tail_logs(handle, job_id, follow=follow, tail=tail)
|
860
|
+
return backend.tail_logs(handle, job_id, follow=follow, tail=tail)
|
812
861
|
|
813
862
|
|
814
863
|
@usage_lib.entrypoint
|
sky/exceptions.py
CHANGED
@@ -9,7 +9,9 @@ from typing import Any, Dict, List, Optional, Sequence
|
|
9
9
|
from sky.utils import env_options
|
10
10
|
|
11
11
|
if typing.TYPE_CHECKING:
|
12
|
+
from sky import jobs as managed_jobs
|
12
13
|
from sky.backends import backend
|
14
|
+
from sky.skylet import job_lib
|
13
15
|
from sky.utils import status_lib
|
14
16
|
|
15
17
|
# Return code for keyboard interruption and SIGTSTP
|
@@ -236,7 +238,7 @@ class CommandError(SkyPilotExcludeArgsBaseException):
|
|
236
238
|
else:
|
237
239
|
if (len(command) > 100 and
|
238
240
|
not env_options.Options.SHOW_DEBUG_INFO.get()):
|
239
|
-
#
|
241
|
+
# Chunk the command to avoid overflow.
|
240
242
|
command = command[:100] + '...'
|
241
243
|
message = (f'Command {command} failed with return code '
|
242
244
|
f'{returncode}.\n{error_msg}')
|
@@ -449,3 +451,80 @@ class ApiServerConnectionError(RuntimeError):
|
|
449
451
|
f'Could not connect to SkyPilot API server at {server_url}. '
|
450
452
|
f'Please ensure that the server is running. '
|
451
453
|
f'Try: curl {server_url}/api/health')
|
454
|
+
|
455
|
+
|
456
|
+
class JobExitCode(enum.IntEnum):
|
457
|
+
"""Job exit code enum.
|
458
|
+
|
459
|
+
These codes are used as return codes for job-related operations and as
|
460
|
+
process exit codes to indicate job status.
|
461
|
+
"""
|
462
|
+
|
463
|
+
SUCCEEDED = 0
|
464
|
+
"""The job completed successfully"""
|
465
|
+
|
466
|
+
FAILED = 100
|
467
|
+
"""The job failed (due to user code, setup, or driver failure)"""
|
468
|
+
|
469
|
+
NOT_FINISHED = 101
|
470
|
+
"""The job has not finished yet"""
|
471
|
+
|
472
|
+
NOT_FOUND = 102
|
473
|
+
"""The job was not found"""
|
474
|
+
|
475
|
+
CANCELLED = 103
|
476
|
+
"""The job was cancelled by the user"""
|
477
|
+
|
478
|
+
@classmethod
|
479
|
+
def from_job_status(cls,
|
480
|
+
status: Optional['job_lib.JobStatus']) -> 'JobExitCode':
|
481
|
+
"""Convert a job status to an exit code."""
|
482
|
+
# Import here to avoid circular imports
|
483
|
+
# pylint: disable=import-outside-toplevel
|
484
|
+
from sky.skylet import job_lib
|
485
|
+
|
486
|
+
if status is None:
|
487
|
+
return cls.NOT_FOUND
|
488
|
+
|
489
|
+
if not status.is_terminal():
|
490
|
+
return cls.NOT_FINISHED
|
491
|
+
|
492
|
+
if status == job_lib.JobStatus.SUCCEEDED:
|
493
|
+
return cls.SUCCEEDED
|
494
|
+
|
495
|
+
if status == job_lib.JobStatus.CANCELLED:
|
496
|
+
return cls.CANCELLED
|
497
|
+
|
498
|
+
if status in job_lib.JobStatus.user_code_failure_states(
|
499
|
+
) or status == job_lib.JobStatus.FAILED_DRIVER:
|
500
|
+
return cls.FAILED
|
501
|
+
|
502
|
+
# Should not hit this case, but included to avoid errors
|
503
|
+
return cls.FAILED
|
504
|
+
|
505
|
+
@classmethod
|
506
|
+
def from_managed_job_status(
|
507
|
+
cls,
|
508
|
+
status: Optional['managed_jobs.ManagedJobStatus']) -> 'JobExitCode':
|
509
|
+
"""Convert a managed job status to an exit code."""
|
510
|
+
# Import here to avoid circular imports
|
511
|
+
# pylint: disable=import-outside-toplevel
|
512
|
+
from sky import jobs as managed_jobs
|
513
|
+
|
514
|
+
if status is None:
|
515
|
+
return cls.NOT_FOUND
|
516
|
+
|
517
|
+
if not status.is_terminal():
|
518
|
+
return cls.NOT_FINISHED
|
519
|
+
|
520
|
+
if status == managed_jobs.ManagedJobStatus.SUCCEEDED:
|
521
|
+
return cls.SUCCEEDED
|
522
|
+
|
523
|
+
if status == managed_jobs.ManagedJobStatus.CANCELLED:
|
524
|
+
return cls.CANCELLED
|
525
|
+
|
526
|
+
if status.is_failed():
|
527
|
+
return cls.FAILED
|
528
|
+
|
529
|
+
# Should not hit this case, but included to avoid errors
|
530
|
+
return cls.FAILED
|
sky/jobs/client/sdk.py
CHANGED
@@ -184,7 +184,7 @@ def tail_logs(name: Optional[str] = None,
|
|
184
184
|
follow: bool = True,
|
185
185
|
controller: bool = False,
|
186
186
|
refresh: bool = False,
|
187
|
-
output_stream: Optional['io.TextIOBase'] = None) ->
|
187
|
+
output_stream: Optional['io.TextIOBase'] = None) -> int:
|
188
188
|
"""Tails logs of managed jobs.
|
189
189
|
|
190
190
|
You can provide either a job name or a job ID to tail logs. If both are not
|
@@ -199,6 +199,11 @@ def tail_logs(name: Optional[str] = None,
|
|
199
199
|
output_stream: The stream to write the logs to. If None, print to the
|
200
200
|
console.
|
201
201
|
|
202
|
+
Returns:
|
203
|
+
Exit code based on success or failure of the job. 0 if success,
|
204
|
+
100 if the job failed. See exceptions.JobExitCode for possible exit
|
205
|
+
codes.
|
206
|
+
|
202
207
|
Request Raises:
|
203
208
|
ValueError: invalid arguments.
|
204
209
|
sky.exceptions.ClusterNotUpError: the jobs controller is not up.
|
@@ -217,7 +222,7 @@ def tail_logs(name: Optional[str] = None,
|
|
217
222
|
timeout=(5, None),
|
218
223
|
)
|
219
224
|
request_id = server_common.get_request_id(response)
|
220
|
-
sdk.stream_response(request_id, response, output_stream)
|
225
|
+
return sdk.stream_response(request_id, response, output_stream)
|
221
226
|
|
222
227
|
|
223
228
|
@usage_lib.entrypoint
|
sky/jobs/constants.py
CHANGED
@@ -40,7 +40,7 @@ JOBS_CLUSTER_NAME_PREFIX_LENGTH = 25
|
|
40
40
|
# The version of the lib files that jobs/utils use. Whenever there is an API
|
41
41
|
# change for the jobs/utils, we need to bump this version and update
|
42
42
|
# job.utils.ManagedJobCodeGen to handle the version update.
|
43
|
-
MANAGED_JOBS_VERSION =
|
43
|
+
MANAGED_JOBS_VERSION = 3
|
44
44
|
|
45
45
|
# The command for setting up the jobs dashboard on the controller. It firstly
|
46
46
|
# checks if the systemd services are available, and if not (e.g., Kubernetes
|
sky/jobs/server/core.py
CHANGED
@@ -460,12 +460,17 @@ def cancel(name: Optional[str] = None,
|
|
460
460
|
|
461
461
|
@usage_lib.entrypoint
|
462
462
|
def tail_logs(name: Optional[str], job_id: Optional[int], follow: bool,
|
463
|
-
controller: bool, refresh: bool) ->
|
463
|
+
controller: bool, refresh: bool) -> int:
|
464
464
|
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
|
465
465
|
"""Tail logs of managed jobs.
|
466
466
|
|
467
467
|
Please refer to sky.cli.job_logs for documentation.
|
468
468
|
|
469
|
+
Returns:
|
470
|
+
Exit code based on success or failure of the job. 0 if success,
|
471
|
+
100 if the job failed. See exceptions.JobExitCode for possible exit
|
472
|
+
codes.
|
473
|
+
|
469
474
|
Raises:
|
470
475
|
ValueError: invalid arguments.
|
471
476
|
sky.exceptions.ClusterNotUpError: the jobs controller is not up.
|
@@ -494,11 +499,11 @@ def tail_logs(name: Optional[str], job_id: Optional[int], follow: bool,
|
|
494
499
|
backend = backend_utils.get_backend_from_handle(handle)
|
495
500
|
assert isinstance(backend, backends.CloudVmRayBackend), backend
|
496
501
|
|
497
|
-
backend.tail_managed_job_logs(handle,
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
+
return backend.tail_managed_job_logs(handle,
|
503
|
+
job_id=job_id,
|
504
|
+
job_name=name,
|
505
|
+
follow=follow,
|
506
|
+
controller=controller)
|
502
507
|
|
503
508
|
|
504
509
|
def start_dashboard_forwarding(refresh: bool = False) -> Tuple[int, int]:
|
sky/jobs/utils.py
CHANGED
@@ -511,8 +511,14 @@ def cancel_job_by_name(job_name: str) -> str:
|
|
511
511
|
return f'Job {job_name!r} is scheduled to be cancelled.'
|
512
512
|
|
513
513
|
|
514
|
-
def stream_logs_by_id(job_id: int, follow: bool = True) -> str:
|
515
|
-
"""Stream logs by job id.
|
514
|
+
def stream_logs_by_id(job_id: int, follow: bool = True) -> Tuple[str, int]:
|
515
|
+
"""Stream logs by job id.
|
516
|
+
|
517
|
+
Returns:
|
518
|
+
A tuple containing the log message and an exit code based on success or
|
519
|
+
failure of the job. 0 if success, 100 if the job failed.
|
520
|
+
See exceptions.JobExitCode for possible exit codes.
|
521
|
+
"""
|
516
522
|
|
517
523
|
def should_keep_logging(status: managed_job_state.ManagedJobStatus) -> bool:
|
518
524
|
# If we see CANCELLING, just exit - we could miss some job logs but the
|
@@ -547,13 +553,16 @@ def stream_logs_by_id(job_id: int, follow: bool = True) -> str:
|
|
547
553
|
start_streaming = True
|
548
554
|
if start_streaming:
|
549
555
|
print(line, end='', flush=True)
|
550
|
-
return ''
|
556
|
+
return '', exceptions.JobExitCode.from_managed_job_status(
|
557
|
+
managed_job_status)
|
551
558
|
return (f'{colorama.Fore.YELLOW}'
|
552
559
|
f'Job {job_id} is already in terminal state '
|
553
560
|
f'{managed_job_status.value}. For more details, run: '
|
554
561
|
f'sky jobs logs --controller {job_id}'
|
555
562
|
f'{colorama.Style.RESET_ALL}'
|
556
|
-
f'{job_msg}'
|
563
|
+
f'{job_msg}',
|
564
|
+
exceptions.JobExitCode.from_managed_job_status(
|
565
|
+
managed_job_status))
|
557
566
|
backend = backends.CloudVmRayBackend()
|
558
567
|
task_id, managed_job_status = (
|
559
568
|
managed_job_state.get_latest_task_id_status(job_id))
|
@@ -604,11 +613,12 @@ def stream_logs_by_id(job_id: int, follow: bool = True) -> str:
|
|
604
613
|
job_id=None,
|
605
614
|
managed_job_id=job_id,
|
606
615
|
follow=follow)
|
607
|
-
if returncode
|
608
|
-
# If the log tailing
|
609
|
-
#
|
610
|
-
#
|
611
|
-
#
|
616
|
+
if returncode in [rc.value for rc in exceptions.JobExitCode]:
|
617
|
+
# If the log tailing exits with a known exit code we can safely
|
618
|
+
# break the loop because it indicates the tailing process
|
619
|
+
# succeeded (even though the real job can be SUCCEEDED or
|
620
|
+
# FAILED). We use the status in job queue to show the
|
621
|
+
# information, as the ManagedJobStatus is not updated yet.
|
612
622
|
job_statuses = backend.get_job_status(handle, stream_logs=False)
|
613
623
|
job_status = list(job_statuses.values())[0]
|
614
624
|
assert job_status is not None, 'No job found.'
|
@@ -728,18 +738,25 @@ def stream_logs_by_id(job_id: int, follow: bool = True) -> str:
|
|
728
738
|
logger.info(
|
729
739
|
ux_utils.finishing_message(f'Managed job finished: {job_id} '
|
730
740
|
f'(status: {managed_job_status.value}).'))
|
731
|
-
return ''
|
741
|
+
return '', exceptions.JobExitCode.from_managed_job_status(
|
742
|
+
managed_job_status)
|
732
743
|
|
733
744
|
|
734
745
|
def stream_logs(job_id: Optional[int],
|
735
746
|
job_name: Optional[str],
|
736
747
|
controller: bool = False,
|
737
|
-
follow: bool = True) -> str:
|
738
|
-
"""Stream logs by job id or job name.
|
748
|
+
follow: bool = True) -> Tuple[str, int]:
|
749
|
+
"""Stream logs by job id or job name.
|
750
|
+
|
751
|
+
Returns:
|
752
|
+
A tuple containing the log message and the exit code based on success
|
753
|
+
or failure of the job. 0 if success, 100 if the job failed.
|
754
|
+
See exceptions.JobExitCode for possible exit codes.
|
755
|
+
"""
|
739
756
|
if job_id is None and job_name is None:
|
740
757
|
job_id = managed_job_state.get_latest_job_id()
|
741
758
|
if job_id is None:
|
742
|
-
return 'No managed job found.'
|
759
|
+
return 'No managed job found.', exceptions.JobExitCode.NOT_FOUND
|
743
760
|
|
744
761
|
if controller:
|
745
762
|
if job_id is None:
|
@@ -754,7 +771,8 @@ def stream_logs(job_id: Optional[int],
|
|
754
771
|
if job['job_name'] == job_name
|
755
772
|
}
|
756
773
|
if not managed_job_ids:
|
757
|
-
return f'No managed job found with name {job_name!r}.'
|
774
|
+
return (f'No managed job found with name {job_name!r}.',
|
775
|
+
exceptions.JobExitCode.NOT_FOUND)
|
758
776
|
if len(managed_job_ids) > 1:
|
759
777
|
job_ids_str = ', '.join(
|
760
778
|
str(job_id) for job_id in managed_job_ids)
|
@@ -776,7 +794,7 @@ def stream_logs(job_id: Optional[int],
|
|
776
794
|
if not follow:
|
777
795
|
# Assume that the log file hasn't been written yet. Since we
|
778
796
|
# aren't following, just return.
|
779
|
-
return ''
|
797
|
+
return '', exceptions.JobExitCode.SUCCEEDED
|
780
798
|
|
781
799
|
job_status = managed_job_state.get_status(job_id)
|
782
800
|
if job_status is None:
|
@@ -787,7 +805,8 @@ def stream_logs(job_id: Optional[int],
|
|
787
805
|
# point, it never will be. This job may have been submitted
|
788
806
|
# using an old version that did not create the log file, so this
|
789
807
|
# is not considered an exceptional case.
|
790
|
-
return ''
|
808
|
+
return '', exceptions.JobExitCode.from_managed_job_status(
|
809
|
+
job_status)
|
791
810
|
|
792
811
|
time.sleep(log_lib.SKY_LOG_WAITING_GAP_SECONDS)
|
793
812
|
|
@@ -833,15 +852,17 @@ def stream_logs(job_id: Optional[int],
|
|
833
852
|
|
834
853
|
if follow:
|
835
854
|
return ux_utils.finishing_message(
|
836
|
-
f'Job finished (status: {job_status}).'
|
855
|
+
f'Job finished (status: {job_status}).'
|
856
|
+
), exceptions.JobExitCode.from_managed_job_status(job_status)
|
837
857
|
|
838
|
-
return ''
|
858
|
+
return '', exceptions.JobExitCode.SUCCEEDED
|
839
859
|
|
840
860
|
if job_id is None:
|
841
861
|
assert job_name is not None
|
842
862
|
job_ids = managed_job_state.get_nonterminal_job_ids_by_name(job_name)
|
843
863
|
if not job_ids:
|
844
|
-
return f'No running managed job found with name {job_name!r}.'
|
864
|
+
return (f'No running managed job found with name {job_name!r}.',
|
865
|
+
exceptions.JobExitCode.NOT_FOUND)
|
845
866
|
if len(job_ids) > 1:
|
846
867
|
raise ValueError(
|
847
868
|
f'Multiple running jobs found with name {job_name!r}.')
|
@@ -1167,6 +1188,7 @@ class ManagedJobCodeGen:
|
|
1167
1188
|
>> codegen = ManagedJobCodeGen.show_jobs(...)
|
1168
1189
|
"""
|
1169
1190
|
_PREFIX = textwrap.dedent("""\
|
1191
|
+
import sys
|
1170
1192
|
from sky.jobs import utils
|
1171
1193
|
from sky.jobs import state as managed_job_state
|
1172
1194
|
from sky.jobs import constants as managed_job_constants
|
@@ -1222,9 +1244,17 @@ class ManagedJobCodeGen:
|
|
1222
1244
|
follow: bool = True,
|
1223
1245
|
controller: bool = False) -> str:
|
1224
1246
|
code = textwrap.dedent(f"""\
|
1225
|
-
|
1247
|
+
result = utils.stream_logs(job_id={job_id!r}, job_name={job_name!r},
|
1226
1248
|
follow={follow}, controller={controller})
|
1227
|
-
|
1249
|
+
if managed_job_version < 3:
|
1250
|
+
# Versions 2 and older did not return a retcode, so we just print
|
1251
|
+
# the result.
|
1252
|
+
# TODO: Remove compatibility before 0.12.0
|
1253
|
+
print(result, flush=True)
|
1254
|
+
else:
|
1255
|
+
msg, retcode = result
|
1256
|
+
print(msg, flush=True)
|
1257
|
+
sys.exit(retcode)
|
1228
1258
|
""")
|
1229
1259
|
return cls._build(code)
|
1230
1260
|
|
sky/server/constants.py
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
# API server version, whenever there is a change in API server that requires a
|
4
4
|
# restart of the local API server or error out when the client does not match
|
5
5
|
# the server version.
|
6
|
-
API_VERSION = '
|
6
|
+
API_VERSION = '3'
|
7
7
|
|
8
8
|
# Prefix for API request names.
|
9
9
|
REQUEST_NAME_PREFIX = 'sky.'
|
sky/server/requests/payloads.py
CHANGED
@@ -12,6 +12,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union
|
|
12
12
|
|
13
13
|
import pydantic
|
14
14
|
|
15
|
+
from sky import admin_policy
|
15
16
|
from sky import serve
|
16
17
|
from sky import sky_logging
|
17
18
|
from sky import skypilot_config
|
@@ -113,15 +114,9 @@ class CheckBody(RequestBody):
|
|
113
114
|
verbose: bool
|
114
115
|
|
115
116
|
|
116
|
-
class
|
117
|
-
"""
|
118
|
-
dag: str
|
119
|
-
|
120
|
-
|
121
|
-
class OptimizeBody(RequestBody):
|
122
|
-
"""The request body for the optimize endpoint."""
|
117
|
+
class DagRequestBody(RequestBody):
|
118
|
+
"""Request body base class for endpoints with a dag."""
|
123
119
|
dag: str
|
124
|
-
minimize: common_lib.OptimizeTarget = common_lib.OptimizeTarget.COST
|
125
120
|
|
126
121
|
def to_kwargs(self) -> Dict[str, Any]:
|
127
122
|
# Import here to avoid requirement of the whole SkyPilot dependency on
|
@@ -139,6 +134,19 @@ class OptimizeBody(RequestBody):
|
|
139
134
|
return kwargs
|
140
135
|
|
141
136
|
|
137
|
+
class ValidateBody(DagRequestBody):
|
138
|
+
"""The request body for the validate endpoint."""
|
139
|
+
dag: str
|
140
|
+
request_options: Optional[admin_policy.RequestOptions]
|
141
|
+
|
142
|
+
|
143
|
+
class OptimizeBody(DagRequestBody):
|
144
|
+
"""The request body for the optimize endpoint."""
|
145
|
+
dag: str
|
146
|
+
minimize: common_lib.OptimizeTarget = common_lib.OptimizeTarget.COST
|
147
|
+
request_options: Optional[admin_policy.RequestOptions]
|
148
|
+
|
149
|
+
|
142
150
|
class LaunchBody(RequestBody):
|
143
151
|
"""The request body for the launch endpoint."""
|
144
152
|
task: str
|
sky/server/server.py
CHANGED
@@ -27,7 +27,6 @@ from sky import core
|
|
27
27
|
from sky import exceptions
|
28
28
|
from sky import execution
|
29
29
|
from sky import global_user_state
|
30
|
-
from sky import optimizer
|
31
30
|
from sky import sky_logging
|
32
31
|
from sky.clouds import service_catalog
|
33
32
|
from sky.data import storage_utils
|
@@ -42,6 +41,7 @@ from sky.server.requests import payloads
|
|
42
41
|
from sky.server.requests import requests as requests_lib
|
43
42
|
from sky.skylet import constants
|
44
43
|
from sky.usage import usage_lib
|
44
|
+
from sky.utils import admin_policy_utils
|
45
45
|
from sky.utils import common as common_lib
|
46
46
|
from sky.utils import common_utils
|
47
47
|
from sky.utils import dag_utils
|
@@ -258,9 +258,22 @@ async def validate(validate_body: payloads.ValidateBody) -> None:
|
|
258
258
|
"""Validates the user's DAG."""
|
259
259
|
# TODO(SKY-1035): validate if existing cluster satisfies the requested
|
260
260
|
# resources, e.g. sky exec --gpus V100:8 existing-cluster-with-no-gpus
|
261
|
+
|
262
|
+
# TODO: Our current launch process is split into three calls:
|
263
|
+
# validate, optimize, and launch. This requires us to apply the admin policy
|
264
|
+
# in each step, which may be an expensive operation. We should consolidate
|
265
|
+
# these into a single call or have a TTL cache for (task, admin_policy)
|
266
|
+
# pairs.
|
261
267
|
logger.debug(f'Validating tasks: {validate_body.dag}')
|
262
268
|
try:
|
263
269
|
dag = dag_utils.load_chain_dag_from_yaml_str(validate_body.dag)
|
270
|
+
# TODO: Admin policy may contain arbitrary code, which may be expensive
|
271
|
+
# to run and may block the server thread. However, moving it into the
|
272
|
+
# executor adds a ~150ms penalty on the local API server because of
|
273
|
+
# added RTTs. For now, we stick to doing the validation inline in the
|
274
|
+
# server thread.
|
275
|
+
dag, _ = admin_policy_utils.apply(
|
276
|
+
dag, request_options=validate_body.request_options)
|
264
277
|
for task in dag.tasks:
|
265
278
|
# Will validate workdir and file_mounts in the backend, as those
|
266
279
|
# need to be validated after the files are uploaded to the SkyPilot
|
@@ -283,7 +296,7 @@ async def optimize(optimize_body: payloads.OptimizeBody,
|
|
283
296
|
request_name='optimize',
|
284
297
|
request_body=optimize_body,
|
285
298
|
ignore_return_value=True,
|
286
|
-
func=
|
299
|
+
func=core.optimize,
|
287
300
|
schedule_type=requests_lib.ScheduleType.SHORT,
|
288
301
|
)
|
289
302
|
|
sky/skylet/constants.py
CHANGED
@@ -93,7 +93,7 @@ SKYLET_VERSION = '12'
|
|
93
93
|
# The version of the lib files that skylet/jobs use. Whenever there is an API
|
94
94
|
# change for the job_lib or log_lib, we need to bump this version, so that the
|
95
95
|
# user can be notified to update their SkyPilot version on the remote cluster.
|
96
|
-
SKYLET_LIB_VERSION =
|
96
|
+
SKYLET_LIB_VERSION = 3
|
97
97
|
SKYLET_VERSION_FILE = '~/.sky/skylet_version'
|
98
98
|
|
99
99
|
# `sky jobs dashboard`-related
|
sky/skylet/job_lib.py
CHANGED
@@ -938,7 +938,9 @@ class JobLibCodeGen:
|
|
938
938
|
_PREFIX = [
|
939
939
|
'import os',
|
940
940
|
'import getpass',
|
941
|
-
'
|
941
|
+
'import sys',
|
942
|
+
'from sky import exceptions',
|
943
|
+
'from sky.skylet import log_lib, job_lib, constants',
|
942
944
|
]
|
943
945
|
|
944
946
|
@classmethod
|
@@ -1033,6 +1035,13 @@ class JobLibCodeGen:
|
|
1033
1035
|
f'tail_log_kwargs = {{"job_id": job_id, "log_dir": log_dir, "managed_job_id": {managed_job_id!r}, "follow": {follow}}}',
|
1034
1036
|
f'{_LINUX_NEW_LINE}if getattr(constants, "SKYLET_LIB_VERSION", 1) > 1: tail_log_kwargs["tail"] = {tail}',
|
1035
1037
|
f'{_LINUX_NEW_LINE}log_lib.tail_logs(**tail_log_kwargs)',
|
1038
|
+
# After tailing, check the job status and exit with appropriate code
|
1039
|
+
'job_status = job_lib.get_status(job_id)',
|
1040
|
+
# Backward compatibility for returning exit code: Skylet versions 2
|
1041
|
+
# and older did not have JobExitCode, so we use 0 for those versions
|
1042
|
+
# TODO: Remove this special handling after 0.10.0.
|
1043
|
+
'exit_code = exceptions.JobExitCode.from_job_status(job_status) if getattr(constants, "SKYLET_LIB_VERSION", 1) > 2 else 0',
|
1044
|
+
'sys.exit(exit_code)',
|
1036
1045
|
]
|
1037
1046
|
return cls._build(code)
|
1038
1047
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: skypilot-nightly
|
3
|
-
Version: 1.0.0.dev20250303
|
3
|
+
Version: 1.0.0.dev20250304
|
4
4
|
Summary: SkyPilot: An intercloud broker for the clouds
|
5
5
|
Author: SkyPilot Team
|
6
6
|
License: Apache 2.0
|
@@ -241,13 +241,13 @@ pip install "skypilot-nightly[kubernetes,aws,gcp,azure,oci,lambda,runpod,fluidst
|
|
241
241
|
</p>
|
242
242
|
|
243
243
|
|
244
|
-
## Getting
|
244
|
+
## Getting started
|
245
245
|
You can find our documentation [here](https://docs.skypilot.co/).
|
246
246
|
- [Installation](https://docs.skypilot.co/en/latest/getting-started/installation.html)
|
247
247
|
- [Quickstart](https://docs.skypilot.co/en/latest/getting-started/quickstart.html)
|
248
248
|
- [CLI reference](https://docs.skypilot.co/en/latest/reference/cli.html)
|
249
249
|
|
250
|
-
## SkyPilot in 1
|
250
|
+
## SkyPilot in 1 minute
|
251
251
|
|
252
252
|
A SkyPilot task specifies: resource requirements, data to be synced, setup commands, and the task commands.
|
253
253
|
|
@@ -299,59 +299,46 @@ SkyPilot then performs the heavy-lifting for you, including:
|
|
299
299
|
</p>
|
300
300
|
|
301
301
|
|
302
|
-
|
302
|
+
See [Quickstart](https://docs.skypilot.co/en/latest/getting-started/quickstart.html) to get started with SkyPilot.
|
303
303
|
|
304
|
-
##
|
304
|
+
## Runnable examples
|
305
|
+
|
306
|
+
See [**SkyPilot examples**](https://docs.skypilot.co/en/docs-examples/examples/index.html) that cover: development, training, serving, LLM models, AI apps, and common frameworks.
|
307
|
+
|
308
|
+
Latest featured examples:
|
309
|
+
|
310
|
+
| Task | Examples |
|
311
|
+
|----------|----------|
|
312
|
+
| Training | [PyTorch](https://docs.skypilot.co/en/latest/getting-started/tutorial.html), [DeepSpeed](https://docs.skypilot.co/en/latest/examples/training/deepspeed.html), [Finetune Llama 3](https://docs.skypilot.co/en/latest/examples/training/llama-3_1-finetuning.html), [NeMo](https://docs.skypilot.co/en/latest/examples/training/nemo.html), [Ray](https://docs.skypilot.co/en/latest/examples/training/ray.html), [Unsloth](https://docs.skypilot.co/en/latest/examples/training/unsloth.html), [Jax/TPU](https://docs.skypilot.co/en/latest/examples/training/tpu.html) |
|
313
|
+
| Serving | [vLLM](https://docs.skypilot.co/en/latest/examples/serving/vllm.html), [SGLang](https://docs.skypilot.co/en/latest/examples/serving/sglang.html), [Ollama](https://docs.skypilot.co/en/latest/examples/serving/ollama.html) |
|
314
|
+
| Models | [DeepSeek-R1](https://docs.skypilot.co/en/latest/examples/models/deepseek-r1.html), [Llama 3](https://docs.skypilot.co/en/latest/examples/models/llama-3.html), [CodeLlama](https://docs.skypilot.co/en/latest/examples/models/codellama.html), [Qwen](https://docs.skypilot.co/en/latest/examples/models/qwen.html), [Mixtral](https://docs.skypilot.co/en/latest/examples/models/mixtral.html) |
|
315
|
+
| AI apps | [RAG](https://docs.skypilot.co/en/latest/examples/applications/rag.html), [vector databases](https://docs.skypilot.co/en/latest/examples/applications/vector_database.html) (ChromaDB, CLIP) |
|
316
|
+
| Common frameworks | [Airflow](https://docs.skypilot.co/en/latest/examples/frameworks/airflow.html), [Jupyter](https://docs.skypilot.co/en/latest/examples/frameworks/jupyter.html) |
|
317
|
+
|
318
|
+
Source files and more examples can be found in [`llm/`](https://github.com/skypilot-org/skypilot/tree/master/llm) and [`examples/`](https://github.com/skypilot-org/skypilot/tree/master/examples).
|
319
|
+
|
320
|
+
## More information
|
305
321
|
To learn more, see [SkyPilot Overview](https://docs.skypilot.co/en/latest/overview.html), [SkyPilot docs](https://docs.skypilot.co/en/latest/), and [SkyPilot blog](https://blog.skypilot.co/).
|
306
322
|
|
307
|
-
|
308
|
-
Runnable examples:
|
309
|
-
- [**AI Gallery**](https://docs.skypilot.co/en/latest/gallery/index.html)
|
310
|
-
- LLMs on SkyPilot
|
311
|
-
- [DeepSeek-R1](./llm/deepseek-r1/)
|
312
|
-
- [DeepSeek-Janus](./llm/deepseek-janus/)
|
313
|
-
- [Llama 3.2: lightweight and vision models](./llm/llama-3_2/)
|
314
|
-
- [Pixtral](./llm/pixtral/)
|
315
|
-
- [Llama 3.1 finetuning](./llm/llama-3_1-finetuning/) and [serving](./llm/llama-3_1/)
|
316
|
-
- [GPT-2 via `llm.c`](./llm/gpt-2/)
|
317
|
-
- [Llama 3](./llm/llama-3/)
|
318
|
-
- [Qwen](./llm/qwen/)
|
319
|
-
- [Databricks DBRX](./llm/dbrx/)
|
320
|
-
- [Gemma](./llm/gemma/)
|
321
|
-
- [Mixtral 8x7B](./llm/mixtral/); [Mistral 7B](https://docs.mistral.ai/self-deployment/skypilot/) (from official Mistral team)
|
322
|
-
- [Code Llama](./llm/codellama/)
|
323
|
-
- [vLLM: Serving LLM 24x Faster On the Cloud](./llm/vllm/) (from official vLLM team)
|
324
|
-
- [SGLang: Fast and Expressive LLM Serving On the Cloud](./llm/sglang/) (from official SGLang team)
|
325
|
-
- [Vicuna chatbots: Training & Serving](./llm/vicuna/) (from official Vicuna team)
|
326
|
-
- [Train your own Vicuna on Llama-2](./llm/vicuna-llama-2/)
|
327
|
-
- [Self-Hosted Llama-2 Chatbot](./llm/llama-2/)
|
328
|
-
- [Ollama: Quantized LLMs on CPUs](./llm/ollama/)
|
329
|
-
- [LoRAX](./llm/lorax/)
|
330
|
-
- [QLoRA](https://github.com/artidoro/qlora/pull/132)
|
331
|
-
- [LLaMA-LoRA-Tuner](https://github.com/zetavg/LLaMA-LoRA-Tuner#run-on-a-cloud-service-via-skypilot)
|
332
|
-
- [Tabby: Self-hosted AI coding assistant](https://github.com/TabbyML/tabby/blob/bed723fcedb44a6b867ce22a7b1f03d2f3531c1e/experimental/eval/skypilot.yaml)
|
333
|
-
- [LocalGPT](./llm/localgpt)
|
334
|
-
- [Falcon](./llm/falcon)
|
335
|
-
- Add yours here & see more in [`llm/`](./llm)!
|
336
|
-
- Framework examples: [Vector Database](./examples/vector_database/), [PyTorch DDP](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_torch.yaml), [DeepSpeed](./examples/deepspeed-multinode/sky.yaml), [JAX/Flax on TPU](https://github.com/skypilot-org/skypilot/blob/master/examples/tpu/tpuvm_mnist.yaml), [Stable Diffusion](https://github.com/skypilot-org/skypilot/tree/master/examples/stable_diffusion), [Detectron2](https://github.com/skypilot-org/skypilot/blob/master/examples/detectron2_docker.yaml), [Distributed](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_tf_app.py) [TensorFlow](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_app_storage.yaml), [Ray Train](examples/distributed_ray_train/ray_train.yaml), [NeMo](https://github.com/skypilot-org/skypilot/blob/master/examples/nemo/), [programmatic grid search](https://github.com/skypilot-org/skypilot/blob/master/examples/huggingface_glue_imdb_grid_search_app.py), [Docker](https://github.com/skypilot-org/skypilot/blob/master/examples/docker/echo_app.yaml), [Cog](https://github.com/skypilot-org/skypilot/blob/master/examples/cog/), [Unsloth](https://github.com/skypilot-org/skypilot/blob/master/examples/unsloth/unsloth.yaml), [Ollama](https://github.com/skypilot-org/skypilot/blob/master/llm/ollama), [llm.c](https://github.com/skypilot-org/skypilot/tree/master/llm/gpt-2), [Airflow](./examples/airflow/training_workflow) and [many more (`examples/`)](./examples).
|
337
|
-
|
338
|
-
Case Studies and Integrations: [Community Spotlights](https://blog.skypilot.co/community/)
|
323
|
+
Case studies and integrations: [Community Spotlights](https://blog.skypilot.co/community/)
|
339
324
|
|
340
325
|
Follow updates:
|
341
|
-
- [Twitter](https://twitter.com/skypilot_org)
|
342
326
|
- [Slack](http://slack.skypilot.co)
|
327
|
+
- [X / Twitter](https://twitter.com/skypilot_org)
|
328
|
+
- [LinkedIn](https://www.linkedin.com/company/skypilot-oss/)
|
343
329
|
- [SkyPilot Blog](https://blog.skypilot.co/) ([Introductory blog post](https://blog.skypilot.co/introducing-skypilot/))
|
344
330
|
|
345
331
|
Read the research:
|
346
332
|
- [SkyPilot paper](https://www.usenix.org/system/files/nsdi23-yang-zongheng.pdf) and [talk](https://www.usenix.org/conference/nsdi23/presentation/yang-zongheng) (NSDI 2023)
|
347
333
|
- [Sky Computing whitepaper](https://arxiv.org/abs/2205.07147)
|
348
334
|
- [Sky Computing vision paper](https://sigops.org/s/conferences/hotos/2021/papers/hotos21-s02-stoica.pdf) (HotOS 2021)
|
349
|
-
- [
|
335
|
+
- [SkyServe: AI serving across regions and clouds](https://arxiv.org/pdf/2411.01438) (EuroSys 2025)
|
336
|
+
- [Managed jobs spot instance policy](https://www.usenix.org/conference/nsdi24/presentation/wu-zhanghao) (NSDI 2024)
|
350
337
|
|
351
338
|
SkyPilot was initially started at the [Sky Computing Lab](https://sky.cs.berkeley.edu) at UC Berkeley and has since gained many industry contributors. To read about the project's origin and vision, see [Concept: Sky Computing](https://docs.skypilot.co/en/latest/sky-computing.html).
|
352
339
|
|
353
|
-
##
|
354
|
-
We are excited to hear your feedback
|
340
|
+
## Questions and feedback
|
341
|
+
We are excited to hear your feedback:
|
355
342
|
* For issues and feature requests, please [open a GitHub issue](https://github.com/skypilot-org/skypilot/issues/new).
|
356
343
|
* For questions, please use [GitHub Discussions](https://github.com/skypilot-org/skypilot/discussions).
|
357
344
|
|
{skypilot_nightly-1.0.0.dev20250303.dist-info → skypilot_nightly-1.0.0.dev20250304.dist-info}/RECORD
RENAMED
@@ -1,12 +1,12 @@
|
|
1
|
-
sky/__init__.py,sha256=
|
1
|
+
sky/__init__.py,sha256=nBS4cQvvG4s3GLt8Ueva04qVaT4Zdxdxpq8oD7kS2SI,6428
|
2
2
|
sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
|
3
3
|
sky/authentication.py,sha256=hCEqi77nprQEg3ktfRL51xiiw16zwZOmFEDB_Z7fWVU,22384
|
4
4
|
sky/check.py,sha256=NDKx_Zm7YRxPjMv82wz3ESLnGIPljaACyqVdVNM0PzY,11258
|
5
|
-
sky/cli.py,sha256
|
5
|
+
sky/cli.py,sha256=FDCSA5L__Djdk2Dc5eAmbE0YsfEu9zdE8vSu51oA19Q,221469
|
6
6
|
sky/cloud_stores.py,sha256=kEHXd2divyra-1c3EusHxKyM5yTQlTXc6cKVXofsefA,23978
|
7
|
-
sky/core.py,sha256=
|
7
|
+
sky/core.py,sha256=MU9hcTdh8baMGrr2ZXmbxx12vNlhajrkeyg5QtV717c,47609
|
8
8
|
sky/dag.py,sha256=Yl7Ry26Vql5cv4YMz8g9kOUgtoCihJnw7c8NgZYakMY,3242
|
9
|
-
sky/exceptions.py,sha256=
|
9
|
+
sky/exceptions.py,sha256=MrDCfAmxmkTRSrUhOTLNNAYqEZLus_aErJm8b9SvbLk,16077
|
10
10
|
sky/execution.py,sha256=0M4RTEzWn-B9oz221XdZOIGH12XOACmNq0j-WGUT_No,28023
|
11
11
|
sky/global_user_state.py,sha256=sUDdSsJeiJkbgmZNwy8YGFK0XeNh-RBr1VDUvbmjf0g,33246
|
12
12
|
sky/models.py,sha256=4xSW05BdDPEjW8Ubvj3VlVOVnzv0TbrolsFvR5R5v1U,638
|
@@ -34,7 +34,7 @@ sky/adaptors/vsphere.py,sha256=zJP9SeObEoLrpgHW2VHvZE48EhgVf8GfAEIwBeaDMfM,2129
|
|
34
34
|
sky/backends/__init__.py,sha256=UDjwbUgpTRApbPJnNfR786GadUuwgRk3vsWoVu5RB_c,536
|
35
35
|
sky/backends/backend.py,sha256=4BOqKZ-bwBTpjNnZF4JAHX2m2Iga7EmEn8Ao3tEivaM,7527
|
36
36
|
sky/backends/backend_utils.py,sha256=B_46tG9PyrppxLWdg4mWGuuIr3TEcWTz6qhYXjAY2bw,133452
|
37
|
-
sky/backends/cloud_vm_ray_backend.py,sha256=
|
37
|
+
sky/backends/cloud_vm_ray_backend.py,sha256=KIU4IkUTBGE__7MC3ayjYMwE14mSxeiHjrGnK7wAQXw,247773
|
38
38
|
sky/backends/docker_utils.py,sha256=Hyw1YY20EyghhEbYx6O2FIMDcGkNzBzV9TM7LFynei8,8358
|
39
39
|
sky/backends/local_docker_backend.py,sha256=nSYCjms3HOPjPNOrcCqsUKm1WV3AAovRFjEQ7hcEXW4,17021
|
40
40
|
sky/backends/wheel_utils.py,sha256=5BUzBqfYz7p1ME6_0PXGmcsAkLVb8NrFt317p7a4X8s,8278
|
@@ -43,9 +43,9 @@ sky/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
43
|
sky/benchmark/benchmark_state.py,sha256=X8CXmuU9KgsDRhKedhFgjeRMUFWtQsjFs1qECvPG2yg,8723
|
44
44
|
sky/benchmark/benchmark_utils.py,sha256=o4RymqSceq5mLEZL0upQM6NVEzJJQzj9s9tTm49uUTc,26365
|
45
45
|
sky/client/__init__.py,sha256=pz6xvVSd9X-gwqbsDL0E9QOojYqM0KAD0j-NCyCIF1k,38
|
46
|
-
sky/client/cli.py,sha256
|
46
|
+
sky/client/cli.py,sha256=FDCSA5L__Djdk2Dc5eAmbE0YsfEu9zdE8vSu51oA19Q,221469
|
47
47
|
sky/client/common.py,sha256=axDic7WOG1e78SdFm5XIwdhX7YNvf3g4k7INrsW3X4s,14611
|
48
|
-
sky/client/sdk.py,sha256=
|
48
|
+
sky/client/sdk.py,sha256=IRx72BXqOn_WVvtOuTXfgR5zcSm_lyoXeYxa5c_2_qk,68723
|
49
49
|
sky/clouds/__init__.py,sha256=OW6mJ-9hpJSBORCgt2LippLQEYZHNfnBW1mooRNNvxo,1416
|
50
50
|
sky/clouds/aws.py,sha256=J8tczaTDL239UowN9tUlhI92SeHw01wtFucSckvG63w,54112
|
51
51
|
sky/clouds/azure.py,sha256=bawEw6wOLAVyrjxMD-4UjLCuMj1H5_jH8qggpfZYS54,31703
|
@@ -107,19 +107,19 @@ sky/data/mounting_utils.py,sha256=i79Y-DZXVR88fjG_MBPB8EgsZBnHdpf1LGnJSm_VhAg,16
|
|
107
107
|
sky/data/storage.py,sha256=mTgMGdfSV6Gia076Dvgmc18ZlqF6eObima558UShiXA,207165
|
108
108
|
sky/data/storage_utils.py,sha256=zB99nRTJjh8isU0UmqERmlwwRNgfig91IwrwVH8CcNw,12383
|
109
109
|
sky/jobs/__init__.py,sha256=qoI53-xXE0-SOkrLWigvhgFXjk7dWE0OTqGPYIk-kmM,1458
|
110
|
-
sky/jobs/constants.py,sha256=
|
110
|
+
sky/jobs/constants.py,sha256=1XiIqdR5dEgGgepLKWkZCRT3MYSsMBR-dO7N4RTsjwg,3088
|
111
111
|
sky/jobs/controller.py,sha256=4G1CKI7M7D1BgJLbJMeqzg0iDDv7FR4ObB1BKZFFjhk,29585
|
112
112
|
sky/jobs/recovery_strategy.py,sha256=RLrqq8B1likxTknPzt3_BqO26sFVpoatxzUuGfwc18A,26170
|
113
113
|
sky/jobs/scheduler.py,sha256=8k2ieJ1TTvJ0TOalnklJtrMwFuatsh-ojoPMBgFRBlI,13119
|
114
114
|
sky/jobs/state.py,sha256=tDULLH6DVs4oKUIKhh0UAn3RzyVGuIUtEq5kW7K1Ojw,44585
|
115
|
-
sky/jobs/utils.py,sha256=
|
115
|
+
sky/jobs/utils.py,sha256=O1cOXeWXzZNxQzEZ4xwadskQr1Azm1pCRe4Ju0dfvfg,55845
|
116
116
|
sky/jobs/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
117
|
-
sky/jobs/client/sdk.py,sha256=
|
117
|
+
sky/jobs/client/sdk.py,sha256=4STtriCWLUq1mm-tEsh_iXC7r-U7_PY0R9X6-DNpaXs,10122
|
118
118
|
sky/jobs/dashboard/dashboard.py,sha256=JaVrNUEFQPLmsDZnrR76Uo8QqcAHdgYzx7GZTxDfl9M,7885
|
119
119
|
sky/jobs/dashboard/static/favicon.ico,sha256=uYlvgxSM7gjBmXpZ8wydvZUPAbJiiix-rc2Xe5mma9s,15086
|
120
120
|
sky/jobs/dashboard/templates/index.html,sha256=tz95q8O2pF7IvfY6yv0rnPyhj4DX8WX4RIVVxqFKV1Y,28519
|
121
121
|
sky/jobs/server/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
122
|
-
sky/jobs/server/core.py,sha256=
|
122
|
+
sky/jobs/server/core.py,sha256=s6A3KJsSQz1GlD6qfJ-XiEg6scc3sqMTqVd1Kr6ZTIU,25113
|
123
123
|
sky/jobs/server/dashboard_utils.py,sha256=2Mbx40W1pQqPEPHsSDbHeaF0j5cgyKy-_A9Owdwp_AQ,2315
|
124
124
|
sky/jobs/server/server.py,sha256=vdVxl4ZkBRlfOdsUO5Ttxon_-NE9XoMVMSo8fJ-Y73Y,7803
|
125
125
|
sky/provision/__init__.py,sha256=LzOo5LjkRXwSf29dUqN14YbjzQu3liXLQcmweTeZ4dE,6457
|
@@ -229,13 +229,13 @@ sky/serve/server/core.py,sha256=pRvFadEIH_WTUkTtSmuFoPBP4JFq8Obt68ifi9DWuog,3686
|
|
229
229
|
sky/serve/server/server.py,sha256=gQGVU9nHYdGbaLhGjIUNIYn4xwKjRASRJkiiTL5AI1Y,3283
|
230
230
|
sky/server/__init__.py,sha256=MPPBqFzXz6Jv5QSk6td_IcvnfXfNErDZVcizu4MLRow,27
|
231
231
|
sky/server/common.py,sha256=pEa-q3P5aOm6RMlit0pVzlDoJnZU_6zViO7aK_7htn0,17843
|
232
|
-
sky/server/constants.py,sha256=
|
233
|
-
sky/server/server.py,sha256=
|
232
|
+
sky/server/constants.py,sha256=_ZNrxYh8vmgbf3DmkGDduxjvO2y43ZSPTkH5rCNsVjU,770
|
233
|
+
sky/server/server.py,sha256=VOro33c4ZybLeZF57ANiZRWUjtyUvCDEeQMX7yd_HYE,43271
|
234
234
|
sky/server/stream_utils.py,sha256=-3IX1YCgxAFfcvQIV0TCvOn1wbRLWovAx3ckCrsExWU,5651
|
235
235
|
sky/server/html/log.html,sha256=TSGZktua9Ysl_ysg3w60rjxAxhH61AJnsYDHdtqrjmI,6929
|
236
236
|
sky/server/requests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
237
237
|
sky/server/requests/executor.py,sha256=Jk8RJoQlicDqaHhgVWMH3UiL-dJS7lGSGd05GPv-Lrc,19781
|
238
|
-
sky/server/requests/payloads.py,sha256=
|
238
|
+
sky/server/requests/payloads.py,sha256=nVb7vr1SNAq6ay2dNe9301zLHp7NrM79M7nsWAECBms,16340
|
239
239
|
sky/server/requests/requests.py,sha256=aMdjiK5kjSYP36pxdXFU6qgKOXcOmtViHbFm3V8Dvf8,19590
|
240
240
|
sky/server/requests/queues/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
241
241
|
sky/server/requests/queues/mp_queue.py,sha256=_7AFas__0b1L8e7Bwy4lu0VYU18R85YwMlDHPhQCfh0,2998
|
@@ -250,9 +250,9 @@ sky/skylet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
250
250
|
sky/skylet/attempt_skylet.py,sha256=GZ6ITjjA0m-da3IxXXfoHR6n4pjp3X3TOXUqVvSrV0k,2136
|
251
251
|
sky/skylet/autostop_lib.py,sha256=W4CtMira6QnmYToFT5kYTGjNPRZNC-bZPfsF1k3tluE,4480
|
252
252
|
sky/skylet/configs.py,sha256=UtnpmEL0F9hH6PSjhsps7xgjGZ6qzPOfW1p2yj9tSng,1887
|
253
|
-
sky/skylet/constants.py,sha256=
|
253
|
+
sky/skylet/constants.py,sha256=6I_JGIQXHGDNodNCQYLqflmJotDVnFUt4R48BASyUN0,18037
|
254
254
|
sky/skylet/events.py,sha256=pnV3ZiwWhXqTHpU5B5Y9Xwam_7FQDI6IrxgSx7X_NVA,12743
|
255
|
-
sky/skylet/job_lib.py,sha256=
|
255
|
+
sky/skylet/job_lib.py,sha256=j_VRDWcEGIStLLEC0cD9B3JxggPJOZaDAaNKe50uhy4,44319
|
256
256
|
sky/skylet/log_lib.py,sha256=DzOrgY8C7RdEMLC9O9kEKV-iLMb9wVMPSnDha8eMx28,20900
|
257
257
|
sky/skylet/log_lib.pyi,sha256=rRk4eUX0RHGs1QL9CXsJq6RE7FqqxZlfuPJOLXTvg7I,4453
|
258
258
|
sky/skylet/skylet.py,sha256=mWmqCvxSlfdVU_L8NL6P52jmCt3smd8K0HdyNBfMPeI,1234
|
@@ -344,9 +344,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488
|
|
344
344
|
sky/utils/kubernetes/kubernetes_deploy_utils.py,sha256=otzHzpliHDCpzYT-nU9Q0ZExbiFpDPWvhxwkvchZj7k,10073
|
345
345
|
sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
|
346
346
|
sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=Kq1MDygF2IxFmu9FXpCxqucXLmeUrvs6OtRij6XTQbo,6554
|
347
|
-
skypilot_nightly-1.0.0.
|
348
|
-
skypilot_nightly-1.0.0.
|
349
|
-
skypilot_nightly-1.0.0.
|
350
|
-
skypilot_nightly-1.0.0.
|
351
|
-
skypilot_nightly-1.0.0.
|
352
|
-
skypilot_nightly-1.0.0.
|
347
|
+
skypilot_nightly-1.0.0.dev20250304.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
|
348
|
+
skypilot_nightly-1.0.0.dev20250304.dist-info/METADATA,sha256=jiLF-ux0ZwUp4eKySgZB9QR7PrPU24Iavg7gSuzVKcw,18173
|
349
|
+
skypilot_nightly-1.0.0.dev20250304.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
350
|
+
skypilot_nightly-1.0.0.dev20250304.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
|
351
|
+
skypilot_nightly-1.0.0.dev20250304.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
|
352
|
+
skypilot_nightly-1.0.0.dev20250304.dist-info/RECORD,,
|
File without changes
|
{skypilot_nightly-1.0.0.dev20250303.dist-info → skypilot_nightly-1.0.0.dev20250304.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|