skypilot-nightly 1.0.0.dev20250226__py3-none-any.whl → 1.0.0.dev20250228__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +4 -6
- sky/cli.py +59 -21
- sky/client/cli.py +59 -21
- sky/jobs/client/sdk.py +10 -4
- sky/jobs/constants.py +1 -1
- sky/jobs/dashboard/dashboard.py +4 -1
- sky/jobs/scheduler.py +23 -8
- sky/jobs/server/core.py +33 -9
- sky/jobs/server/server.py +9 -0
- sky/jobs/state.py +30 -10
- sky/jobs/utils.py +57 -12
- sky/server/constants.py +1 -1
- sky/server/requests/payloads.py +2 -0
- sky/templates/jobs-controller.yaml.j2 +8 -1
- {skypilot_nightly-1.0.0.dev20250226.dist-info → skypilot_nightly-1.0.0.dev20250228.dist-info}/METADATA +2 -2
- {skypilot_nightly-1.0.0.dev20250226.dist-info → skypilot_nightly-1.0.0.dev20250228.dist-info}/RECORD +21 -21
- {skypilot_nightly-1.0.0.dev20250226.dist-info → skypilot_nightly-1.0.0.dev20250228.dist-info}/WHEEL +1 -1
- {skypilot_nightly-1.0.0.dev20250226.dist-info → skypilot_nightly-1.0.0.dev20250228.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250226.dist-info → skypilot_nightly-1.0.0.dev20250228.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250226.dist-info → skypilot_nightly-1.0.0.dev20250228.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = '0f178b2af47ec2b185bc685cce6965c675613cc7'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.
|
38
|
+
__version__ = '1.0.0.dev20250228'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/adaptors/kubernetes.py
CHANGED
@@ -6,7 +6,7 @@ from typing import Any, Callable, Optional, Set
|
|
6
6
|
from sky.adaptors import common
|
7
7
|
from sky.sky_logging import set_logging_level
|
8
8
|
from sky.utils import annotations
|
9
|
-
from sky.utils import
|
9
|
+
from sky.utils import common_utils
|
10
10
|
from sky.utils import ux_utils
|
11
11
|
|
12
12
|
_IMPORT_ERROR_MESSAGE = ('Failed to import dependencies for Kubernetes. '
|
@@ -69,22 +69,20 @@ def _load_config(context: Optional[str] = None):
|
|
69
69
|
try:
|
70
70
|
kubernetes.config.load_kube_config(context=context)
|
71
71
|
except kubernetes.config.config_exception.ConfigException as e:
|
72
|
-
suffix =
|
73
|
-
if env_options.Options.SHOW_DEBUG_INFO.get():
|
74
|
-
suffix += f' Error: {str(e)}'
|
72
|
+
suffix = common_utils.format_exception(e, use_bracket=True)
|
75
73
|
# Check if exception was due to no current-context
|
76
74
|
if 'Expected key current-context' in str(e):
|
77
75
|
err_str = (
|
78
76
|
f'Failed to load Kubernetes configuration for {context!r}. '
|
79
77
|
'Kubeconfig does not contain any valid context(s).'
|
80
|
-
f'{suffix}\n'
|
78
|
+
f'\n{suffix}\n'
|
81
79
|
' If you were running a local Kubernetes '
|
82
80
|
'cluster, run `sky local up` to start the cluster.')
|
83
81
|
else:
|
84
82
|
err_str = (
|
85
83
|
f'Failed to load Kubernetes configuration for {context!r}. '
|
86
84
|
'Please check if your kubeconfig file exists at '
|
87
|
-
f'~/.kube/config and is valid
|
85
|
+
f'~/.kube/config and is valid.\n{suffix}')
|
88
86
|
err_str += '\nTo disable Kubernetes for SkyPilot: run `sky check`.'
|
89
87
|
with ux_utils.print_exception_no_traceback():
|
90
88
|
raise ValueError(err_str) from None
|
sky/cli.py
CHANGED
@@ -133,7 +133,11 @@ def _get_cluster_records_and_set_ssh_config(
|
|
133
133
|
# Update the SSH config for all clusters
|
134
134
|
for record in cluster_records:
|
135
135
|
handle = record['handle']
|
136
|
-
|
136
|
+
# During the failover, even though a cluster does not exist, the handle
|
137
|
+
# can still exist in the record, and we check for credentials to avoid
|
138
|
+
# updating the SSH config for non-existent clusters.
|
139
|
+
if (handle is not None and handle.cached_external_ips is not None and
|
140
|
+
'credentials' in record):
|
137
141
|
credentials = record['credentials']
|
138
142
|
if isinstance(handle.launched_resources.cloud, clouds.Kubernetes):
|
139
143
|
# Replace the proxy command to proxy through the SkyPilot API
|
@@ -169,9 +173,9 @@ def _get_cluster_records_and_set_ssh_config(
|
|
169
173
|
handle.ssh_user,
|
170
174
|
)
|
171
175
|
else:
|
172
|
-
# If the cluster is not UP or does not have
|
173
|
-
# the cluster from the SSH config.
|
174
|
-
cluster_utils.SSHConfigHelper.remove_cluster(
|
176
|
+
# If the cluster is not UP or does not have credentials available,
|
177
|
+
# we need to remove the cluster from the SSH config.
|
178
|
+
cluster_utils.SSHConfigHelper.remove_cluster(record['name'])
|
175
179
|
|
176
180
|
# Clean up SSH configs for clusters that do not exist.
|
177
181
|
#
|
@@ -1379,12 +1383,14 @@ def exec(cluster: Optional[str], cluster_option: Optional[str],
|
|
1379
1383
|
def _handle_jobs_queue_request(
|
1380
1384
|
request_id: str,
|
1381
1385
|
show_all: bool,
|
1386
|
+
show_user: bool,
|
1382
1387
|
limit_num_jobs_to_show: bool = False,
|
1383
1388
|
is_called_by_user: bool = False) -> Tuple[Optional[int], str]:
|
1384
1389
|
"""Get the in-progress managed jobs.
|
1385
1390
|
|
1386
1391
|
Args:
|
1387
1392
|
show_all: Show all information of each job (e.g., region, price).
|
1393
|
+
show_user: Show the user who submitted the job.
|
1388
1394
|
limit_num_jobs_to_show: If True, limit the number of jobs to show to
|
1389
1395
|
_NUM_MANAGED_JOBS_TO_SHOW_IN_STATUS, which is mainly used by
|
1390
1396
|
`sky status`.
|
@@ -1452,6 +1458,7 @@ def _handle_jobs_queue_request(
|
|
1452
1458
|
if limit_num_jobs_to_show else None)
|
1453
1459
|
msg = managed_jobs.format_job_table(managed_jobs_,
|
1454
1460
|
show_all=show_all,
|
1461
|
+
show_user=show_user,
|
1455
1462
|
max_jobs=max_jobs_to_show)
|
1456
1463
|
return num_in_progress_jobs, msg
|
1457
1464
|
|
@@ -1561,7 +1568,9 @@ def _status_kubernetes(show_all: bool):
|
|
1561
1568
|
click.echo(f'\n{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
|
1562
1569
|
f'Managed jobs'
|
1563
1570
|
f'{colorama.Style.RESET_ALL}')
|
1564
|
-
msg = managed_jobs.format_job_table(all_jobs,
|
1571
|
+
msg = managed_jobs.format_job_table(all_jobs,
|
1572
|
+
show_all=show_all,
|
1573
|
+
show_user=False)
|
1565
1574
|
click.echo(msg)
|
1566
1575
|
if any(['sky-serve-controller' in c.cluster_name for c in all_clusters]):
|
1567
1576
|
# TODO: Parse serve controllers and show services separately.
|
@@ -1779,7 +1788,8 @@ def status(verbose: bool, refresh: bool, ip: bool, endpoints: bool,
|
|
1779
1788
|
show_managed_jobs = show_managed_jobs and not any([clusters, ip, endpoints])
|
1780
1789
|
if show_managed_jobs:
|
1781
1790
|
managed_jobs_queue_request_id = managed_jobs.queue(refresh=False,
|
1782
|
-
skip_finished=True
|
1791
|
+
skip_finished=True,
|
1792
|
+
all_users=all_users)
|
1783
1793
|
show_endpoints = endpoints or endpoint is not None
|
1784
1794
|
show_single_endpoint = endpoint is not None
|
1785
1795
|
show_services = show_services and not any([clusters, ip, endpoints])
|
@@ -1859,6 +1869,7 @@ def status(verbose: bool, refresh: bool, ip: bool, endpoints: bool,
|
|
1859
1869
|
num_in_progress_jobs, msg = _handle_jobs_queue_request(
|
1860
1870
|
managed_jobs_queue_request_id,
|
1861
1871
|
show_all=False,
|
1872
|
+
show_user=False,
|
1862
1873
|
limit_num_jobs_to_show=not all,
|
1863
1874
|
is_called_by_user=False)
|
1864
1875
|
except KeyboardInterrupt:
|
@@ -2751,7 +2762,7 @@ def start(
|
|
2751
2762
|
def down(
|
2752
2763
|
clusters: List[str],
|
2753
2764
|
all: bool, # pylint: disable=redefined-builtin
|
2754
|
-
all_users: bool,
|
2765
|
+
all_users: bool,
|
2755
2766
|
yes: bool,
|
2756
2767
|
purge: bool,
|
2757
2768
|
async_call: bool,
|
@@ -2812,7 +2823,9 @@ def _hint_or_raise_for_down_jobs_controller(controller_name: str,
|
|
2812
2823
|
with rich_utils.client_status(
|
2813
2824
|
'[bold cyan]Checking for in-progress managed jobs[/]'):
|
2814
2825
|
try:
|
2815
|
-
request_id = managed_jobs.queue(refresh=False,
|
2826
|
+
request_id = managed_jobs.queue(refresh=False,
|
2827
|
+
skip_finished=True,
|
2828
|
+
all_users=True)
|
2816
2829
|
managed_jobs_ = sdk.stream_and_get(request_id)
|
2817
2830
|
except exceptions.ClusterNotUpError as e:
|
2818
2831
|
if controller.value.connection_error_hint in str(e):
|
@@ -2836,7 +2849,9 @@ def _hint_or_raise_for_down_jobs_controller(controller_name: str,
|
|
2836
2849
|
'jobs (output of `sky jobs queue`) will be lost.')
|
2837
2850
|
click.echo(msg)
|
2838
2851
|
if managed_jobs_:
|
2839
|
-
job_table = managed_jobs.format_job_table(managed_jobs_,
|
2852
|
+
job_table = managed_jobs.format_job_table(managed_jobs_,
|
2853
|
+
show_all=False,
|
2854
|
+
show_user=True)
|
2840
2855
|
msg = controller.value.decline_down_for_dirty_controller_hint
|
2841
2856
|
# Add prefix to each line to align with the bullet point.
|
2842
2857
|
msg += '\n'.join(
|
@@ -3905,9 +3920,16 @@ def jobs_launch(
|
|
3905
3920
|
is_flag=True,
|
3906
3921
|
required=False,
|
3907
3922
|
help='Show only pending/running jobs\' information.')
|
3923
|
+
@click.option('--all-users',
|
3924
|
+
'-u',
|
3925
|
+
default=False,
|
3926
|
+
is_flag=True,
|
3927
|
+
required=False,
|
3928
|
+
help='Show jobs from all users.')
|
3908
3929
|
@usage_lib.entrypoint
|
3909
3930
|
# pylint: disable=redefined-builtin
|
3910
|
-
def jobs_queue(verbose: bool, refresh: bool, skip_finished: bool
|
3931
|
+
def jobs_queue(verbose: bool, refresh: bool, skip_finished: bool,
|
3932
|
+
all_users: bool):
|
3911
3933
|
"""Show statuses of managed jobs.
|
3912
3934
|
|
3913
3935
|
Each managed jobs can have one of the following statuses:
|
@@ -3964,9 +3986,10 @@ def jobs_queue(verbose: bool, refresh: bool, skip_finished: bool):
|
|
3964
3986
|
click.secho('Fetching managed job statuses...', fg='cyan')
|
3965
3987
|
with rich_utils.client_status('[cyan]Checking managed jobs[/]'):
|
3966
3988
|
managed_jobs_request_id = managed_jobs.queue(
|
3967
|
-
refresh=refresh, skip_finished=skip_finished)
|
3989
|
+
refresh=refresh, skip_finished=skip_finished, all_users=all_users)
|
3968
3990
|
_, msg = _handle_jobs_queue_request(managed_jobs_request_id,
|
3969
3991
|
show_all=verbose,
|
3992
|
+
show_user=all_users,
|
3970
3993
|
is_called_by_user=True)
|
3971
3994
|
if not skip_finished:
|
3972
3995
|
in_progress_only_hint = ''
|
@@ -3989,16 +4012,23 @@ def jobs_queue(verbose: bool, refresh: bool, skip_finished: bool):
|
|
3989
4012
|
is_flag=True,
|
3990
4013
|
default=False,
|
3991
4014
|
required=False,
|
3992
|
-
help='Cancel all managed jobs.')
|
4015
|
+
help='Cancel all managed jobs for the current user.')
|
3993
4016
|
@click.option('--yes',
|
3994
4017
|
'-y',
|
3995
4018
|
is_flag=True,
|
3996
4019
|
default=False,
|
3997
4020
|
required=False,
|
3998
4021
|
help='Skip confirmation prompt.')
|
4022
|
+
@click.option('--all-users',
|
4023
|
+
'-u',
|
4024
|
+
is_flag=True,
|
4025
|
+
default=False,
|
4026
|
+
required=False,
|
4027
|
+
help='Cancel all managed jobs from all users.')
|
3999
4028
|
@usage_lib.entrypoint
|
4000
4029
|
# pylint: disable=redefined-builtin
|
4001
|
-
def jobs_cancel(name: Optional[str], job_ids: Tuple[int], all: bool, yes: bool
|
4030
|
+
def jobs_cancel(name: Optional[str], job_ids: Tuple[int], all: bool, yes: bool,
|
4031
|
+
all_users: bool):
|
4002
4032
|
"""Cancel managed jobs.
|
4003
4033
|
|
4004
4034
|
You can provide either a job name or a list of job IDs to be cancelled.
|
@@ -4015,25 +4045,33 @@ def jobs_cancel(name: Optional[str], job_ids: Tuple[int], all: bool, yes: bool):
|
|
4015
4045
|
$ sky jobs cancel 1 2 3
|
4016
4046
|
"""
|
4017
4047
|
job_id_str = ','.join(map(str, job_ids))
|
4018
|
-
if sum([bool(job_ids), name is not None, all]) != 1:
|
4019
|
-
|
4020
|
-
|
4021
|
-
|
4048
|
+
if sum([bool(job_ids), name is not None, all or all_users]) != 1:
|
4049
|
+
arguments = []
|
4050
|
+
arguments += [f'--job-ids {job_id_str}'] if job_ids else []
|
4051
|
+
arguments += [f'--name {name}'] if name is not None else []
|
4052
|
+
arguments += ['--all'] if all else []
|
4053
|
+
arguments += ['--all-users'] if all_users else []
|
4022
4054
|
raise click.UsageError(
|
4023
|
-
'Can only specify one of JOB_IDS
|
4024
|
-
f'Provided {
|
4055
|
+
'Can only specify one of JOB_IDS, --name, or --all/--all-users. '
|
4056
|
+
f'Provided {" ".join(arguments)!r}.')
|
4025
4057
|
|
4026
4058
|
if not yes:
|
4027
4059
|
job_identity_str = (f'managed jobs with IDs {job_id_str}'
|
4028
4060
|
if job_ids else repr(name))
|
4029
|
-
if
|
4061
|
+
if all_users:
|
4062
|
+
job_identity_str = 'all managed jobs FOR ALL USERS'
|
4063
|
+
elif all:
|
4030
4064
|
job_identity_str = 'all managed jobs'
|
4031
4065
|
click.confirm(f'Cancelling {job_identity_str}. Proceed?',
|
4032
4066
|
default=True,
|
4033
4067
|
abort=True,
|
4034
4068
|
show_default=True)
|
4035
4069
|
|
4036
|
-
sdk.stream_and_get(
|
4070
|
+
sdk.stream_and_get(
|
4071
|
+
managed_jobs.cancel(job_ids=job_ids,
|
4072
|
+
name=name,
|
4073
|
+
all=all,
|
4074
|
+
all_users=all_users))
|
4037
4075
|
|
4038
4076
|
|
4039
4077
|
@jobs.command('logs', cls=_DocumentedCodeCommand)
|
sky/client/cli.py
CHANGED
@@ -133,7 +133,11 @@ def _get_cluster_records_and_set_ssh_config(
|
|
133
133
|
# Update the SSH config for all clusters
|
134
134
|
for record in cluster_records:
|
135
135
|
handle = record['handle']
|
136
|
-
|
136
|
+
# During the failover, even though a cluster does not exist, the handle
|
137
|
+
# can still exist in the record, and we check for credentials to avoid
|
138
|
+
# updating the SSH config for non-existent clusters.
|
139
|
+
if (handle is not None and handle.cached_external_ips is not None and
|
140
|
+
'credentials' in record):
|
137
141
|
credentials = record['credentials']
|
138
142
|
if isinstance(handle.launched_resources.cloud, clouds.Kubernetes):
|
139
143
|
# Replace the proxy command to proxy through the SkyPilot API
|
@@ -169,9 +173,9 @@ def _get_cluster_records_and_set_ssh_config(
|
|
169
173
|
handle.ssh_user,
|
170
174
|
)
|
171
175
|
else:
|
172
|
-
# If the cluster is not UP or does not have
|
173
|
-
# the cluster from the SSH config.
|
174
|
-
cluster_utils.SSHConfigHelper.remove_cluster(
|
176
|
+
# If the cluster is not UP or does not have credentials available,
|
177
|
+
# we need to remove the cluster from the SSH config.
|
178
|
+
cluster_utils.SSHConfigHelper.remove_cluster(record['name'])
|
175
179
|
|
176
180
|
# Clean up SSH configs for clusters that do not exist.
|
177
181
|
#
|
@@ -1379,12 +1383,14 @@ def exec(cluster: Optional[str], cluster_option: Optional[str],
|
|
1379
1383
|
def _handle_jobs_queue_request(
|
1380
1384
|
request_id: str,
|
1381
1385
|
show_all: bool,
|
1386
|
+
show_user: bool,
|
1382
1387
|
limit_num_jobs_to_show: bool = False,
|
1383
1388
|
is_called_by_user: bool = False) -> Tuple[Optional[int], str]:
|
1384
1389
|
"""Get the in-progress managed jobs.
|
1385
1390
|
|
1386
1391
|
Args:
|
1387
1392
|
show_all: Show all information of each job (e.g., region, price).
|
1393
|
+
show_user: Show the user who submitted the job.
|
1388
1394
|
limit_num_jobs_to_show: If True, limit the number of jobs to show to
|
1389
1395
|
_NUM_MANAGED_JOBS_TO_SHOW_IN_STATUS, which is mainly used by
|
1390
1396
|
`sky status`.
|
@@ -1452,6 +1458,7 @@ def _handle_jobs_queue_request(
|
|
1452
1458
|
if limit_num_jobs_to_show else None)
|
1453
1459
|
msg = managed_jobs.format_job_table(managed_jobs_,
|
1454
1460
|
show_all=show_all,
|
1461
|
+
show_user=show_user,
|
1455
1462
|
max_jobs=max_jobs_to_show)
|
1456
1463
|
return num_in_progress_jobs, msg
|
1457
1464
|
|
@@ -1561,7 +1568,9 @@ def _status_kubernetes(show_all: bool):
|
|
1561
1568
|
click.echo(f'\n{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
|
1562
1569
|
f'Managed jobs'
|
1563
1570
|
f'{colorama.Style.RESET_ALL}')
|
1564
|
-
msg = managed_jobs.format_job_table(all_jobs,
|
1571
|
+
msg = managed_jobs.format_job_table(all_jobs,
|
1572
|
+
show_all=show_all,
|
1573
|
+
show_user=False)
|
1565
1574
|
click.echo(msg)
|
1566
1575
|
if any(['sky-serve-controller' in c.cluster_name for c in all_clusters]):
|
1567
1576
|
# TODO: Parse serve controllers and show services separately.
|
@@ -1779,7 +1788,8 @@ def status(verbose: bool, refresh: bool, ip: bool, endpoints: bool,
|
|
1779
1788
|
show_managed_jobs = show_managed_jobs and not any([clusters, ip, endpoints])
|
1780
1789
|
if show_managed_jobs:
|
1781
1790
|
managed_jobs_queue_request_id = managed_jobs.queue(refresh=False,
|
1782
|
-
skip_finished=True
|
1791
|
+
skip_finished=True,
|
1792
|
+
all_users=all_users)
|
1783
1793
|
show_endpoints = endpoints or endpoint is not None
|
1784
1794
|
show_single_endpoint = endpoint is not None
|
1785
1795
|
show_services = show_services and not any([clusters, ip, endpoints])
|
@@ -1859,6 +1869,7 @@ def status(verbose: bool, refresh: bool, ip: bool, endpoints: bool,
|
|
1859
1869
|
num_in_progress_jobs, msg = _handle_jobs_queue_request(
|
1860
1870
|
managed_jobs_queue_request_id,
|
1861
1871
|
show_all=False,
|
1872
|
+
show_user=False,
|
1862
1873
|
limit_num_jobs_to_show=not all,
|
1863
1874
|
is_called_by_user=False)
|
1864
1875
|
except KeyboardInterrupt:
|
@@ -2751,7 +2762,7 @@ def start(
|
|
2751
2762
|
def down(
|
2752
2763
|
clusters: List[str],
|
2753
2764
|
all: bool, # pylint: disable=redefined-builtin
|
2754
|
-
all_users: bool,
|
2765
|
+
all_users: bool,
|
2755
2766
|
yes: bool,
|
2756
2767
|
purge: bool,
|
2757
2768
|
async_call: bool,
|
@@ -2812,7 +2823,9 @@ def _hint_or_raise_for_down_jobs_controller(controller_name: str,
|
|
2812
2823
|
with rich_utils.client_status(
|
2813
2824
|
'[bold cyan]Checking for in-progress managed jobs[/]'):
|
2814
2825
|
try:
|
2815
|
-
request_id = managed_jobs.queue(refresh=False,
|
2826
|
+
request_id = managed_jobs.queue(refresh=False,
|
2827
|
+
skip_finished=True,
|
2828
|
+
all_users=True)
|
2816
2829
|
managed_jobs_ = sdk.stream_and_get(request_id)
|
2817
2830
|
except exceptions.ClusterNotUpError as e:
|
2818
2831
|
if controller.value.connection_error_hint in str(e):
|
@@ -2836,7 +2849,9 @@ def _hint_or_raise_for_down_jobs_controller(controller_name: str,
|
|
2836
2849
|
'jobs (output of `sky jobs queue`) will be lost.')
|
2837
2850
|
click.echo(msg)
|
2838
2851
|
if managed_jobs_:
|
2839
|
-
job_table = managed_jobs.format_job_table(managed_jobs_,
|
2852
|
+
job_table = managed_jobs.format_job_table(managed_jobs_,
|
2853
|
+
show_all=False,
|
2854
|
+
show_user=True)
|
2840
2855
|
msg = controller.value.decline_down_for_dirty_controller_hint
|
2841
2856
|
# Add prefix to each line to align with the bullet point.
|
2842
2857
|
msg += '\n'.join(
|
@@ -3905,9 +3920,16 @@ def jobs_launch(
|
|
3905
3920
|
is_flag=True,
|
3906
3921
|
required=False,
|
3907
3922
|
help='Show only pending/running jobs\' information.')
|
3923
|
+
@click.option('--all-users',
|
3924
|
+
'-u',
|
3925
|
+
default=False,
|
3926
|
+
is_flag=True,
|
3927
|
+
required=False,
|
3928
|
+
help='Show jobs from all users.')
|
3908
3929
|
@usage_lib.entrypoint
|
3909
3930
|
# pylint: disable=redefined-builtin
|
3910
|
-
def jobs_queue(verbose: bool, refresh: bool, skip_finished: bool
|
3931
|
+
def jobs_queue(verbose: bool, refresh: bool, skip_finished: bool,
|
3932
|
+
all_users: bool):
|
3911
3933
|
"""Show statuses of managed jobs.
|
3912
3934
|
|
3913
3935
|
Each managed jobs can have one of the following statuses:
|
@@ -3964,9 +3986,10 @@ def jobs_queue(verbose: bool, refresh: bool, skip_finished: bool):
|
|
3964
3986
|
click.secho('Fetching managed job statuses...', fg='cyan')
|
3965
3987
|
with rich_utils.client_status('[cyan]Checking managed jobs[/]'):
|
3966
3988
|
managed_jobs_request_id = managed_jobs.queue(
|
3967
|
-
refresh=refresh, skip_finished=skip_finished)
|
3989
|
+
refresh=refresh, skip_finished=skip_finished, all_users=all_users)
|
3968
3990
|
_, msg = _handle_jobs_queue_request(managed_jobs_request_id,
|
3969
3991
|
show_all=verbose,
|
3992
|
+
show_user=all_users,
|
3970
3993
|
is_called_by_user=True)
|
3971
3994
|
if not skip_finished:
|
3972
3995
|
in_progress_only_hint = ''
|
@@ -3989,16 +4012,23 @@ def jobs_queue(verbose: bool, refresh: bool, skip_finished: bool):
|
|
3989
4012
|
is_flag=True,
|
3990
4013
|
default=False,
|
3991
4014
|
required=False,
|
3992
|
-
help='Cancel all managed jobs.')
|
4015
|
+
help='Cancel all managed jobs for the current user.')
|
3993
4016
|
@click.option('--yes',
|
3994
4017
|
'-y',
|
3995
4018
|
is_flag=True,
|
3996
4019
|
default=False,
|
3997
4020
|
required=False,
|
3998
4021
|
help='Skip confirmation prompt.')
|
4022
|
+
@click.option('--all-users',
|
4023
|
+
'-u',
|
4024
|
+
is_flag=True,
|
4025
|
+
default=False,
|
4026
|
+
required=False,
|
4027
|
+
help='Cancel all managed jobs from all users.')
|
3999
4028
|
@usage_lib.entrypoint
|
4000
4029
|
# pylint: disable=redefined-builtin
|
4001
|
-
def jobs_cancel(name: Optional[str], job_ids: Tuple[int], all: bool, yes: bool
|
4030
|
+
def jobs_cancel(name: Optional[str], job_ids: Tuple[int], all: bool, yes: bool,
|
4031
|
+
all_users: bool):
|
4002
4032
|
"""Cancel managed jobs.
|
4003
4033
|
|
4004
4034
|
You can provide either a job name or a list of job IDs to be cancelled.
|
@@ -4015,25 +4045,33 @@ def jobs_cancel(name: Optional[str], job_ids: Tuple[int], all: bool, yes: bool):
|
|
4015
4045
|
$ sky jobs cancel 1 2 3
|
4016
4046
|
"""
|
4017
4047
|
job_id_str = ','.join(map(str, job_ids))
|
4018
|
-
if sum([bool(job_ids), name is not None, all]) != 1:
|
4019
|
-
|
4020
|
-
|
4021
|
-
|
4048
|
+
if sum([bool(job_ids), name is not None, all or all_users]) != 1:
|
4049
|
+
arguments = []
|
4050
|
+
arguments += [f'--job-ids {job_id_str}'] if job_ids else []
|
4051
|
+
arguments += [f'--name {name}'] if name is not None else []
|
4052
|
+
arguments += ['--all'] if all else []
|
4053
|
+
arguments += ['--all-users'] if all_users else []
|
4022
4054
|
raise click.UsageError(
|
4023
|
-
'Can only specify one of JOB_IDS
|
4024
|
-
f'Provided {
|
4055
|
+
'Can only specify one of JOB_IDS, --name, or --all/--all-users. '
|
4056
|
+
f'Provided {" ".join(arguments)!r}.')
|
4025
4057
|
|
4026
4058
|
if not yes:
|
4027
4059
|
job_identity_str = (f'managed jobs with IDs {job_id_str}'
|
4028
4060
|
if job_ids else repr(name))
|
4029
|
-
if
|
4061
|
+
if all_users:
|
4062
|
+
job_identity_str = 'all managed jobs FOR ALL USERS'
|
4063
|
+
elif all:
|
4030
4064
|
job_identity_str = 'all managed jobs'
|
4031
4065
|
click.confirm(f'Cancelling {job_identity_str}. Proceed?',
|
4032
4066
|
default=True,
|
4033
4067
|
abort=True,
|
4034
4068
|
show_default=True)
|
4035
4069
|
|
4036
|
-
sdk.stream_and_get(
|
4070
|
+
sdk.stream_and_get(
|
4071
|
+
managed_jobs.cancel(job_ids=job_ids,
|
4072
|
+
name=name,
|
4073
|
+
all=all,
|
4074
|
+
all_users=all_users))
|
4037
4075
|
|
4038
4076
|
|
4039
4077
|
@jobs.command('logs', cls=_DocumentedCodeCommand)
|
sky/jobs/client/sdk.py
CHANGED
@@ -85,7 +85,8 @@ def launch(
|
|
85
85
|
@usage_lib.entrypoint
|
86
86
|
@server_common.check_server_healthy_or_start
|
87
87
|
def queue(refresh: bool,
|
88
|
-
skip_finished: bool = False
|
88
|
+
skip_finished: bool = False,
|
89
|
+
all_users: bool = False) -> server_common.RequestId:
|
89
90
|
"""Gets statuses of managed jobs.
|
90
91
|
|
91
92
|
Please refer to sky.cli.job_queue for documentation.
|
@@ -93,6 +94,7 @@ def queue(refresh: bool,
|
|
93
94
|
Args:
|
94
95
|
refresh: Whether to restart the jobs controller if it is stopped.
|
95
96
|
skip_finished: Whether to skip finished jobs.
|
97
|
+
all_users: Whether to show all users' jobs.
|
96
98
|
|
97
99
|
Returns:
|
98
100
|
The request ID of the queue request.
|
@@ -126,6 +128,7 @@ def queue(refresh: bool,
|
|
126
128
|
body = payloads.JobsQueueBody(
|
127
129
|
refresh=refresh,
|
128
130
|
skip_finished=skip_finished,
|
131
|
+
all_users=all_users,
|
129
132
|
)
|
130
133
|
response = requests.post(
|
131
134
|
f'{server_common.get_server_url()}/jobs/queue',
|
@@ -138,9 +141,10 @@ def queue(refresh: bool,
|
|
138
141
|
@usage_lib.entrypoint
|
139
142
|
@server_common.check_server_healthy_or_start
|
140
143
|
def cancel(
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
+
name: Optional[str] = None,
|
145
|
+
job_ids: Optional[List[int]] = None,
|
146
|
+
all: bool = False, # pylint: disable=redefined-builtin
|
147
|
+
all_users: bool = False,
|
144
148
|
) -> server_common.RequestId:
|
145
149
|
"""Cancels managed jobs.
|
146
150
|
|
@@ -150,6 +154,7 @@ def cancel(
|
|
150
154
|
name: Name of the managed job to cancel.
|
151
155
|
job_ids: IDs of the managed jobs to cancel.
|
152
156
|
all: Whether to cancel all managed jobs.
|
157
|
+
all_users: Whether to cancel all managed jobs from all users.
|
153
158
|
|
154
159
|
Returns:
|
155
160
|
The request ID of the cancel request.
|
@@ -162,6 +167,7 @@ def cancel(
|
|
162
167
|
name=name,
|
163
168
|
job_ids=job_ids,
|
164
169
|
all=all,
|
170
|
+
all_users=all_users,
|
165
171
|
)
|
166
172
|
response = requests.post(
|
167
173
|
f'{server_common.get_server_url()}/jobs/cancel',
|
sky/jobs/constants.py
CHANGED
@@ -40,7 +40,7 @@ JOBS_CLUSTER_NAME_PREFIX_LENGTH = 25
|
|
40
40
|
# The version of the lib files that jobs/utils use. Whenever there is an API
|
41
41
|
# change for the jobs/utils, we need to bump this version and update
|
42
42
|
# job.utils.ManagedJobCodeGen to handle the version update.
|
43
|
-
MANAGED_JOBS_VERSION =
|
43
|
+
MANAGED_JOBS_VERSION = 2
|
44
44
|
|
45
45
|
# The command for setting up the jobs dashboard on the controller. It firstly
|
46
46
|
# checks if the systemd services are available, and if not (e.g., Kubernetes
|
sky/jobs/dashboard/dashboard.py
CHANGED
@@ -16,6 +16,7 @@ import flask
|
|
16
16
|
import yaml
|
17
17
|
|
18
18
|
from sky import jobs as managed_jobs
|
19
|
+
from sky.client import sdk
|
19
20
|
from sky.jobs import constants as managed_job_constants
|
20
21
|
from sky.utils import common_utils
|
21
22
|
from sky.utils import controller_utils
|
@@ -134,7 +135,8 @@ def _extract_launch_history(log_content: str) -> str:
|
|
134
135
|
def home():
|
135
136
|
if not _is_running_on_jobs_controller():
|
136
137
|
# Experimental: run on laptop (refresh is very slow).
|
137
|
-
|
138
|
+
request_id = managed_jobs.queue(refresh=True, skip_finished=False)
|
139
|
+
all_managed_jobs = sdk.get(request_id)
|
138
140
|
else:
|
139
141
|
job_table = managed_jobs.dump_managed_job_queue()
|
140
142
|
all_managed_jobs = managed_jobs.load_managed_job_queue(job_table)
|
@@ -142,6 +144,7 @@ def home():
|
|
142
144
|
timestamp = datetime.datetime.now(datetime.timezone.utc)
|
143
145
|
rows = managed_jobs.format_job_table(all_managed_jobs,
|
144
146
|
show_all=True,
|
147
|
+
show_user=False,
|
145
148
|
return_rows=True)
|
146
149
|
|
147
150
|
status_counts = collections.defaultdict(int)
|
sky/jobs/scheduler.py
CHANGED
@@ -49,6 +49,7 @@ from sky import sky_logging
|
|
49
49
|
from sky.jobs import constants as managed_job_constants
|
50
50
|
from sky.jobs import state
|
51
51
|
from sky.skylet import constants
|
52
|
+
from sky.utils import common_utils
|
52
53
|
from sky.utils import subprocess_utils
|
53
54
|
|
54
55
|
logger = sky_logging.init_logger('sky.jobs.controller')
|
@@ -151,12 +152,20 @@ def maybe_schedule_next_jobs() -> None:
|
|
151
152
|
job_id = maybe_next_job['job_id']
|
152
153
|
dag_yaml_path = maybe_next_job['dag_yaml_path']
|
153
154
|
|
155
|
+
activate_python_env_cmd = (
|
156
|
+
f'{constants.ACTIVATE_SKY_REMOTE_PYTHON_ENV};')
|
157
|
+
env_file = maybe_next_job['env_file_path']
|
158
|
+
source_environment_cmd = (f'source {env_file};'
|
159
|
+
if env_file else '')
|
160
|
+
run_controller_cmd = ('python -u -m sky.jobs.controller '
|
161
|
+
f'{dag_yaml_path} --job-id {job_id};')
|
162
|
+
|
154
163
|
# If the command line here is changed, please also update
|
155
164
|
# utils._controller_process_alive. `--job-id X` should be at
|
156
165
|
# the end.
|
157
|
-
run_cmd = (f'{
|
158
|
-
'
|
159
|
-
f'{
|
166
|
+
run_cmd = (f'{activate_python_env_cmd}'
|
167
|
+
f'{source_environment_cmd}'
|
168
|
+
f'{run_controller_cmd}')
|
160
169
|
|
161
170
|
logs_dir = os.path.expanduser(
|
162
171
|
managed_job_constants.JOBS_CONTROLLER_LOGS_DIR)
|
@@ -175,16 +184,19 @@ def maybe_schedule_next_jobs() -> None:
|
|
175
184
|
pass
|
176
185
|
|
177
186
|
|
178
|
-
def submit_job(job_id: int, dag_yaml_path: str) -> None:
|
187
|
+
def submit_job(job_id: int, dag_yaml_path: str, env_file_path: str) -> None:
|
179
188
|
"""Submit an existing job to the scheduler.
|
180
189
|
|
181
190
|
This should be called after a job is created in the `spot` table as
|
182
191
|
PENDING. It will tell the scheduler to try and start the job controller, if
|
183
192
|
there are resources available. It may block to acquire the lock, so it
|
184
193
|
should not be on the critical path for `sky jobs launch -d`.
|
194
|
+
|
195
|
+
The user hash should be set (e.g. via SKYPILOT_USER_ID) before calling this.
|
185
196
|
"""
|
186
197
|
with filelock.FileLock(_get_lock_path()):
|
187
|
-
state.scheduler_set_waiting(job_id, dag_yaml_path
|
198
|
+
state.scheduler_set_waiting(job_id, dag_yaml_path, env_file_path,
|
199
|
+
common_utils.get_user_hash())
|
188
200
|
maybe_schedule_next_jobs()
|
189
201
|
|
190
202
|
|
@@ -281,12 +293,15 @@ def _can_lauch_in_alive_job() -> bool:
|
|
281
293
|
|
282
294
|
if __name__ == '__main__':
|
283
295
|
parser = ArgumentParser()
|
296
|
+
parser.add_argument('dag_yaml',
|
297
|
+
type=str,
|
298
|
+
help='The path to the user job yaml file.')
|
284
299
|
parser.add_argument('--job-id',
|
285
300
|
required=True,
|
286
301
|
type=int,
|
287
302
|
help='Job id for the controller job.')
|
288
|
-
parser.add_argument('
|
303
|
+
parser.add_argument('--env-file',
|
289
304
|
type=str,
|
290
|
-
help='The path to the
|
305
|
+
help='The path to the controller env file.')
|
291
306
|
args = parser.parse_args()
|
292
|
-
submit_job(args.job_id, args.dag_yaml)
|
307
|
+
submit_job(args.job_id, args.dag_yaml, args.env_file)
|
sky/jobs/server/core.py
CHANGED
@@ -140,6 +140,7 @@ def launch(
|
|
140
140
|
prefix = managed_job_constants.JOBS_TASK_YAML_PREFIX
|
141
141
|
remote_user_yaml_path = f'{prefix}/{dag.name}-{dag_uuid}.yaml'
|
142
142
|
remote_user_config_path = f'{prefix}/{dag.name}-{dag_uuid}.config_yaml'
|
143
|
+
remote_env_file_path = f'{prefix}/{dag.name}-{dag_uuid}.env'
|
143
144
|
controller_resources = controller_utils.get_controller_resources(
|
144
145
|
controller=controller_utils.Controllers.JOBS_CONTROLLER,
|
145
146
|
task_resources=sum([list(t.resources) for t in dag.tasks], []))
|
@@ -152,6 +153,7 @@ def launch(
|
|
152
153
|
# Note: actual cluster name will be <task.name>-<managed job ID>
|
153
154
|
'dag_name': dag.name,
|
154
155
|
'remote_user_config_path': remote_user_config_path,
|
156
|
+
'remote_env_file_path': remote_env_file_path,
|
155
157
|
'modified_catalogs':
|
156
158
|
service_catalog_common.get_modified_catalog_file_mounts(),
|
157
159
|
'dashboard_setup_cmd': managed_job_constants.DASHBOARD_SETUP_CMD,
|
@@ -318,7 +320,9 @@ def _maybe_restart_controller(
|
|
318
320
|
|
319
321
|
|
320
322
|
@usage_lib.entrypoint
|
321
|
-
def queue(refresh: bool,
|
323
|
+
def queue(refresh: bool,
|
324
|
+
skip_finished: bool = False,
|
325
|
+
all_users: bool = False) -> List[Dict[str, Any]]:
|
322
326
|
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
|
323
327
|
"""Gets statuses of managed jobs.
|
324
328
|
|
@@ -366,6 +370,19 @@ def queue(refresh: bool, skip_finished: bool = False) -> List[Dict[str, Any]]:
|
|
366
370
|
f'{returncode}')
|
367
371
|
|
368
372
|
jobs = managed_job_utils.load_managed_job_queue(job_table_payload)
|
373
|
+
|
374
|
+
if not all_users:
|
375
|
+
|
376
|
+
def user_hash_matches_or_missing(job: Dict[str, Any]) -> bool:
|
377
|
+
user_hash = job.get('user_hash', None)
|
378
|
+
if user_hash is None:
|
379
|
+
# For backwards compatibility, we show jobs that do not have a
|
380
|
+
# user_hash. TODO(cooperc): Remove before 0.12.0.
|
381
|
+
return True
|
382
|
+
return user_hash == common_utils.get_user_hash()
|
383
|
+
|
384
|
+
jobs = list(filter(user_hash_matches_or_missing, jobs))
|
385
|
+
|
369
386
|
if skip_finished:
|
370
387
|
# Filter out the finished jobs. If a multi-task job is partially
|
371
388
|
# finished, we will include all its tasks.
|
@@ -374,6 +391,7 @@ def queue(refresh: bool, skip_finished: bool = False) -> List[Dict[str, Any]]:
|
|
374
391
|
non_finished_job_ids = {job['job_id'] for job in non_finished_tasks}
|
375
392
|
jobs = list(
|
376
393
|
filter(lambda job: job['job_id'] in non_finished_job_ids, jobs))
|
394
|
+
|
377
395
|
return jobs
|
378
396
|
|
379
397
|
|
@@ -381,7 +399,8 @@ def queue(refresh: bool, skip_finished: bool = False) -> List[Dict[str, Any]]:
|
|
381
399
|
# pylint: disable=redefined-builtin
|
382
400
|
def cancel(name: Optional[str] = None,
|
383
401
|
job_ids: Optional[List[int]] = None,
|
384
|
-
all: bool = False) -> None:
|
402
|
+
all: bool = False,
|
403
|
+
all_users: bool = False) -> None:
|
385
404
|
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
|
386
405
|
"""Cancels managed jobs.
|
387
406
|
|
@@ -397,17 +416,22 @@ def cancel(name: Optional[str] = None,
|
|
397
416
|
stopped_message='All managed jobs should have finished.')
|
398
417
|
|
399
418
|
job_id_str = ','.join(map(str, job_ids))
|
400
|
-
if sum([bool(job_ids), name is not None, all]) != 1:
|
401
|
-
|
402
|
-
|
403
|
-
|
419
|
+
if sum([bool(job_ids), name is not None, all or all_users]) != 1:
|
420
|
+
arguments = []
|
421
|
+
arguments += [f'job_ids={job_id_str}'] if job_ids else []
|
422
|
+
arguments += [f'name={name}'] if name is not None else []
|
423
|
+
arguments += ['all'] if all else []
|
424
|
+
arguments += ['all_users'] if all_users else []
|
404
425
|
with ux_utils.print_exception_no_traceback():
|
405
|
-
raise ValueError('Can only specify one of JOB_IDS
|
406
|
-
f'Provided {
|
426
|
+
raise ValueError('Can only specify one of JOB_IDS, name, or all/'
|
427
|
+
f'all_users. Provided {" ".join(arguments)!r}.')
|
407
428
|
|
408
429
|
backend = backend_utils.get_backend_from_handle(handle)
|
409
430
|
assert isinstance(backend, backends.CloudVmRayBackend)
|
410
|
-
if all:
|
431
|
+
if all_users:
|
432
|
+
code = managed_job_utils.ManagedJobCodeGen.cancel_jobs_by_id(
|
433
|
+
None, all_users=True)
|
434
|
+
elif all:
|
411
435
|
code = managed_job_utils.ManagedJobCodeGen.cancel_jobs_by_id(None)
|
412
436
|
elif job_ids:
|
413
437
|
code = managed_job_utils.ManagedJobCodeGen.cancel_jobs_by_id(job_ids)
|
sky/jobs/server/server.py
CHANGED
@@ -109,9 +109,18 @@ async def download_logs(
|
|
109
109
|
@router.get('/dashboard')
|
110
110
|
async def dashboard(request: fastapi.Request,
|
111
111
|
user_hash: str) -> fastapi.Response:
|
112
|
+
# TODO(cooperc): Support showing only jobs for a specific user.
|
113
|
+
|
114
|
+
# FIX(zhwu/cooperc/eric): Fix log downloading (assumes global
|
115
|
+
# /download_log/xx route)
|
116
|
+
|
112
117
|
# Note: before #4717, each user had their own controller, and thus their own
|
113
118
|
# dashboard. Now, all users share the same controller, so this isn't really
|
114
119
|
# necessary. TODO(cooperc): clean up.
|
120
|
+
|
121
|
+
# TODO: Put this in an executor to avoid blocking the main server thread.
|
122
|
+
# It can take a long time if it needs to check the controller status.
|
123
|
+
|
115
124
|
# Find the port for the dashboard of the user
|
116
125
|
os.environ[constants.USER_ID_ENV_VAR] = user_hash
|
117
126
|
server_common.reload_for_new_request(client_entrypoint=None,
|
sky/jobs/state.py
CHANGED
@@ -116,7 +116,9 @@ def create_table(cursor, conn):
|
|
116
116
|
name TEXT,
|
117
117
|
schedule_state TEXT,
|
118
118
|
controller_pid INTEGER DEFAULT NULL,
|
119
|
-
dag_yaml_path TEXT
|
119
|
+
dag_yaml_path TEXT,
|
120
|
+
env_file_path TEXT,
|
121
|
+
user_hash TEXT)""")
|
120
122
|
|
121
123
|
db_utils.add_column_to_table(cursor, conn, 'job_info', 'schedule_state',
|
122
124
|
'TEXT')
|
@@ -127,6 +129,11 @@ def create_table(cursor, conn):
|
|
127
129
|
db_utils.add_column_to_table(cursor, conn, 'job_info', 'dag_yaml_path',
|
128
130
|
'TEXT')
|
129
131
|
|
132
|
+
db_utils.add_column_to_table(cursor, conn, 'job_info', 'env_file_path',
|
133
|
+
'TEXT')
|
134
|
+
|
135
|
+
db_utils.add_column_to_table(cursor, conn, 'job_info', 'user_hash', 'TEXT')
|
136
|
+
|
130
137
|
conn.commit()
|
131
138
|
|
132
139
|
|
@@ -181,6 +188,8 @@ columns = [
|
|
181
188
|
'schedule_state',
|
182
189
|
'controller_pid',
|
183
190
|
'dag_yaml_path',
|
191
|
+
'env_file_path',
|
192
|
+
'user_hash',
|
184
193
|
]
|
185
194
|
|
186
195
|
|
@@ -683,20 +692,24 @@ def set_local_log_file(job_id: int, task_id: Optional[int],
|
|
683
692
|
|
684
693
|
|
685
694
|
# ======== utility functions ========
|
686
|
-
def get_nonterminal_job_ids_by_name(name: Optional[str]) -> List[int]:
|
695
|
+
def get_nonterminal_job_ids_by_name(name: Optional[str],
|
696
|
+
all_users: bool = False) -> List[int]:
|
687
697
|
"""Get non-terminal job ids by name."""
|
688
698
|
statuses = ', '.join(['?'] * len(ManagedJobStatus.terminal_statuses()))
|
689
699
|
field_values = [
|
690
700
|
status.value for status in ManagedJobStatus.terminal_statuses()
|
691
701
|
]
|
692
702
|
|
693
|
-
|
703
|
+
job_filter = ''
|
704
|
+
if name is None and not all_users:
|
705
|
+
job_filter += 'AND (job_info.user_hash=(?)) '
|
706
|
+
field_values.append(common_utils.get_user_hash())
|
694
707
|
if name is not None:
|
695
708
|
# We match the job name from `job_info` for the jobs submitted after
|
696
709
|
# #1982, and from `spot` for the jobs submitted before #1982, whose
|
697
710
|
# job_info is not available.
|
698
|
-
|
699
|
-
'(job_info.name IS NULL AND spot.task_name=(?)))')
|
711
|
+
job_filter += ('AND (job_info.name=(?) OR '
|
712
|
+
'(job_info.name IS NULL AND spot.task_name=(?))) ')
|
700
713
|
field_values.extend([name, name])
|
701
714
|
|
702
715
|
# Left outer join is used here instead of join, because the job_info does
|
@@ -710,7 +723,7 @@ def get_nonterminal_job_ids_by_name(name: Optional[str]) -> List[int]:
|
|
710
723
|
ON spot.spot_job_id=job_info.spot_job_id
|
711
724
|
WHERE status NOT IN
|
712
725
|
({statuses})
|
713
|
-
{
|
726
|
+
{job_filter}
|
714
727
|
ORDER BY spot.spot_job_id DESC""", field_values).fetchall()
|
715
728
|
job_ids = [row[0] for row in rows if row[0] is not None]
|
716
729
|
return job_ids
|
@@ -906,6 +919,9 @@ def get_managed_jobs(job_id: Optional[int] = None) -> List[Dict[str, Any]]:
|
|
906
919
|
# existing controller before #1982, the job_info table may not exist,
|
907
920
|
# and all the managed jobs created before will not present in the
|
908
921
|
# job_info.
|
922
|
+
# Note: we will get the user_hash here, but don't try to call
|
923
|
+
# global_user_state.get_user() on it. This runs on the controller, which may
|
924
|
+
# not have the user info. Prefer to do it on the API server side.
|
909
925
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
910
926
|
rows = cursor.execute(f"""\
|
911
927
|
SELECT *
|
@@ -978,14 +994,17 @@ def get_local_log_file(job_id: int, task_id: Optional[int]) -> Optional[str]:
|
|
978
994
|
# scheduler lock to work correctly.
|
979
995
|
|
980
996
|
|
981
|
-
def scheduler_set_waiting(job_id: int, dag_yaml_path: str) -> None:
|
997
|
+
def scheduler_set_waiting(job_id: int, dag_yaml_path: str, env_file_path: str,
|
998
|
+
user_hash: str) -> None:
|
982
999
|
"""Do not call without holding the scheduler lock."""
|
983
1000
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
984
1001
|
updated_count = cursor.execute(
|
985
1002
|
'UPDATE job_info SET '
|
986
|
-
'schedule_state = (?), dag_yaml_path = (?) '
|
1003
|
+
'schedule_state = (?), dag_yaml_path = (?), env_file_path = (?), '
|
1004
|
+
' user_hash = (?) '
|
987
1005
|
'WHERE spot_job_id = (?) AND schedule_state = (?)',
|
988
|
-
(ManagedJobScheduleState.WAITING.value, dag_yaml_path,
|
1006
|
+
(ManagedJobScheduleState.WAITING.value, dag_yaml_path,
|
1007
|
+
env_file_path, user_hash, job_id,
|
989
1008
|
ManagedJobScheduleState.INACTIVE.value)).rowcount
|
990
1009
|
assert updated_count == 1, (job_id, updated_count)
|
991
1010
|
|
@@ -1085,7 +1104,7 @@ def get_waiting_job() -> Optional[Dict[str, Any]]:
|
|
1085
1104
|
"""
|
1086
1105
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
1087
1106
|
row = cursor.execute(
|
1088
|
-
'SELECT spot_job_id, schedule_state, dag_yaml_path '
|
1107
|
+
'SELECT spot_job_id, schedule_state, dag_yaml_path, env_file_path '
|
1089
1108
|
'FROM job_info '
|
1090
1109
|
'WHERE schedule_state in (?, ?) '
|
1091
1110
|
'ORDER BY spot_job_id LIMIT 1',
|
@@ -1095,4 +1114,5 @@ def get_waiting_job() -> Optional[Dict[str, Any]]:
|
|
1095
1114
|
'job_id': row[0],
|
1096
1115
|
'schedule_state': ManagedJobScheduleState(row[1]),
|
1097
1116
|
'dag_yaml_path': row[2],
|
1117
|
+
'env_file_path': row[3],
|
1098
1118
|
} if row is not None else None
|
sky/jobs/utils.py
CHANGED
@@ -449,13 +449,15 @@ def generate_managed_job_cluster_name(task_name: str, job_id: int) -> str:
|
|
449
449
|
return f'{cluster_name}-{job_id}'
|
450
450
|
|
451
451
|
|
452
|
-
def cancel_jobs_by_id(job_ids: Optional[List[int]]) -> str:
|
452
|
+
def cancel_jobs_by_id(job_ids: Optional[List[int]],
|
453
|
+
all_users: bool = False) -> str:
|
453
454
|
"""Cancel jobs by id.
|
454
455
|
|
455
456
|
If job_ids is None, cancel all jobs.
|
456
457
|
"""
|
457
458
|
if job_ids is None:
|
458
|
-
job_ids = managed_job_state.get_nonterminal_job_ids_by_name(
|
459
|
+
job_ids = managed_job_state.get_nonterminal_job_ids_by_name(
|
460
|
+
None, all_users)
|
459
461
|
job_ids = list(set(job_ids))
|
460
462
|
if not job_ids:
|
461
463
|
return 'No job to cancel.'
|
@@ -917,6 +919,7 @@ def _get_job_status_from_tasks(
|
|
917
919
|
@typing.overload
|
918
920
|
def format_job_table(tasks: List[Dict[str, Any]],
|
919
921
|
show_all: bool,
|
922
|
+
show_user: bool,
|
920
923
|
return_rows: Literal[False] = False,
|
921
924
|
max_jobs: Optional[int] = None) -> str:
|
922
925
|
...
|
@@ -925,6 +928,7 @@ def format_job_table(tasks: List[Dict[str, Any]],
|
|
925
928
|
@typing.overload
|
926
929
|
def format_job_table(tasks: List[Dict[str, Any]],
|
927
930
|
show_all: bool,
|
931
|
+
show_user: bool,
|
928
932
|
return_rows: Literal[True],
|
929
933
|
max_jobs: Optional[int] = None) -> List[List[str]]:
|
930
934
|
...
|
@@ -933,6 +937,7 @@ def format_job_table(tasks: List[Dict[str, Any]],
|
|
933
937
|
def format_job_table(
|
934
938
|
tasks: List[Dict[str, Any]],
|
935
939
|
show_all: bool,
|
940
|
+
show_user: bool,
|
936
941
|
return_rows: bool = False,
|
937
942
|
max_jobs: Optional[int] = None) -> Union[str, List[List[str]]]:
|
938
943
|
"""Returns managed jobs as a formatted string.
|
@@ -948,13 +953,14 @@ def format_job_table(
|
|
948
953
|
a list of "rows" (each of which is a list of str).
|
949
954
|
"""
|
950
955
|
jobs = collections.defaultdict(list)
|
951
|
-
# Check if the tasks have user information.
|
952
|
-
|
953
|
-
|
956
|
+
# Check if the tasks have user information from kubernetes.
|
957
|
+
# This is only used for sky status --kubernetes.
|
958
|
+
tasks_have_k8s_user = any([task.get('user') for task in tasks])
|
959
|
+
if max_jobs and tasks_have_k8s_user:
|
954
960
|
raise ValueError('max_jobs is not supported when tasks have user info.')
|
955
961
|
|
956
962
|
def get_hash(task):
|
957
|
-
if
|
963
|
+
if tasks_have_k8s_user:
|
958
964
|
return (task['user'], task['job_id'])
|
959
965
|
return task['job_id']
|
960
966
|
|
@@ -969,10 +975,17 @@ def format_job_table(
|
|
969
975
|
if not managed_job_status.is_terminal():
|
970
976
|
status_counts[managed_job_status.value] += 1
|
971
977
|
|
978
|
+
user_cols: List[str] = []
|
979
|
+
if show_user:
|
980
|
+
user_cols = ['USER']
|
981
|
+
if show_all:
|
982
|
+
user_cols.append('USER_ID')
|
983
|
+
|
972
984
|
columns = [
|
973
985
|
'ID',
|
974
986
|
'TASK',
|
975
987
|
'NAME',
|
988
|
+
*user_cols,
|
976
989
|
'RESOURCES',
|
977
990
|
'SUBMITTED',
|
978
991
|
'TOT. DURATION',
|
@@ -983,7 +996,7 @@ def format_job_table(
|
|
983
996
|
if show_all:
|
984
997
|
# TODO: move SCHED. STATE to a separate flag (e.g. --debug)
|
985
998
|
columns += ['STARTED', 'CLUSTER', 'REGION', 'SCHED. STATE', 'DETAILS']
|
986
|
-
if
|
999
|
+
if tasks_have_k8s_user:
|
987
1000
|
columns.insert(0, 'USER')
|
988
1001
|
job_table = log_utils.create_table(columns)
|
989
1002
|
|
@@ -1006,6 +1019,22 @@ def format_job_table(
|
|
1006
1019
|
return f'Failure: {failure_reason}'
|
1007
1020
|
return '-'
|
1008
1021
|
|
1022
|
+
def get_user_column_values(task: Dict[str, Any]) -> List[str]:
|
1023
|
+
user_values: List[str] = []
|
1024
|
+
if show_user:
|
1025
|
+
|
1026
|
+
user_name = '-'
|
1027
|
+
user_hash = task.get('user_hash', None)
|
1028
|
+
if user_hash:
|
1029
|
+
user = global_user_state.get_user(user_hash)
|
1030
|
+
user_name = user.name if user.name else '-'
|
1031
|
+
user_values = [user_name]
|
1032
|
+
|
1033
|
+
if show_all:
|
1034
|
+
user_values.append(user_hash if user_hash is not None else '-')
|
1035
|
+
|
1036
|
+
return user_values
|
1037
|
+
|
1009
1038
|
for job_hash, job_tasks in jobs.items():
|
1010
1039
|
if show_all:
|
1011
1040
|
schedule_state = job_tasks[0]['schedule_state']
|
@@ -1044,11 +1073,14 @@ def format_job_table(
|
|
1044
1073
|
if not managed_job_status.is_terminal():
|
1045
1074
|
status_str += f' (task: {current_task_id})'
|
1046
1075
|
|
1047
|
-
|
1076
|
+
user_values = get_user_column_values(job_tasks[0])
|
1077
|
+
|
1078
|
+
job_id = job_hash[1] if tasks_have_k8s_user else job_hash
|
1048
1079
|
job_values = [
|
1049
1080
|
job_id,
|
1050
1081
|
'',
|
1051
1082
|
job_name,
|
1083
|
+
*user_values,
|
1052
1084
|
'-',
|
1053
1085
|
submitted,
|
1054
1086
|
total_duration,
|
@@ -1065,7 +1097,7 @@ def format_job_table(
|
|
1065
1097
|
job_tasks[0]['schedule_state'],
|
1066
1098
|
generate_details(failure_reason),
|
1067
1099
|
])
|
1068
|
-
if
|
1100
|
+
if tasks_have_k8s_user:
|
1069
1101
|
job_values.insert(0, job_tasks[0].get('user', '-'))
|
1070
1102
|
job_table.add_row(job_values)
|
1071
1103
|
|
@@ -1075,10 +1107,12 @@ def format_job_table(
|
|
1075
1107
|
job_duration = log_utils.readable_time_duration(
|
1076
1108
|
0, task['job_duration'], absolute=True)
|
1077
1109
|
submitted = log_utils.readable_time_duration(task['submitted_at'])
|
1110
|
+
user_values = get_user_column_values(task)
|
1078
1111
|
values = [
|
1079
1112
|
task['job_id'] if len(job_tasks) == 1 else ' \u21B3',
|
1080
1113
|
task['task_id'] if len(job_tasks) > 1 else '-',
|
1081
1114
|
task['task_name'],
|
1115
|
+
*user_values,
|
1082
1116
|
task['resources'],
|
1083
1117
|
# SUBMITTED
|
1084
1118
|
submitted if submitted != '-' else submitted,
|
@@ -1103,7 +1137,7 @@ def format_job_table(
|
|
1103
1137
|
schedule_state,
|
1104
1138
|
generate_details(task['failure_reason']),
|
1105
1139
|
])
|
1106
|
-
if
|
1140
|
+
if tasks_have_k8s_user:
|
1107
1141
|
values.insert(0, task.get('user', '-'))
|
1108
1142
|
job_table.add_row(values)
|
1109
1143
|
|
@@ -1135,6 +1169,9 @@ class ManagedJobCodeGen:
|
|
1135
1169
|
_PREFIX = textwrap.dedent("""\
|
1136
1170
|
from sky.jobs import utils
|
1137
1171
|
from sky.jobs import state as managed_job_state
|
1172
|
+
from sky.jobs import constants as managed_job_constants
|
1173
|
+
|
1174
|
+
managed_job_version = managed_job_constants.MANAGED_JOBS_VERSION
|
1138
1175
|
""")
|
1139
1176
|
|
1140
1177
|
@classmethod
|
@@ -1146,9 +1183,17 @@ class ManagedJobCodeGen:
|
|
1146
1183
|
return cls._build(code)
|
1147
1184
|
|
1148
1185
|
@classmethod
|
1149
|
-
def cancel_jobs_by_id(cls,
|
1186
|
+
def cancel_jobs_by_id(cls,
|
1187
|
+
job_ids: Optional[List[int]],
|
1188
|
+
all_users: bool = False) -> str:
|
1150
1189
|
code = textwrap.dedent(f"""\
|
1151
|
-
|
1190
|
+
if managed_job_version < 2:
|
1191
|
+
# For backward compatibility, since all_users is not supported
|
1192
|
+
# before #4787. Assume th
|
1193
|
+
# TODO(cooperc): Remove compatibility before 0.12.0
|
1194
|
+
msg = utils.cancel_jobs_by_id({job_ids})
|
1195
|
+
else:
|
1196
|
+
msg = utils.cancel_jobs_by_id({job_ids}, all_users={all_users})
|
1152
1197
|
print(msg, end="", flush=True)
|
1153
1198
|
""")
|
1154
1199
|
return cls._build(code)
|
sky/server/constants.py
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
# API server version, whenever there is a change in API server that requires a
|
4
4
|
# restart of the local API server or error out when the client does not match
|
5
5
|
# the server version.
|
6
|
-
API_VERSION = '
|
6
|
+
API_VERSION = '2'
|
7
7
|
|
8
8
|
# Prefix for API request names.
|
9
9
|
REQUEST_NAME_PREFIX = 'sky.'
|
sky/server/requests/payloads.py
CHANGED
@@ -322,6 +322,7 @@ class JobsQueueBody(RequestBody):
|
|
322
322
|
"""The request body for the jobs queue endpoint."""
|
323
323
|
refresh: bool = False
|
324
324
|
skip_finished: bool = False
|
325
|
+
all_users: bool = False
|
325
326
|
|
326
327
|
|
327
328
|
class JobsCancelBody(RequestBody):
|
@@ -329,6 +330,7 @@ class JobsCancelBody(RequestBody):
|
|
329
330
|
name: Optional[str]
|
330
331
|
job_ids: Optional[List[int]]
|
331
332
|
all: bool = False
|
333
|
+
all_users: bool = False
|
332
334
|
|
333
335
|
|
334
336
|
class JobsLogsBody(RequestBody):
|
@@ -55,12 +55,19 @@ setup: |
|
|
55
55
|
|
56
56
|
run: |
|
57
57
|
{{ sky_activate_python_env }}
|
58
|
+
|
59
|
+
# Write env vars to a file
|
60
|
+
{%- for env_name, env_value in controller_envs.items() %}
|
61
|
+
echo "export {{env_name}}='{{env_value}}'" >> {{remote_env_file_path}}
|
62
|
+
{%- endfor %}
|
63
|
+
|
58
64
|
# Submit the job to the scheduler.
|
59
65
|
# Note: The job is already in the `spot` table, marked as PENDING.
|
60
66
|
# CloudVmRayBackend._exec_code_on_head() calls
|
61
67
|
# managed_job_codegen.set_pending() before we get here.
|
62
68
|
python -u -m sky.jobs.scheduler {{remote_user_yaml_path}} \
|
63
|
-
--job-id $SKYPILOT_INTERNAL_JOB_ID
|
69
|
+
--job-id $SKYPILOT_INTERNAL_JOB_ID \
|
70
|
+
--env-file {{remote_env_file_path}}
|
64
71
|
|
65
72
|
|
66
73
|
envs:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: skypilot-nightly
|
3
|
-
Version: 1.0.0.dev20250226
|
3
|
+
Version: 1.0.0.dev20250228
|
4
4
|
Summary: SkyPilot: An intercloud broker for the clouds
|
5
5
|
Author: SkyPilot Team
|
6
6
|
License: Apache 2.0
|
@@ -169,7 +169,7 @@ Dynamic: summary
|
|
169
169
|
|
170
170
|
<p align="center">
|
171
171
|
<a href="https://docs.skypilot.co/">
|
172
|
-
<img alt="Documentation" src="https://
|
172
|
+
<img alt="Documentation" src="https://img.shields.io/badge/docs-gray?logo=readthedocs&logoColor=f5f5f5">
|
173
173
|
</a>
|
174
174
|
|
175
175
|
<a href="https://github.com/skypilot-org/skypilot/releases">
|
{skypilot_nightly-1.0.0.dev20250226.dist-info → skypilot_nightly-1.0.0.dev20250228.dist-info}/RECORD
RENAMED
@@ -1,8 +1,8 @@
|
|
1
|
-
sky/__init__.py,sha256=
|
1
|
+
sky/__init__.py,sha256=jLsjhG2RasaqPtD_RYBd_LfTFzhUYIB8j6WkrjZVbKY,6428
|
2
2
|
sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
|
3
3
|
sky/authentication.py,sha256=hCEqi77nprQEg3ktfRL51xiiw16zwZOmFEDB_Z7fWVU,22384
|
4
4
|
sky/check.py,sha256=NDKx_Zm7YRxPjMv82wz3ESLnGIPljaACyqVdVNM0PzY,11258
|
5
|
-
sky/cli.py,sha256=
|
5
|
+
sky/cli.py,sha256=K28Bowflmjhir2e3hIPbZhsnfqvBsANSeBSnEXFoy10,219929
|
6
6
|
sky/cloud_stores.py,sha256=-95XIqi_ouo7hvoN5mQNP6bGm07MyF6Yk-YP4Txb5wg,24034
|
7
7
|
sky/core.py,sha256=X83hdpPTiWyEJLamrguCd03PUjkRiGgqTFfEBEQkzWc,45471
|
8
8
|
sky/dag.py,sha256=Yl7Ry26Vql5cv4YMz8g9kOUgtoCihJnw7c8NgZYakMY,3242
|
@@ -25,7 +25,7 @@ sky/adaptors/do.py,sha256=dJ0BYbkQoUWVu6_9Pxq3fOu6PngjZyyCQzgjnODXLCA,777
|
|
25
25
|
sky/adaptors/docker.py,sha256=_kzpZ0fkWHqqQAVVl0llTsCE31KYz3Sjn8psTBQHVkA,468
|
26
26
|
sky/adaptors/gcp.py,sha256=OQ9RaqjR0r0iaWYpjvEtIx5vnEhyB4LhUCwbtdxsmVk,3115
|
27
27
|
sky/adaptors/ibm.py,sha256=H87vD6izq_wQI8oQC7cx9iVtRgPi_QkAcrfa1Z3PNqU,4906
|
28
|
-
sky/adaptors/kubernetes.py,sha256=
|
28
|
+
sky/adaptors/kubernetes.py,sha256=UIUc3zI0MgWcv1GTBu-pZUSx_NTLf0zRI20JUdtA1HI,6594
|
29
29
|
sky/adaptors/nebius.py,sha256=JOvwniQT-Pkp9-af6IdL_FUkjIbsEAUXVNUkwdaEeb0,2732
|
30
30
|
sky/adaptors/oci.py,sha256=LfMSFUmkkNT6Yoz9FZHNl6UFSg4X1lJO4-x4ZbDdXTs,2831
|
31
31
|
sky/adaptors/runpod.py,sha256=4Nt_BfZhJAKQNA3wO8cxvvNI8x4NsDGHu_4EhRDlGYQ,225
|
@@ -43,7 +43,7 @@ sky/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
43
|
sky/benchmark/benchmark_state.py,sha256=X8CXmuU9KgsDRhKedhFgjeRMUFWtQsjFs1qECvPG2yg,8723
|
44
44
|
sky/benchmark/benchmark_utils.py,sha256=o4RymqSceq5mLEZL0upQM6NVEzJJQzj9s9tTm49uUTc,26365
|
45
45
|
sky/client/__init__.py,sha256=pz6xvVSd9X-gwqbsDL0E9QOojYqM0KAD0j-NCyCIF1k,38
|
46
|
-
sky/client/cli.py,sha256=
|
46
|
+
sky/client/cli.py,sha256=K28Bowflmjhir2e3hIPbZhsnfqvBsANSeBSnEXFoy10,219929
|
47
47
|
sky/client/common.py,sha256=axDic7WOG1e78SdFm5XIwdhX7YNvf3g4k7INrsW3X4s,14611
|
48
48
|
sky/client/sdk.py,sha256=U4v8Khu1lf1oUoBuJUhIFnjsFhYM9x8XcKsnVRMtihI,66990
|
49
49
|
sky/clouds/__init__.py,sha256=OW6mJ-9hpJSBORCgt2LippLQEYZHNfnBW1mooRNNvxo,1416
|
@@ -107,21 +107,21 @@ sky/data/mounting_utils.py,sha256=la21kp7k51zGoFp9WxT5hf38P_XTqcq-Hm1bJZsPnkg,14
|
|
107
107
|
sky/data/storage.py,sha256=mTgMGdfSV6Gia076Dvgmc18ZlqF6eObima558UShiXA,207165
|
108
108
|
sky/data/storage_utils.py,sha256=zB99nRTJjh8isU0UmqERmlwwRNgfig91IwrwVH8CcNw,12383
|
109
109
|
sky/jobs/__init__.py,sha256=qoI53-xXE0-SOkrLWigvhgFXjk7dWE0OTqGPYIk-kmM,1458
|
110
|
-
sky/jobs/constants.py,sha256=
|
110
|
+
sky/jobs/constants.py,sha256=Yv4cIUhkM-kIMlwcmhiFVPXI7aLnLw7MBhStDqSjjOM,3088
|
111
111
|
sky/jobs/controller.py,sha256=4G1CKI7M7D1BgJLbJMeqzg0iDDv7FR4ObB1BKZFFjhk,29585
|
112
112
|
sky/jobs/recovery_strategy.py,sha256=RLrqq8B1likxTknPzt3_BqO26sFVpoatxzUuGfwc18A,26170
|
113
|
-
sky/jobs/scheduler.py,sha256=
|
114
|
-
sky/jobs/state.py,sha256=
|
115
|
-
sky/jobs/utils.py,sha256=
|
113
|
+
sky/jobs/scheduler.py,sha256=8k2ieJ1TTvJ0TOalnklJtrMwFuatsh-ojoPMBgFRBlI,13119
|
114
|
+
sky/jobs/state.py,sha256=tDULLH6DVs4oKUIKhh0UAn3RzyVGuIUtEq5kW7K1Ojw,44585
|
115
|
+
sky/jobs/utils.py,sha256=Ta0UJXNhQWaNdKHPw4ZIF_o82m_AUH2RKE2Uj4U_TKM,54235
|
116
116
|
sky/jobs/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
117
|
-
sky/jobs/client/sdk.py,sha256=
|
118
|
-
sky/jobs/dashboard/dashboard.py,sha256=
|
117
|
+
sky/jobs/client/sdk.py,sha256=RBaU4IR9e8oMMzpMa-yMlpcd3sD5M4_c5xbp2VPbz9U,9939
|
118
|
+
sky/jobs/dashboard/dashboard.py,sha256=JaVrNUEFQPLmsDZnrR76Uo8QqcAHdgYzx7GZTxDfl9M,7885
|
119
119
|
sky/jobs/dashboard/static/favicon.ico,sha256=uYlvgxSM7gjBmXpZ8wydvZUPAbJiiix-rc2Xe5mma9s,15086
|
120
120
|
sky/jobs/dashboard/templates/index.html,sha256=tz95q8O2pF7IvfY6yv0rnPyhj4DX8WX4RIVVxqFKV1Y,28519
|
121
121
|
sky/jobs/server/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
122
|
-
sky/jobs/server/core.py,sha256=
|
122
|
+
sky/jobs/server/core.py,sha256=K5qxukyBnPD1DjcNMFJheULWoYSLaVXtjt0D1Cb9GoI,24902
|
123
123
|
sky/jobs/server/dashboard_utils.py,sha256=2Mbx40W1pQqPEPHsSDbHeaF0j5cgyKy-_A9Owdwp_AQ,2315
|
124
|
-
sky/jobs/server/server.py,sha256=
|
124
|
+
sky/jobs/server/server.py,sha256=vdVxl4ZkBRlfOdsUO5Ttxon_-NE9XoMVMSo8fJ-Y73Y,7803
|
125
125
|
sky/provision/__init__.py,sha256=LzOo5LjkRXwSf29dUqN14YbjzQu3liXLQcmweTeZ4dE,6457
|
126
126
|
sky/provision/common.py,sha256=E8AlSUFcn0FYQq1erNmoVfMAdsF9tP2yxfyk-9PLvQU,10286
|
127
127
|
sky/provision/constants.py,sha256=oc_XDUkcoLQ_lwDy5yMeMSWviKS0j0s1c0pjlvpNeWY,800
|
@@ -229,13 +229,13 @@ sky/serve/server/core.py,sha256=pRvFadEIH_WTUkTtSmuFoPBP4JFq8Obt68ifi9DWuog,3686
|
|
229
229
|
sky/serve/server/server.py,sha256=gQGVU9nHYdGbaLhGjIUNIYn4xwKjRASRJkiiTL5AI1Y,3283
|
230
230
|
sky/server/__init__.py,sha256=MPPBqFzXz6Jv5QSk6td_IcvnfXfNErDZVcizu4MLRow,27
|
231
231
|
sky/server/common.py,sha256=0LphKrp89_sGI-xDakK2uEqI-zKuvbc4OTcuLCiKfmQ,17560
|
232
|
-
sky/server/constants.py,sha256=
|
232
|
+
sky/server/constants.py,sha256=89jKE3SIe1T3_7j6ECTy4pZnhZZD7fBwsWOCOkTban8,770
|
233
233
|
sky/server/server.py,sha256=4ipJG67sBFWylNYdPD1FUhth36yX23XbcROXipRSZsw,42438
|
234
234
|
sky/server/stream_utils.py,sha256=-3IX1YCgxAFfcvQIV0TCvOn1wbRLWovAx3ckCrsExWU,5651
|
235
235
|
sky/server/html/log.html,sha256=TSGZktua9Ysl_ysg3w60rjxAxhH61AJnsYDHdtqrjmI,6929
|
236
236
|
sky/server/requests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
237
237
|
sky/server/requests/executor.py,sha256=TWX2jYkkstgRyRkWNE19Mgw4_CfzadebOW30iTGdK_Q,19693
|
238
|
-
sky/server/requests/payloads.py,sha256=
|
238
|
+
sky/server/requests/payloads.py,sha256=QYgEz85jswXkEYxO1mkwPA8MWXD_pETs-g_JH_Tlm_w,16038
|
239
239
|
sky/server/requests/requests.py,sha256=aMdjiK5kjSYP36pxdXFU6qgKOXcOmtViHbFm3V8Dvf8,19590
|
240
240
|
sky/server/requests/queues/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
241
241
|
sky/server/requests/queues/mp_queue.py,sha256=_7AFas__0b1L8e7Bwy4lu0VYU18R85YwMlDHPhQCfh0,2998
|
@@ -281,7 +281,7 @@ sky/templates/do-ray.yml.j2,sha256=sRKpn0tC-uPYtSZ20OB4fMzE7RbPQUr8kOCIbuJ4b4Q,4
|
|
281
281
|
sky/templates/fluidstack-ray.yml.j2,sha256=4M3ONqrTaViv7tzN19bSaWT-7c16183DoRVXeZGqgv0,3756
|
282
282
|
sky/templates/gcp-ray.yml.j2,sha256=CriBoM3XF80x9Rx8X-4VVQUFEo5osW6LRbz5ESrEcOg,9850
|
283
283
|
sky/templates/ibm-ray.yml.j2,sha256=uehn7ZZPNIxIXMytqyiEUpTljmwfynCCkdNJURVN31Y,6877
|
284
|
-
sky/templates/jobs-controller.yaml.j2,sha256=
|
284
|
+
sky/templates/jobs-controller.yaml.j2,sha256=_RyMLMvyjK-OOUDxoko5hXa0jWRm59j-qkKLsAz1opA,2511
|
285
285
|
sky/templates/kubernetes-ingress.yml.j2,sha256=73iDklVDWBMbItg0IexCa6_ClXPJOxw7PWz3leku4nE,1340
|
286
286
|
sky/templates/kubernetes-loadbalancer.yml.j2,sha256=IxrNYM366N01bbkJEbZ_UPYxUP8wyVEbRNFHRsBuLsw,626
|
287
287
|
sky/templates/kubernetes-port-forward-proxy-command.sh,sha256=iw7mypHszg6Ggq9MbyiYMFOkSlXaQZulaxqC5IWYGCc,3381
|
@@ -344,9 +344,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488
|
|
344
344
|
sky/utils/kubernetes/kubernetes_deploy_utils.py,sha256=iAjfyPclOs8qlALACcfxLpRAO9CZ-h16leFqXZ6tNaY,10096
|
345
345
|
sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
|
346
346
|
sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=Kq1MDygF2IxFmu9FXpCxqucXLmeUrvs6OtRij6XTQbo,6554
|
347
|
-
skypilot_nightly-1.0.0.
|
348
|
-
skypilot_nightly-1.0.0.
|
349
|
-
skypilot_nightly-1.0.0.
|
350
|
-
skypilot_nightly-1.0.0.
|
351
|
-
skypilot_nightly-1.0.0.
|
352
|
-
skypilot_nightly-1.0.0.
|
347
|
+
skypilot_nightly-1.0.0.dev20250228.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
|
348
|
+
skypilot_nightly-1.0.0.dev20250228.dist-info/METADATA,sha256=IBTEc-5NtZ9KP63RfsWsMmXI6qzs7ZBSdi9bAN8QfAc,19236
|
349
|
+
skypilot_nightly-1.0.0.dev20250228.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
350
|
+
skypilot_nightly-1.0.0.dev20250228.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
|
351
|
+
skypilot_nightly-1.0.0.dev20250228.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
|
352
|
+
skypilot_nightly-1.0.0.dev20250228.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|