skypilot-nightly 1.0.0.dev20250410__py3-none-any.whl → 1.0.0.dev20250412__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/oci.py +2 -2
- sky/authentication.py +2 -2
- sky/backends/backend_utils.py +1 -1
- sky/backends/cloud_vm_ray_backend.py +3 -3
- sky/check.py +1 -1
- sky/cli.py +51 -47
- sky/client/cli.py +51 -47
- sky/client/common.py +4 -2
- sky/client/sdk.py +60 -27
- sky/clouds/aws.py +2 -2
- sky/clouds/cloud.py +3 -2
- sky/clouds/kubernetes.py +20 -3
- sky/clouds/nebius.py +2 -4
- sky/clouds/oci.py +2 -2
- sky/clouds/utils/oci_utils.py +1 -1
- sky/core.py +12 -17
- sky/data/mounting_utils.py +34 -10
- sky/exceptions.py +1 -1
- sky/execution.py +5 -4
- sky/jobs/client/sdk.py +5 -0
- sky/optimizer.py +1 -2
- sky/provision/instance_setup.py +3 -1
- sky/provision/kubernetes/config.py +41 -36
- sky/provision/kubernetes/instance.py +4 -7
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +54 -0
- sky/provision/kubernetes/network_utils.py +1 -1
- sky/provision/kubernetes/utils.py +51 -35
- sky/serve/client/sdk.py +6 -0
- sky/server/common.py +16 -1
- sky/server/constants.py +5 -0
- sky/server/requests/payloads.py +2 -0
- sky/setup_files/dependencies.py +1 -1
- sky/skylet/constants.py +2 -2
- sky/skypilot_config.py +197 -70
- sky/templates/kubernetes-ray.yml.j2 +66 -25
- sky/templates/websocket_proxy.py +41 -2
- sky/utils/config_utils.py +1 -1
- sky/utils/controller_utils.py +1 -1
- sky/utils/kubernetes/generate_kubeconfig.sh +2 -2
- sky/utils/kubernetes/rsync_helper.sh +26 -11
- {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/METADATA +3 -1
- {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/RECORD +47 -48
- sky/provision/kubernetes/manifests/smarter-device-manager-configmap.yaml +0 -10
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +0 -68
- {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'ac3cbfe07c8c1129480d4facd683f4844ff6e1f7'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.dev20250410'
|
38
|
+
__version__ = '1.0.0.dev20250412'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/adaptors/oci.py
CHANGED
@@ -13,7 +13,7 @@ from sky.clouds.utils import oci_utils
|
|
13
13
|
# effect.
|
14
14
|
logging.getLogger('oci.circuit_breaker').setLevel(logging.WARNING)
|
15
15
|
|
16
|
-
|
16
|
+
OCI_CONFIG_PATH = '~/.oci/config'
|
17
17
|
ENV_VAR_OCI_CONFIG = 'OCI_CONFIG'
|
18
18
|
|
19
19
|
oci = common.LazyImport(
|
@@ -23,7 +23,7 @@ oci = common.LazyImport(
|
|
23
23
|
|
24
24
|
|
25
25
|
def get_config_file() -> str:
|
26
|
-
conf_file_path =
|
26
|
+
conf_file_path = OCI_CONFIG_PATH
|
27
27
|
config_path_via_env_var = os.environ.get(ENV_VAR_OCI_CONFIG)
|
28
28
|
if config_path_via_env_var is not None:
|
29
29
|
conf_file_path = config_path_via_env_var
|
sky/authentication.py
CHANGED
@@ -382,10 +382,10 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
382
382
|
network_mode = kubernetes_enums.KubernetesNetworkingMode.from_str(
|
383
383
|
network_mode_str)
|
384
384
|
except ValueError as e:
|
385
|
-
# Add message saying "Please check: ~/.sky/
|
385
|
+
# Add message saying "Please check: ~/.sky/skyconfig.yaml" to the error
|
386
386
|
# message.
|
387
387
|
with ux_utils.print_exception_no_traceback():
|
388
|
-
raise ValueError(str(e) + ' Please check: ~/.sky/
|
388
|
+
raise ValueError(str(e) + ' Please check: ~/.sky/skyconfig.yaml.') \
|
389
389
|
from None
|
390
390
|
_, public_key_path = get_or_generate_keys()
|
391
391
|
|
sky/backends/backend_utils.py
CHANGED
@@ -682,7 +682,7 @@ def write_cluster_config(
|
|
682
682
|
ssh_proxy_command = ssh_proxy_command_config[region_name]
|
683
683
|
logger.debug(f'Using ssh_proxy_command: {ssh_proxy_command!r}')
|
684
684
|
|
685
|
-
# User-supplied global instance tags from ~/.sky/
|
685
|
+
# User-supplied global instance tags from ~/.sky/skyconfig.yaml.
|
686
686
|
labels = skypilot_config.get_nested((str(cloud).lower(), 'labels'), {})
|
687
687
|
# labels is a dict, which is guaranteed by the type check in
|
688
688
|
# schemas.py
|
@@ -1473,7 +1473,7 @@ class RetryingVmProvisioner(object):
|
|
1473
1473
|
f'invalid cloud credentials: '
|
1474
1474
|
f'{common_utils.format_exception(e)}')
|
1475
1475
|
except exceptions.InvalidCloudConfigs as e:
|
1476
|
-
# Failed due to invalid user configs in ~/.sky/
|
1476
|
+
# Failed due to invalid user configs in ~/.sky/skyconfig.yaml.
|
1477
1477
|
logger.warning(f'{common_utils.format_exception(e)}')
|
1478
1478
|
# We should block the entire cloud if the user config is
|
1479
1479
|
# invalid.
|
@@ -2065,10 +2065,10 @@ class RetryingVmProvisioner(object):
|
|
2065
2065
|
(clouds.Kubernetes, clouds.RunPod)) and
|
2066
2066
|
controller_utils.Controllers.from_name(cluster_name)
|
2067
2067
|
is not None):
|
2068
|
-
assert (clouds.CloudImplementationFeatures.
|
2068
|
+
assert (clouds.CloudImplementationFeatures.AUTOSTOP
|
2069
2069
|
in requested_features), requested_features
|
2070
2070
|
requested_features.remove(
|
2071
|
-
clouds.CloudImplementationFeatures.
|
2071
|
+
clouds.CloudImplementationFeatures.AUTOSTOP)
|
2072
2072
|
|
2073
2073
|
# Skip if to_provision.cloud does not support requested features
|
2074
2074
|
to_provision.cloud.check_features_are_supported(
|
sky/check.py
CHANGED
@@ -142,7 +142,7 @@ def check_capabilities(
|
|
142
142
|
if disallowed_cloud_names:
|
143
143
|
disallowed_clouds_hint = (
|
144
144
|
'\nNote: The following clouds were disabled because they were not '
|
145
|
-
'included in allowed_clouds in ~/.sky/
|
145
|
+
'included in allowed_clouds in ~/.sky/skyconfig.yaml: '
|
146
146
|
f'{", ".join([c for c in disallowed_cloud_names])}')
|
147
147
|
if not all_enabled_clouds:
|
148
148
|
echo(
|
sky/cli.py
CHANGED
@@ -35,7 +35,8 @@ import sys
|
|
35
35
|
import textwrap
|
36
36
|
import traceback
|
37
37
|
import typing
|
38
|
-
from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union
|
38
|
+
from typing import (Any, Callable, Dict, Generator, List, Optional, Set, Tuple,
|
39
|
+
Union)
|
39
40
|
|
40
41
|
import click
|
41
42
|
import colorama
|
@@ -134,49 +135,51 @@ def _get_cluster_records_and_set_ssh_config(
|
|
134
135
|
# Update the SSH config for all clusters
|
135
136
|
for record in cluster_records:
|
136
137
|
handle = record['handle']
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
if (handle is not None and handle.cached_external_ips is not None and
|
141
|
-
'credentials' in record):
|
142
|
-
credentials = record['credentials']
|
143
|
-
if isinstance(handle.launched_resources.cloud, clouds.Kubernetes):
|
144
|
-
# Replace the proxy command to proxy through the SkyPilot API
|
145
|
-
# server with websocket.
|
146
|
-
key_path = (
|
147
|
-
cluster_utils.SSHConfigHelper.generate_local_key_file(
|
148
|
-
handle.cluster_name, credentials))
|
149
|
-
# Instead of directly use websocket_proxy.py, we add an
|
150
|
-
# additional proxy, so that ssh can use the head pod in the
|
151
|
-
# cluster to jump to worker pods.
|
152
|
-
proxy_command = (
|
153
|
-
f'ssh -tt -i {key_path} '
|
154
|
-
'-o StrictHostKeyChecking=no '
|
155
|
-
'-o UserKnownHostsFile=/dev/null '
|
156
|
-
'-o IdentitiesOnly=yes '
|
157
|
-
'-W %h:%p '
|
158
|
-
f'{handle.ssh_user}@127.0.0.1 '
|
159
|
-
'-o ProxyCommand='
|
160
|
-
# TODO(zhwu): write the template to a temp file, don't use
|
161
|
-
# the one in skypilot repo, to avoid changing the file when
|
162
|
-
# updating skypilot.
|
163
|
-
f'\'{sys.executable} {sky.__root_dir__}/templates/'
|
164
|
-
f'websocket_proxy.py '
|
165
|
-
f'{server_common.get_server_url().split("://")[1]} '
|
166
|
-
f'{handle.cluster_name}\'')
|
167
|
-
credentials['ssh_proxy_command'] = proxy_command
|
168
|
-
cluster_utils.SSHConfigHelper.add_cluster(
|
169
|
-
handle.cluster_name,
|
170
|
-
handle.cached_external_ips,
|
171
|
-
credentials,
|
172
|
-
handle.cached_external_ssh_ports,
|
173
|
-
handle.docker_user,
|
174
|
-
handle.ssh_user,
|
175
|
-
)
|
176
|
-
else:
|
138
|
+
|
139
|
+
if not (handle is not None and handle.cached_external_ips is not None
|
140
|
+
and 'credentials' in record):
|
177
141
|
# If the cluster is not UP or does not have credentials available,
|
178
142
|
# we need to remove the cluster from the SSH config.
|
179
143
|
cluster_utils.SSHConfigHelper.remove_cluster(record['name'])
|
144
|
+
continue
|
145
|
+
|
146
|
+
# During the failover, even though a cluster does not exist, the handle
|
147
|
+
# can still exist in the record, and we check for credentials to avoid
|
148
|
+
# updating the SSH config for non-existent clusters.
|
149
|
+
credentials = record['credentials']
|
150
|
+
if isinstance(handle.launched_resources.cloud, clouds.Kubernetes):
|
151
|
+
# Replace the proxy command to proxy through the SkyPilot API
|
152
|
+
# server with websocket.
|
153
|
+
key_path = (cluster_utils.SSHConfigHelper.generate_local_key_file(
|
154
|
+
handle.cluster_name, credentials))
|
155
|
+
# Instead of directly use websocket_proxy.py, we add an
|
156
|
+
# additional proxy, so that ssh can use the head pod in the
|
157
|
+
# cluster to jump to worker pods.
|
158
|
+
proxy_command = (
|
159
|
+
f'ssh -tt -i {key_path} '
|
160
|
+
'-o StrictHostKeyChecking=no '
|
161
|
+
'-o UserKnownHostsFile=/dev/null '
|
162
|
+
'-o IdentitiesOnly=yes '
|
163
|
+
'-W %h:%p '
|
164
|
+
f'{handle.ssh_user}@127.0.0.1 '
|
165
|
+
'-o ProxyCommand='
|
166
|
+
# TODO(zhwu): write the template to a temp file, don't use
|
167
|
+
# the one in skypilot repo, to avoid changing the file when
|
168
|
+
# updating skypilot.
|
169
|
+
f'\'{sys.executable} {sky.__root_dir__}/templates/'
|
170
|
+
f'websocket_proxy.py '
|
171
|
+
f'{server_common.get_server_url()} '
|
172
|
+
f'{handle.cluster_name}\'')
|
173
|
+
credentials['ssh_proxy_command'] = proxy_command
|
174
|
+
|
175
|
+
cluster_utils.SSHConfigHelper.add_cluster(
|
176
|
+
handle.cluster_name,
|
177
|
+
handle.cached_external_ips,
|
178
|
+
credentials,
|
179
|
+
handle.cached_external_ssh_ports,
|
180
|
+
handle.docker_user,
|
181
|
+
handle.ssh_user,
|
182
|
+
)
|
180
183
|
|
181
184
|
# Clean up SSH configs for clusters that do not exist.
|
182
185
|
#
|
@@ -186,14 +189,15 @@ def _get_cluster_records_and_set_ssh_config(
|
|
186
189
|
# removing clusters, because SkyPilot has no idea whether to remove
|
187
190
|
# ssh config of a cluster from another user.
|
188
191
|
clusters_exists = set(record['name'] for record in cluster_records)
|
192
|
+
clusters_to_remove: Set[str] = set()
|
189
193
|
if clusters is not None:
|
190
|
-
|
191
|
-
if cluster not in clusters_exists:
|
192
|
-
cluster_utils.SSHConfigHelper.remove_cluster(cluster)
|
194
|
+
clusters_to_remove = set(clusters) - clusters_exists
|
193
195
|
elif all_users:
|
194
|
-
|
195
|
-
|
196
|
-
|
196
|
+
clusters_to_remove = set(cluster_utils.SSHConfigHelper.
|
197
|
+
list_cluster_names()) - clusters_exists
|
198
|
+
|
199
|
+
for cluster_name in clusters_to_remove:
|
200
|
+
cluster_utils.SSHConfigHelper.remove_cluster(cluster_name)
|
197
201
|
|
198
202
|
return cluster_records
|
199
203
|
|
sky/client/cli.py
CHANGED
@@ -35,7 +35,8 @@ import sys
|
|
35
35
|
import textwrap
|
36
36
|
import traceback
|
37
37
|
import typing
|
38
|
-
from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union
|
38
|
+
from typing import (Any, Callable, Dict, Generator, List, Optional, Set, Tuple,
|
39
|
+
Union)
|
39
40
|
|
40
41
|
import click
|
41
42
|
import colorama
|
@@ -134,49 +135,51 @@ def _get_cluster_records_and_set_ssh_config(
|
|
134
135
|
# Update the SSH config for all clusters
|
135
136
|
for record in cluster_records:
|
136
137
|
handle = record['handle']
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
if (handle is not None and handle.cached_external_ips is not None and
|
141
|
-
'credentials' in record):
|
142
|
-
credentials = record['credentials']
|
143
|
-
if isinstance(handle.launched_resources.cloud, clouds.Kubernetes):
|
144
|
-
# Replace the proxy command to proxy through the SkyPilot API
|
145
|
-
# server with websocket.
|
146
|
-
key_path = (
|
147
|
-
cluster_utils.SSHConfigHelper.generate_local_key_file(
|
148
|
-
handle.cluster_name, credentials))
|
149
|
-
# Instead of directly use websocket_proxy.py, we add an
|
150
|
-
# additional proxy, so that ssh can use the head pod in the
|
151
|
-
# cluster to jump to worker pods.
|
152
|
-
proxy_command = (
|
153
|
-
f'ssh -tt -i {key_path} '
|
154
|
-
'-o StrictHostKeyChecking=no '
|
155
|
-
'-o UserKnownHostsFile=/dev/null '
|
156
|
-
'-o IdentitiesOnly=yes '
|
157
|
-
'-W %h:%p '
|
158
|
-
f'{handle.ssh_user}@127.0.0.1 '
|
159
|
-
'-o ProxyCommand='
|
160
|
-
# TODO(zhwu): write the template to a temp file, don't use
|
161
|
-
# the one in skypilot repo, to avoid changing the file when
|
162
|
-
# updating skypilot.
|
163
|
-
f'\'{sys.executable} {sky.__root_dir__}/templates/'
|
164
|
-
f'websocket_proxy.py '
|
165
|
-
f'{server_common.get_server_url().split("://")[1]} '
|
166
|
-
f'{handle.cluster_name}\'')
|
167
|
-
credentials['ssh_proxy_command'] = proxy_command
|
168
|
-
cluster_utils.SSHConfigHelper.add_cluster(
|
169
|
-
handle.cluster_name,
|
170
|
-
handle.cached_external_ips,
|
171
|
-
credentials,
|
172
|
-
handle.cached_external_ssh_ports,
|
173
|
-
handle.docker_user,
|
174
|
-
handle.ssh_user,
|
175
|
-
)
|
176
|
-
else:
|
138
|
+
|
139
|
+
if not (handle is not None and handle.cached_external_ips is not None
|
140
|
+
and 'credentials' in record):
|
177
141
|
# If the cluster is not UP or does not have credentials available,
|
178
142
|
# we need to remove the cluster from the SSH config.
|
179
143
|
cluster_utils.SSHConfigHelper.remove_cluster(record['name'])
|
144
|
+
continue
|
145
|
+
|
146
|
+
# During the failover, even though a cluster does not exist, the handle
|
147
|
+
# can still exist in the record, and we check for credentials to avoid
|
148
|
+
# updating the SSH config for non-existent clusters.
|
149
|
+
credentials = record['credentials']
|
150
|
+
if isinstance(handle.launched_resources.cloud, clouds.Kubernetes):
|
151
|
+
# Replace the proxy command to proxy through the SkyPilot API
|
152
|
+
# server with websocket.
|
153
|
+
key_path = (cluster_utils.SSHConfigHelper.generate_local_key_file(
|
154
|
+
handle.cluster_name, credentials))
|
155
|
+
# Instead of directly use websocket_proxy.py, we add an
|
156
|
+
# additional proxy, so that ssh can use the head pod in the
|
157
|
+
# cluster to jump to worker pods.
|
158
|
+
proxy_command = (
|
159
|
+
f'ssh -tt -i {key_path} '
|
160
|
+
'-o StrictHostKeyChecking=no '
|
161
|
+
'-o UserKnownHostsFile=/dev/null '
|
162
|
+
'-o IdentitiesOnly=yes '
|
163
|
+
'-W %h:%p '
|
164
|
+
f'{handle.ssh_user}@127.0.0.1 '
|
165
|
+
'-o ProxyCommand='
|
166
|
+
# TODO(zhwu): write the template to a temp file, don't use
|
167
|
+
# the one in skypilot repo, to avoid changing the file when
|
168
|
+
# updating skypilot.
|
169
|
+
f'\'{sys.executable} {sky.__root_dir__}/templates/'
|
170
|
+
f'websocket_proxy.py '
|
171
|
+
f'{server_common.get_server_url()} '
|
172
|
+
f'{handle.cluster_name}\'')
|
173
|
+
credentials['ssh_proxy_command'] = proxy_command
|
174
|
+
|
175
|
+
cluster_utils.SSHConfigHelper.add_cluster(
|
176
|
+
handle.cluster_name,
|
177
|
+
handle.cached_external_ips,
|
178
|
+
credentials,
|
179
|
+
handle.cached_external_ssh_ports,
|
180
|
+
handle.docker_user,
|
181
|
+
handle.ssh_user,
|
182
|
+
)
|
180
183
|
|
181
184
|
# Clean up SSH configs for clusters that do not exist.
|
182
185
|
#
|
@@ -186,14 +189,15 @@ def _get_cluster_records_and_set_ssh_config(
|
|
186
189
|
# removing clusters, because SkyPilot has no idea whether to remove
|
187
190
|
# ssh config of a cluster from another user.
|
188
191
|
clusters_exists = set(record['name'] for record in cluster_records)
|
192
|
+
clusters_to_remove: Set[str] = set()
|
189
193
|
if clusters is not None:
|
190
|
-
|
191
|
-
if cluster not in clusters_exists:
|
192
|
-
cluster_utils.SSHConfigHelper.remove_cluster(cluster)
|
194
|
+
clusters_to_remove = set(clusters) - clusters_exists
|
193
195
|
elif all_users:
|
194
|
-
|
195
|
-
|
196
|
-
|
196
|
+
clusters_to_remove = set(cluster_utils.SSHConfigHelper.
|
197
|
+
list_cluster_names()) - clusters_exists
|
198
|
+
|
199
|
+
for cluster_name in clusters_to_remove:
|
200
|
+
cluster_utils.SSHConfigHelper.remove_cluster(cluster_name)
|
197
201
|
|
198
202
|
return cluster_records
|
199
203
|
|
sky/client/common.py
CHANGED
@@ -75,7 +75,8 @@ def download_logs_from_api_server(
|
|
75
75
|
body = payloads.DownloadBody(folder_paths=list(paths_on_api_server),)
|
76
76
|
response = requests.post(f'{server_common.get_server_url()}/download',
|
77
77
|
json=json.loads(body.model_dump_json()),
|
78
|
-
stream=True)
|
78
|
+
stream=True,
|
79
|
+
cookies=server_common.get_api_cookie_jar())
|
79
80
|
if response.status_code == 200:
|
80
81
|
remote_home_path = response.headers.get('X-Home-Path')
|
81
82
|
assert remote_home_path is not None, response.headers
|
@@ -176,7 +177,8 @@ def _upload_chunk_with_retry(params: UploadChunkParams) -> None:
|
|
176
177
|
},
|
177
178
|
content=FileChunkIterator(f, _UPLOAD_CHUNK_BYTES,
|
178
179
|
params.chunk_index),
|
179
|
-
headers={'Content-Type': 'application/octet-stream'})
|
180
|
+
headers={'Content-Type': 'application/octet-stream'},
|
181
|
+
cookies=server_common.get_api_cookie_jar())
|
180
182
|
if response.status_code == 200:
|
181
183
|
data = response.json()
|
182
184
|
status = data.get('status')
|
sky/client/sdk.py
CHANGED
@@ -102,7 +102,8 @@ def check(clouds: Optional[Tuple[str]],
|
|
102
102
|
"""
|
103
103
|
body = payloads.CheckBody(clouds=clouds, verbose=verbose)
|
104
104
|
response = requests.post(f'{server_common.get_server_url()}/check',
|
105
|
-
json=json.loads(body.model_dump_json()))
|
105
|
+
json=json.loads(body.model_dump_json()),
|
106
|
+
cookies=server_common.get_api_cookie_jar())
|
106
107
|
return server_common.get_request_id(response)
|
107
108
|
|
108
109
|
|
@@ -118,7 +119,8 @@ def enabled_clouds() -> server_common.RequestId:
|
|
118
119
|
Request Returns:
|
119
120
|
A list of enabled clouds in string format.
|
120
121
|
"""
|
121
|
-
response = requests.get(f'{server_common.get_server_url()}/enabled_clouds')
|
122
|
+
response = requests.get(f'{server_common.get_server_url()}/enabled_clouds',
|
123
|
+
cookies=server_common.get_api_cookie_jar())
|
122
124
|
return server_common.get_request_id(response)
|
123
125
|
|
124
126
|
|
@@ -168,7 +170,8 @@ def list_accelerators(gpus_only: bool = True,
|
|
168
170
|
)
|
169
171
|
response = requests.post(
|
170
172
|
f'{server_common.get_server_url()}/list_accelerators',
|
171
|
-
json=json.loads(body.model_dump_json()))
|
173
|
+
json=json.loads(body.model_dump_json()),
|
174
|
+
cookies=server_common.get_api_cookie_jar())
|
172
175
|
return server_common.get_request_id(response)
|
173
176
|
|
174
177
|
|
@@ -208,7 +211,8 @@ def list_accelerator_counts(
|
|
208
211
|
)
|
209
212
|
response = requests.post(
|
210
213
|
f'{server_common.get_server_url()}/list_accelerator_counts',
|
211
|
-
json=json.loads(body.model_dump_json()))
|
214
|
+
json=json.loads(body.model_dump_json()),
|
215
|
+
cookies=server_common.get_api_cookie_jar())
|
212
216
|
return server_common.get_request_id(response)
|
213
217
|
|
214
218
|
|
@@ -246,7 +250,8 @@ def optimize(
|
|
246
250
|
minimize=minimize,
|
247
251
|
request_options=admin_policy_request_options)
|
248
252
|
response = requests.post(f'{server_common.get_server_url()}/optimize',
|
249
|
-
json=json.loads(body.model_dump_json()))
|
253
|
+
json=json.loads(body.model_dump_json()),
|
254
|
+
cookies=server_common.get_api_cookie_jar())
|
250
255
|
return server_common.get_request_id(response)
|
251
256
|
|
252
257
|
|
@@ -281,7 +286,8 @@ def validate(
|
|
281
286
|
body = payloads.ValidateBody(dag=dag_str,
|
282
287
|
request_options=admin_policy_request_options)
|
283
288
|
response = requests.post(f'{server_common.get_server_url()}/validate',
|
284
|
-
json=json.loads(body.model_dump_json()))
|
289
|
+
json=json.loads(body.model_dump_json()),
|
290
|
+
cookies=server_common.get_api_cookie_jar())
|
285
291
|
if response.status_code == 400:
|
286
292
|
with ux_utils.print_exception_no_traceback():
|
287
293
|
raise exceptions.deserialize_exception(
|
@@ -493,6 +499,7 @@ def launch(
|
|
493
499
|
f'{server_common.get_server_url()}/launch',
|
494
500
|
json=json.loads(body.model_dump_json()),
|
495
501
|
timeout=5,
|
502
|
+
cookies=server_common.get_api_cookie_jar(),
|
496
503
|
)
|
497
504
|
return server_common.get_request_id(response)
|
498
505
|
|
@@ -576,6 +583,7 @@ def exec( # pylint: disable=redefined-builtin
|
|
576
583
|
f'{server_common.get_server_url()}/exec',
|
577
584
|
json=json.loads(body.model_dump_json()),
|
578
585
|
timeout=5,
|
586
|
+
cookies=server_common.get_api_cookie_jar(),
|
579
587
|
)
|
580
588
|
return server_common.get_request_id(response)
|
581
589
|
|
@@ -626,7 +634,8 @@ def tail_logs(cluster_name: str,
|
|
626
634
|
json=json.loads(body.model_dump_json()),
|
627
635
|
stream=True,
|
628
636
|
timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
|
629
|
-
None))
|
637
|
+
None),
|
638
|
+
cookies=server_common.get_api_cookie_jar())
|
630
639
|
request_id = server_common.get_request_id(response)
|
631
640
|
return stream_response(request_id, response, output_stream)
|
632
641
|
|
@@ -663,7 +672,8 @@ def download_logs(cluster_name: str,
|
|
663
672
|
job_ids=job_ids,
|
664
673
|
)
|
665
674
|
response = requests.post(f'{server_common.get_server_url()}/download_logs',
|
666
|
-
json=json.loads(body.model_dump_json()))
|
675
|
+
json=json.loads(body.model_dump_json()),
|
676
|
+
cookies=server_common.get_api_cookie_jar())
|
667
677
|
job_id_remote_path_dict = stream_and_get(
|
668
678
|
server_common.get_request_id(response))
|
669
679
|
remote2local_path_dict = client_common.download_logs_from_api_server(
|
@@ -745,6 +755,7 @@ def start(
|
|
745
755
|
f'{server_common.get_server_url()}/start',
|
746
756
|
json=json.loads(body.model_dump_json()),
|
747
757
|
timeout=5,
|
758
|
+
cookies=server_common.get_api_cookie_jar(),
|
748
759
|
)
|
749
760
|
return server_common.get_request_id(response)
|
750
761
|
|
@@ -790,6 +801,7 @@ def down(cluster_name: str, purge: bool = False) -> server_common.RequestId:
|
|
790
801
|
f'{server_common.get_server_url()}/down',
|
791
802
|
json=json.loads(body.model_dump_json()),
|
792
803
|
timeout=5,
|
804
|
+
cookies=server_common.get_api_cookie_jar(),
|
793
805
|
)
|
794
806
|
return server_common.get_request_id(response)
|
795
807
|
|
@@ -838,6 +850,7 @@ def stop(cluster_name: str, purge: bool = False) -> server_common.RequestId:
|
|
838
850
|
f'{server_common.get_server_url()}/stop',
|
839
851
|
json=json.loads(body.model_dump_json()),
|
840
852
|
timeout=5,
|
853
|
+
cookies=server_common.get_api_cookie_jar(),
|
841
854
|
)
|
842
855
|
return server_common.get_request_id(response)
|
843
856
|
|
@@ -907,6 +920,7 @@ def autostop(
|
|
907
920
|
f'{server_common.get_server_url()}/autostop',
|
908
921
|
json=json.loads(body.model_dump_json()),
|
909
922
|
timeout=5,
|
923
|
+
cookies=server_common.get_api_cookie_jar(),
|
910
924
|
)
|
911
925
|
return server_common.get_request_id(response)
|
912
926
|
|
@@ -966,7 +980,8 @@ def queue(cluster_name: str,
|
|
966
980
|
all_users=all_users,
|
967
981
|
)
|
968
982
|
response = requests.post(f'{server_common.get_server_url()}/queue',
|
969
|
-
json=json.loads(body.model_dump_json()))
|
983
|
+
json=json.loads(body.model_dump_json()),
|
984
|
+
cookies=server_common.get_api_cookie_jar())
|
970
985
|
return server_common.get_request_id(response)
|
971
986
|
|
972
987
|
|
@@ -1007,7 +1022,8 @@ def job_status(cluster_name: str,
|
|
1007
1022
|
job_ids=job_ids,
|
1008
1023
|
)
|
1009
1024
|
response = requests.post(f'{server_common.get_server_url()}/job_status',
|
1010
|
-
json=json.loads(body.model_dump_json()))
|
1025
|
+
json=json.loads(body.model_dump_json()),
|
1026
|
+
cookies=server_common.get_api_cookie_jar())
|
1011
1027
|
return server_common.get_request_id(response)
|
1012
1028
|
|
1013
1029
|
|
@@ -1060,7 +1076,8 @@ def cancel(
|
|
1060
1076
|
try_cancel_if_cluster_is_init=_try_cancel_if_cluster_is_init,
|
1061
1077
|
)
|
1062
1078
|
response = requests.post(f'{server_common.get_server_url()}/cancel',
|
1063
|
-
json=json.loads(body.model_dump_json()))
|
1079
|
+
json=json.loads(body.model_dump_json()),
|
1080
|
+
cookies=server_common.get_api_cookie_jar())
|
1064
1081
|
return server_common.get_request_id(response)
|
1065
1082
|
|
1066
1083
|
|
@@ -1155,7 +1172,8 @@ def status(
|
|
1155
1172
|
all_users=all_users,
|
1156
1173
|
)
|
1157
1174
|
response = requests.post(f'{server_common.get_server_url()}/status',
|
1158
|
-
json=json.loads(body.model_dump_json()))
|
1175
|
+
json=json.loads(body.model_dump_json()),
|
1176
|
+
cookies=server_common.get_api_cookie_jar())
|
1159
1177
|
return server_common.get_request_id(response)
|
1160
1178
|
|
1161
1179
|
|
@@ -1189,7 +1207,8 @@ def endpoints(
|
|
1189
1207
|
port=port,
|
1190
1208
|
)
|
1191
1209
|
response = requests.post(f'{server_common.get_server_url()}/endpoints',
|
1192
|
-
json=json.loads(body.model_dump_json()))
|
1210
|
+
json=json.loads(body.model_dump_json()),
|
1211
|
+
cookies=server_common.get_api_cookie_jar())
|
1193
1212
|
return server_common.get_request_id(response)
|
1194
1213
|
|
1195
1214
|
|
@@ -1227,7 +1246,8 @@ def cost_report() -> server_common.RequestId: # pylint: disable=redefined-built
|
|
1227
1246
|
'total_cost': (float) cost given resources and usage intervals,
|
1228
1247
|
}
|
1229
1248
|
"""
|
1230
|
-
response = requests.get(f'{server_common.get_server_url()}/cost_report')
|
1249
|
+
response = requests.get(f'{server_common.get_server_url()}/cost_report',
|
1250
|
+
cookies=server_common.get_api_cookie_jar())
|
1231
1251
|
return server_common.get_request_id(response)
|
1232
1252
|
|
1233
1253
|
|
@@ -1256,7 +1276,8 @@ def storage_ls() -> server_common.RequestId:
|
|
1256
1276
|
}
|
1257
1277
|
]
|
1258
1278
|
"""
|
1259
|
-
response = requests.get(f'{server_common.get_server_url()}/storage/ls')
|
1279
|
+
response = requests.get(f'{server_common.get_server_url()}/storage/ls',
|
1280
|
+
cookies=server_common.get_api_cookie_jar())
|
1260
1281
|
return server_common.get_request_id(response)
|
1261
1282
|
|
1262
1283
|
|
@@ -1280,7 +1301,8 @@ def storage_delete(name: str) -> server_common.RequestId:
|
|
1280
1301
|
"""
|
1281
1302
|
body = payloads.StorageBody(name=name)
|
1282
1303
|
response = requests.post(f'{server_common.get_server_url()}/storage/delete',
|
1283
|
-
json=json.loads(body.model_dump_json()))
|
1304
|
+
json=json.loads(body.model_dump_json()),
|
1305
|
+
cookies=server_common.get_api_cookie_jar())
|
1284
1306
|
return server_common.get_request_id(response)
|
1285
1307
|
|
1286
1308
|
|
@@ -1318,7 +1340,8 @@ def local_up(gpus: bool,
|
|
1318
1340
|
context_name=context_name,
|
1319
1341
|
password=password)
|
1320
1342
|
response = requests.post(f'{server_common.get_server_url()}/local_up',
|
1321
|
-
json=json.loads(body.model_dump_json()))
|
1343
|
+
json=json.loads(body.model_dump_json()),
|
1344
|
+
cookies=server_common.get_api_cookie_jar())
|
1322
1345
|
return server_common.get_request_id(response)
|
1323
1346
|
|
1324
1347
|
|
@@ -1334,7 +1357,8 @@ def local_down() -> server_common.RequestId:
|
|
1334
1357
|
with ux_utils.print_exception_no_traceback():
|
1335
1358
|
raise ValueError('sky local down is only supported when running '
|
1336
1359
|
'SkyPilot locally.')
|
1337
|
-
response = requests.post(f'{server_common.get_server_url()}/local_down')
|
1360
|
+
response = requests.post(f'{server_common.get_server_url()}/local_down',
|
1361
|
+
cookies=server_common.get_api_cookie_jar())
|
1338
1362
|
return server_common.get_request_id(response)
|
1339
1363
|
|
1340
1364
|
|
@@ -1358,7 +1382,8 @@ def realtime_kubernetes_gpu_availability(
|
|
1358
1382
|
response = requests.post(
|
1359
1383
|
f'{server_common.get_server_url()}/'
|
1360
1384
|
'realtime_kubernetes_gpu_availability',
|
1361
|
-
json=json.loads(body.model_dump_json()))
|
1385
|
+
json=json.loads(body.model_dump_json()),
|
1386
|
+
cookies=server_common.get_api_cookie_jar())
|
1362
1387
|
return server_common.get_request_id(response)
|
1363
1388
|
|
1364
1389
|
|
@@ -1389,7 +1414,8 @@ def kubernetes_node_info(
|
|
1389
1414
|
body = payloads.KubernetesNodeInfoRequestBody(context=context)
|
1390
1415
|
response = requests.post(
|
1391
1416
|
f'{server_common.get_server_url()}/kubernetes_node_info',
|
1392
|
-
json=json.loads(body.model_dump_json()))
|
1417
|
+
json=json.loads(body.model_dump_json()),
|
1418
|
+
cookies=server_common.get_api_cookie_jar())
|
1393
1419
|
return server_common.get_request_id(response)
|
1394
1420
|
|
1395
1421
|
|
@@ -1418,7 +1444,8 @@ def status_kubernetes() -> server_common.RequestId:
|
|
1418
1444
|
- context: Kubernetes context used to fetch the cluster information.
|
1419
1445
|
"""
|
1420
1446
|
response = requests.get(
|
1421
|
-
f'{server_common.get_server_url()}/status_kubernetes')
|
1447
|
+
f'{server_common.get_server_url()}/status_kubernetes',
|
1448
|
+
cookies=server_common.get_api_cookie_jar())
|
1422
1449
|
return server_common.get_request_id(response)
|
1423
1450
|
|
1424
1451
|
|
@@ -1444,7 +1471,8 @@ def get(request_id: str) -> Any:
|
|
1444
1471
|
response = requests.get(
|
1445
1472
|
f'{server_common.get_server_url()}/api/get?request_id={request_id}',
|
1446
1473
|
timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
|
1447
|
-
None))
|
1474
|
+
None),
|
1475
|
+
cookies=server_common.get_api_cookie_jar())
|
1448
1476
|
request_task = None
|
1449
1477
|
if response.status_code == 200:
|
1450
1478
|
request_task = requests_lib.Request.decode(
|
@@ -1523,7 +1551,8 @@ def stream_and_get(
|
|
1523
1551
|
params=params,
|
1524
1552
|
timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
|
1525
1553
|
None),
|
1526
|
-
stream=True)
|
1554
|
+
stream=True,
|
1555
|
+
cookies=server_common.get_api_cookie_jar())
|
1527
1556
|
if response.status_code in [404, 400]:
|
1528
1557
|
detail = response.json().get('detail')
|
1529
1558
|
with ux_utils.print_exception_no_traceback():
|
@@ -1579,7 +1608,8 @@ def api_cancel(request_ids: Optional[Union[str, List[str]]] = None,
|
|
1579
1608
|
|
1580
1609
|
response = requests.post(f'{server_common.get_server_url()}/api/cancel',
|
1581
1610
|
json=json.loads(body.model_dump_json()),
|
1582
|
-
timeout=5)
|
1611
|
+
timeout=5,
|
1612
|
+
cookies=server_common.get_api_cookie_jar())
|
1583
1613
|
return server_common.get_request_id(response)
|
1584
1614
|
|
1585
1615
|
|
@@ -1607,7 +1637,8 @@ def api_status(
|
|
1607
1637
|
f'{server_common.get_server_url()}/api/status',
|
1608
1638
|
params=server_common.request_body_to_params(body),
|
1609
1639
|
timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
|
1610
|
-
None))
|
1640
|
+
None),
|
1641
|
+
cookies=server_common.get_api_cookie_jar())
|
1611
1642
|
server_common.handle_request_error(response)
|
1612
1643
|
return [
|
1613
1644
|
requests_lib.RequestPayload(**request) for request in response.json()
|
@@ -1634,7 +1665,8 @@ def api_info() -> Dict[str, str]:
|
|
1634
1665
|
}
|
1635
1666
|
|
1636
1667
|
"""
|
1637
|
-
response = requests.get(f'{server_common.get_server_url()}/api/health')
|
1668
|
+
response = requests.get(f'{server_common.get_server_url()}/api/health',
|
1669
|
+
cookies=server_common.get_api_cookie_jar())
|
1638
1670
|
response.raise_for_status()
|
1639
1671
|
return response.json()
|
1640
1672
|
|
@@ -1780,7 +1812,8 @@ def api_login(endpoint: Optional[str] = None) -> None:
|
|
1780
1812
|
server_common.check_server_healthy(endpoint)
|
1781
1813
|
|
1782
1814
|
# Set the endpoint in the config file
|
1783
|
-
config_path = pathlib.Path(
|
1815
|
+
config_path = pathlib.Path(
|
1816
|
+
skypilot_config.get_user_config_path()).expanduser()
|
1784
1817
|
with filelock.FileLock(config_path.with_suffix('.lock')):
|
1785
1818
|
if not skypilot_config.loaded():
|
1786
1819
|
config_path.touch()
|