skypilot-nightly 1.0.0.dev20250410__py3-none-any.whl → 1.0.0.dev20250412__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/oci.py +2 -2
  3. sky/authentication.py +2 -2
  4. sky/backends/backend_utils.py +1 -1
  5. sky/backends/cloud_vm_ray_backend.py +3 -3
  6. sky/check.py +1 -1
  7. sky/cli.py +51 -47
  8. sky/client/cli.py +51 -47
  9. sky/client/common.py +4 -2
  10. sky/client/sdk.py +60 -27
  11. sky/clouds/aws.py +2 -2
  12. sky/clouds/cloud.py +3 -2
  13. sky/clouds/kubernetes.py +20 -3
  14. sky/clouds/nebius.py +2 -4
  15. sky/clouds/oci.py +2 -2
  16. sky/clouds/utils/oci_utils.py +1 -1
  17. sky/core.py +12 -17
  18. sky/data/mounting_utils.py +34 -10
  19. sky/exceptions.py +1 -1
  20. sky/execution.py +5 -4
  21. sky/jobs/client/sdk.py +5 -0
  22. sky/optimizer.py +1 -2
  23. sky/provision/instance_setup.py +3 -1
  24. sky/provision/kubernetes/config.py +41 -36
  25. sky/provision/kubernetes/instance.py +4 -7
  26. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +54 -0
  27. sky/provision/kubernetes/network_utils.py +1 -1
  28. sky/provision/kubernetes/utils.py +51 -35
  29. sky/serve/client/sdk.py +6 -0
  30. sky/server/common.py +16 -1
  31. sky/server/constants.py +5 -0
  32. sky/server/requests/payloads.py +2 -0
  33. sky/setup_files/dependencies.py +1 -1
  34. sky/skylet/constants.py +2 -2
  35. sky/skypilot_config.py +197 -70
  36. sky/templates/kubernetes-ray.yml.j2 +66 -25
  37. sky/templates/websocket_proxy.py +41 -2
  38. sky/utils/config_utils.py +1 -1
  39. sky/utils/controller_utils.py +1 -1
  40. sky/utils/kubernetes/generate_kubeconfig.sh +2 -2
  41. sky/utils/kubernetes/rsync_helper.sh +26 -11
  42. {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/METADATA +3 -1
  43. {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/RECORD +47 -48
  44. sky/provision/kubernetes/manifests/smarter-device-manager-configmap.yaml +0 -10
  45. sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +0 -68
  46. {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/WHEEL +0 -0
  47. {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/entry_points.txt +0 -0
  48. {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/licenses/LICENSE +0 -0
  49. {skypilot_nightly-1.0.0.dev20250410.dist-info → skypilot_nightly-1.0.0.dev20250412.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '022a5c3ffe258f365764b03cb20fac70934f5a60'
8
+ _SKYPILOT_COMMIT_SHA = 'ac3cbfe07c8c1129480d4facd683f4844ff6e1f7'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20250410'
38
+ __version__ = '1.0.0.dev20250412'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/adaptors/oci.py CHANGED
@@ -13,7 +13,7 @@ from sky.clouds.utils import oci_utils
13
13
  # effect.
14
14
  logging.getLogger('oci.circuit_breaker').setLevel(logging.WARNING)
15
15
 
16
- CONFIG_PATH = '~/.oci/config'
16
+ OCI_CONFIG_PATH = '~/.oci/config'
17
17
  ENV_VAR_OCI_CONFIG = 'OCI_CONFIG'
18
18
 
19
19
  oci = common.LazyImport(
@@ -23,7 +23,7 @@ oci = common.LazyImport(
23
23
 
24
24
 
25
25
  def get_config_file() -> str:
26
- conf_file_path = CONFIG_PATH
26
+ conf_file_path = OCI_CONFIG_PATH
27
27
  config_path_via_env_var = os.environ.get(ENV_VAR_OCI_CONFIG)
28
28
  if config_path_via_env_var is not None:
29
29
  conf_file_path = config_path_via_env_var
sky/authentication.py CHANGED
@@ -382,10 +382,10 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
382
382
  network_mode = kubernetes_enums.KubernetesNetworkingMode.from_str(
383
383
  network_mode_str)
384
384
  except ValueError as e:
385
- # Add message saying "Please check: ~/.sky/config.yaml" to the error
385
+ # Add message saying "Please check: ~/.sky/skyconfig.yaml" to the error
386
386
  # message.
387
387
  with ux_utils.print_exception_no_traceback():
388
- raise ValueError(str(e) + ' Please check: ~/.sky/config.yaml.') \
388
+ raise ValueError(str(e) + ' Please check: ~/.sky/skyconfig.yaml.') \
389
389
  from None
390
390
  _, public_key_path = get_or_generate_keys()
391
391
 
sky/backends/backend_utils.py CHANGED
@@ -682,7 +682,7 @@ def write_cluster_config(
682
682
  ssh_proxy_command = ssh_proxy_command_config[region_name]
683
683
  logger.debug(f'Using ssh_proxy_command: {ssh_proxy_command!r}')
684
684
 
685
- # User-supplied global instance tags from ~/.sky/config.yaml.
685
+ # User-supplied global instance tags from ~/.sky/skyconfig.yaml.
686
686
  labels = skypilot_config.get_nested((str(cloud).lower(), 'labels'), {})
687
687
  # labels is a dict, which is guaranteed by the type check in
688
688
  # schemas.py
sky/backends/cloud_vm_ray_backend.py CHANGED
@@ -1473,7 +1473,7 @@ class RetryingVmProvisioner(object):
1473
1473
  f'invalid cloud credentials: '
1474
1474
  f'{common_utils.format_exception(e)}')
1475
1475
  except exceptions.InvalidCloudConfigs as e:
1476
- # Failed due to invalid user configs in ~/.sky/config.yaml.
1476
+ # Failed due to invalid user configs in ~/.sky/skyconfig.yaml.
1477
1477
  logger.warning(f'{common_utils.format_exception(e)}')
1478
1478
  # We should block the entire cloud if the user config is
1479
1479
  # invalid.
@@ -2065,10 +2065,10 @@ class RetryingVmProvisioner(object):
2065
2065
  (clouds.Kubernetes, clouds.RunPod)) and
2066
2066
  controller_utils.Controllers.from_name(cluster_name)
2067
2067
  is not None):
2068
- assert (clouds.CloudImplementationFeatures.STOP
2068
+ assert (clouds.CloudImplementationFeatures.AUTOSTOP
2069
2069
  in requested_features), requested_features
2070
2070
  requested_features.remove(
2071
- clouds.CloudImplementationFeatures.STOP)
2071
+ clouds.CloudImplementationFeatures.AUTOSTOP)
2072
2072
 
2073
2073
  # Skip if to_provision.cloud does not support requested features
2074
2074
  to_provision.cloud.check_features_are_supported(
sky/check.py CHANGED
@@ -142,7 +142,7 @@ def check_capabilities(
142
142
  if disallowed_cloud_names:
143
143
  disallowed_clouds_hint = (
144
144
  '\nNote: The following clouds were disabled because they were not '
145
- 'included in allowed_clouds in ~/.sky/config.yaml: '
145
+ 'included in allowed_clouds in ~/.sky/skyconfig.yaml: '
146
146
  f'{", ".join([c for c in disallowed_cloud_names])}')
147
147
  if not all_enabled_clouds:
148
148
  echo(
sky/cli.py CHANGED
@@ -35,7 +35,8 @@ import sys
35
35
  import textwrap
36
36
  import traceback
37
37
  import typing
38
- from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union
38
+ from typing import (Any, Callable, Dict, Generator, List, Optional, Set, Tuple,
39
+ Union)
39
40
 
40
41
  import click
41
42
  import colorama
@@ -134,49 +135,51 @@ def _get_cluster_records_and_set_ssh_config(
134
135
  # Update the SSH config for all clusters
135
136
  for record in cluster_records:
136
137
  handle = record['handle']
137
- # During the failover, even though a cluster does not exist, the handle
138
- # can still exist in the record, and we check for credentials to avoid
139
- # updating the SSH config for non-existent clusters.
140
- if (handle is not None and handle.cached_external_ips is not None and
141
- 'credentials' in record):
142
- credentials = record['credentials']
143
- if isinstance(handle.launched_resources.cloud, clouds.Kubernetes):
144
- # Replace the proxy command to proxy through the SkyPilot API
145
- # server with websocket.
146
- key_path = (
147
- cluster_utils.SSHConfigHelper.generate_local_key_file(
148
- handle.cluster_name, credentials))
149
- # Instead of directly use websocket_proxy.py, we add an
150
- # additional proxy, so that ssh can use the head pod in the
151
- # cluster to jump to worker pods.
152
- proxy_command = (
153
- f'ssh -tt -i {key_path} '
154
- '-o StrictHostKeyChecking=no '
155
- '-o UserKnownHostsFile=/dev/null '
156
- '-o IdentitiesOnly=yes '
157
- '-W %h:%p '
158
- f'{handle.ssh_user}@127.0.0.1 '
159
- '-o ProxyCommand='
160
- # TODO(zhwu): write the template to a temp file, don't use
161
- # the one in skypilot repo, to avoid changing the file when
162
- # updating skypilot.
163
- f'\'{sys.executable} {sky.__root_dir__}/templates/'
164
- f'websocket_proxy.py '
165
- f'{server_common.get_server_url().split("://")[1]} '
166
- f'{handle.cluster_name}\'')
167
- credentials['ssh_proxy_command'] = proxy_command
168
- cluster_utils.SSHConfigHelper.add_cluster(
169
- handle.cluster_name,
170
- handle.cached_external_ips,
171
- credentials,
172
- handle.cached_external_ssh_ports,
173
- handle.docker_user,
174
- handle.ssh_user,
175
- )
176
- else:
138
+
139
+ if not (handle is not None and handle.cached_external_ips is not None
140
+ and 'credentials' in record):
177
141
  # If the cluster is not UP or does not have credentials available,
178
142
  # we need to remove the cluster from the SSH config.
179
143
  cluster_utils.SSHConfigHelper.remove_cluster(record['name'])
144
+ continue
145
+
146
+ # During the failover, even though a cluster does not exist, the handle
147
+ # can still exist in the record, and we check for credentials to avoid
148
+ # updating the SSH config for non-existent clusters.
149
+ credentials = record['credentials']
150
+ if isinstance(handle.launched_resources.cloud, clouds.Kubernetes):
151
+ # Replace the proxy command to proxy through the SkyPilot API
152
+ # server with websocket.
153
+ key_path = (cluster_utils.SSHConfigHelper.generate_local_key_file(
154
+ handle.cluster_name, credentials))
155
+ # Instead of directly use websocket_proxy.py, we add an
156
+ # additional proxy, so that ssh can use the head pod in the
157
+ # cluster to jump to worker pods.
158
+ proxy_command = (
159
+ f'ssh -tt -i {key_path} '
160
+ '-o StrictHostKeyChecking=no '
161
+ '-o UserKnownHostsFile=/dev/null '
162
+ '-o IdentitiesOnly=yes '
163
+ '-W %h:%p '
164
+ f'{handle.ssh_user}@127.0.0.1 '
165
+ '-o ProxyCommand='
166
+ # TODO(zhwu): write the template to a temp file, don't use
167
+ # the one in skypilot repo, to avoid changing the file when
168
+ # updating skypilot.
169
+ f'\'{sys.executable} {sky.__root_dir__}/templates/'
170
+ f'websocket_proxy.py '
171
+ f'{server_common.get_server_url()} '
172
+ f'{handle.cluster_name}\'')
173
+ credentials['ssh_proxy_command'] = proxy_command
174
+
175
+ cluster_utils.SSHConfigHelper.add_cluster(
176
+ handle.cluster_name,
177
+ handle.cached_external_ips,
178
+ credentials,
179
+ handle.cached_external_ssh_ports,
180
+ handle.docker_user,
181
+ handle.ssh_user,
182
+ )
180
183
 
181
184
  # Clean up SSH configs for clusters that do not exist.
182
185
  #
@@ -186,14 +189,15 @@ def _get_cluster_records_and_set_ssh_config(
186
189
  # removing clusters, because SkyPilot has no idea whether to remove
187
190
  # ssh config of a cluster from another user.
188
191
  clusters_exists = set(record['name'] for record in cluster_records)
192
+ clusters_to_remove: Set[str] = set()
189
193
  if clusters is not None:
190
- for cluster in clusters:
191
- if cluster not in clusters_exists:
192
- cluster_utils.SSHConfigHelper.remove_cluster(cluster)
194
+ clusters_to_remove = set(clusters) - clusters_exists
193
195
  elif all_users:
194
- for cluster_name in cluster_utils.SSHConfigHelper.list_cluster_names():
195
- if cluster_name not in clusters_exists:
196
- cluster_utils.SSHConfigHelper.remove_cluster(cluster_name)
196
+ clusters_to_remove = set(cluster_utils.SSHConfigHelper.
197
+ list_cluster_names()) - clusters_exists
198
+
199
+ for cluster_name in clusters_to_remove:
200
+ cluster_utils.SSHConfigHelper.remove_cluster(cluster_name)
197
201
 
198
202
  return cluster_records
199
203
 
sky/client/cli.py CHANGED
@@ -35,7 +35,8 @@ import sys
35
35
  import textwrap
36
36
  import traceback
37
37
  import typing
38
- from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union
38
+ from typing import (Any, Callable, Dict, Generator, List, Optional, Set, Tuple,
39
+ Union)
39
40
 
40
41
  import click
41
42
  import colorama
@@ -134,49 +135,51 @@ def _get_cluster_records_and_set_ssh_config(
134
135
  # Update the SSH config for all clusters
135
136
  for record in cluster_records:
136
137
  handle = record['handle']
137
- # During the failover, even though a cluster does not exist, the handle
138
- # can still exist in the record, and we check for credentials to avoid
139
- # updating the SSH config for non-existent clusters.
140
- if (handle is not None and handle.cached_external_ips is not None and
141
- 'credentials' in record):
142
- credentials = record['credentials']
143
- if isinstance(handle.launched_resources.cloud, clouds.Kubernetes):
144
- # Replace the proxy command to proxy through the SkyPilot API
145
- # server with websocket.
146
- key_path = (
147
- cluster_utils.SSHConfigHelper.generate_local_key_file(
148
- handle.cluster_name, credentials))
149
- # Instead of directly use websocket_proxy.py, we add an
150
- # additional proxy, so that ssh can use the head pod in the
151
- # cluster to jump to worker pods.
152
- proxy_command = (
153
- f'ssh -tt -i {key_path} '
154
- '-o StrictHostKeyChecking=no '
155
- '-o UserKnownHostsFile=/dev/null '
156
- '-o IdentitiesOnly=yes '
157
- '-W %h:%p '
158
- f'{handle.ssh_user}@127.0.0.1 '
159
- '-o ProxyCommand='
160
- # TODO(zhwu): write the template to a temp file, don't use
161
- # the one in skypilot repo, to avoid changing the file when
162
- # updating skypilot.
163
- f'\'{sys.executable} {sky.__root_dir__}/templates/'
164
- f'websocket_proxy.py '
165
- f'{server_common.get_server_url().split("://")[1]} '
166
- f'{handle.cluster_name}\'')
167
- credentials['ssh_proxy_command'] = proxy_command
168
- cluster_utils.SSHConfigHelper.add_cluster(
169
- handle.cluster_name,
170
- handle.cached_external_ips,
171
- credentials,
172
- handle.cached_external_ssh_ports,
173
- handle.docker_user,
174
- handle.ssh_user,
175
- )
176
- else:
138
+
139
+ if not (handle is not None and handle.cached_external_ips is not None
140
+ and 'credentials' in record):
177
141
  # If the cluster is not UP or does not have credentials available,
178
142
  # we need to remove the cluster from the SSH config.
179
143
  cluster_utils.SSHConfigHelper.remove_cluster(record['name'])
144
+ continue
145
+
146
+ # During the failover, even though a cluster does not exist, the handle
147
+ # can still exist in the record, and we check for credentials to avoid
148
+ # updating the SSH config for non-existent clusters.
149
+ credentials = record['credentials']
150
+ if isinstance(handle.launched_resources.cloud, clouds.Kubernetes):
151
+ # Replace the proxy command to proxy through the SkyPilot API
152
+ # server with websocket.
153
+ key_path = (cluster_utils.SSHConfigHelper.generate_local_key_file(
154
+ handle.cluster_name, credentials))
155
+ # Instead of directly use websocket_proxy.py, we add an
156
+ # additional proxy, so that ssh can use the head pod in the
157
+ # cluster to jump to worker pods.
158
+ proxy_command = (
159
+ f'ssh -tt -i {key_path} '
160
+ '-o StrictHostKeyChecking=no '
161
+ '-o UserKnownHostsFile=/dev/null '
162
+ '-o IdentitiesOnly=yes '
163
+ '-W %h:%p '
164
+ f'{handle.ssh_user}@127.0.0.1 '
165
+ '-o ProxyCommand='
166
+ # TODO(zhwu): write the template to a temp file, don't use
167
+ # the one in skypilot repo, to avoid changing the file when
168
+ # updating skypilot.
169
+ f'\'{sys.executable} {sky.__root_dir__}/templates/'
170
+ f'websocket_proxy.py '
171
+ f'{server_common.get_server_url()} '
172
+ f'{handle.cluster_name}\'')
173
+ credentials['ssh_proxy_command'] = proxy_command
174
+
175
+ cluster_utils.SSHConfigHelper.add_cluster(
176
+ handle.cluster_name,
177
+ handle.cached_external_ips,
178
+ credentials,
179
+ handle.cached_external_ssh_ports,
180
+ handle.docker_user,
181
+ handle.ssh_user,
182
+ )
180
183
 
181
184
  # Clean up SSH configs for clusters that do not exist.
182
185
  #
@@ -186,14 +189,15 @@ def _get_cluster_records_and_set_ssh_config(
186
189
  # removing clusters, because SkyPilot has no idea whether to remove
187
190
  # ssh config of a cluster from another user.
188
191
  clusters_exists = set(record['name'] for record in cluster_records)
192
+ clusters_to_remove: Set[str] = set()
189
193
  if clusters is not None:
190
- for cluster in clusters:
191
- if cluster not in clusters_exists:
192
- cluster_utils.SSHConfigHelper.remove_cluster(cluster)
194
+ clusters_to_remove = set(clusters) - clusters_exists
193
195
  elif all_users:
194
- for cluster_name in cluster_utils.SSHConfigHelper.list_cluster_names():
195
- if cluster_name not in clusters_exists:
196
- cluster_utils.SSHConfigHelper.remove_cluster(cluster_name)
196
+ clusters_to_remove = set(cluster_utils.SSHConfigHelper.
197
+ list_cluster_names()) - clusters_exists
198
+
199
+ for cluster_name in clusters_to_remove:
200
+ cluster_utils.SSHConfigHelper.remove_cluster(cluster_name)
197
201
 
198
202
  return cluster_records
199
203
 
sky/client/common.py CHANGED
@@ -75,7 +75,8 @@ def download_logs_from_api_server(
75
75
  body = payloads.DownloadBody(folder_paths=list(paths_on_api_server),)
76
76
  response = requests.post(f'{server_common.get_server_url()}/download',
77
77
  json=json.loads(body.model_dump_json()),
78
- stream=True)
78
+ stream=True,
79
+ cookies=server_common.get_api_cookie_jar())
79
80
  if response.status_code == 200:
80
81
  remote_home_path = response.headers.get('X-Home-Path')
81
82
  assert remote_home_path is not None, response.headers
@@ -176,7 +177,8 @@ def _upload_chunk_with_retry(params: UploadChunkParams) -> None:
176
177
  },
177
178
  content=FileChunkIterator(f, _UPLOAD_CHUNK_BYTES,
178
179
  params.chunk_index),
179
- headers={'Content-Type': 'application/octet-stream'})
180
+ headers={'Content-Type': 'application/octet-stream'},
181
+ cookies=server_common.get_api_cookie_jar())
180
182
  if response.status_code == 200:
181
183
  data = response.json()
182
184
  status = data.get('status')
sky/client/sdk.py CHANGED
@@ -102,7 +102,8 @@ def check(clouds: Optional[Tuple[str]],
102
102
  """
103
103
  body = payloads.CheckBody(clouds=clouds, verbose=verbose)
104
104
  response = requests.post(f'{server_common.get_server_url()}/check',
105
- json=json.loads(body.model_dump_json()))
105
+ json=json.loads(body.model_dump_json()),
106
+ cookies=server_common.get_api_cookie_jar())
106
107
  return server_common.get_request_id(response)
107
108
 
108
109
 
@@ -118,7 +119,8 @@ def enabled_clouds() -> server_common.RequestId:
118
119
  Request Returns:
119
120
  A list of enabled clouds in string format.
120
121
  """
121
- response = requests.get(f'{server_common.get_server_url()}/enabled_clouds')
122
+ response = requests.get(f'{server_common.get_server_url()}/enabled_clouds',
123
+ cookies=server_common.get_api_cookie_jar())
122
124
  return server_common.get_request_id(response)
123
125
 
124
126
 
@@ -168,7 +170,8 @@ def list_accelerators(gpus_only: bool = True,
168
170
  )
169
171
  response = requests.post(
170
172
  f'{server_common.get_server_url()}/list_accelerators',
171
- json=json.loads(body.model_dump_json()))
173
+ json=json.loads(body.model_dump_json()),
174
+ cookies=server_common.get_api_cookie_jar())
172
175
  return server_common.get_request_id(response)
173
176
 
174
177
 
@@ -208,7 +211,8 @@ def list_accelerator_counts(
208
211
  )
209
212
  response = requests.post(
210
213
  f'{server_common.get_server_url()}/list_accelerator_counts',
211
- json=json.loads(body.model_dump_json()))
214
+ json=json.loads(body.model_dump_json()),
215
+ cookies=server_common.get_api_cookie_jar())
212
216
  return server_common.get_request_id(response)
213
217
 
214
218
 
@@ -246,7 +250,8 @@ def optimize(
246
250
  minimize=minimize,
247
251
  request_options=admin_policy_request_options)
248
252
  response = requests.post(f'{server_common.get_server_url()}/optimize',
249
- json=json.loads(body.model_dump_json()))
253
+ json=json.loads(body.model_dump_json()),
254
+ cookies=server_common.get_api_cookie_jar())
250
255
  return server_common.get_request_id(response)
251
256
 
252
257
 
@@ -281,7 +286,8 @@ def validate(
281
286
  body = payloads.ValidateBody(dag=dag_str,
282
287
  request_options=admin_policy_request_options)
283
288
  response = requests.post(f'{server_common.get_server_url()}/validate',
284
- json=json.loads(body.model_dump_json()))
289
+ json=json.loads(body.model_dump_json()),
290
+ cookies=server_common.get_api_cookie_jar())
285
291
  if response.status_code == 400:
286
292
  with ux_utils.print_exception_no_traceback():
287
293
  raise exceptions.deserialize_exception(
@@ -493,6 +499,7 @@ def launch(
493
499
  f'{server_common.get_server_url()}/launch',
494
500
  json=json.loads(body.model_dump_json()),
495
501
  timeout=5,
502
+ cookies=server_common.get_api_cookie_jar(),
496
503
  )
497
504
  return server_common.get_request_id(response)
498
505
 
@@ -576,6 +583,7 @@ def exec( # pylint: disable=redefined-builtin
576
583
  f'{server_common.get_server_url()}/exec',
577
584
  json=json.loads(body.model_dump_json()),
578
585
  timeout=5,
586
+ cookies=server_common.get_api_cookie_jar(),
579
587
  )
580
588
  return server_common.get_request_id(response)
581
589
 
@@ -626,7 +634,8 @@ def tail_logs(cluster_name: str,
626
634
  json=json.loads(body.model_dump_json()),
627
635
  stream=True,
628
636
  timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
629
- None))
637
+ None),
638
+ cookies=server_common.get_api_cookie_jar())
630
639
  request_id = server_common.get_request_id(response)
631
640
  return stream_response(request_id, response, output_stream)
632
641
 
@@ -663,7 +672,8 @@ def download_logs(cluster_name: str,
663
672
  job_ids=job_ids,
664
673
  )
665
674
  response = requests.post(f'{server_common.get_server_url()}/download_logs',
666
- json=json.loads(body.model_dump_json()))
675
+ json=json.loads(body.model_dump_json()),
676
+ cookies=server_common.get_api_cookie_jar())
667
677
  job_id_remote_path_dict = stream_and_get(
668
678
  server_common.get_request_id(response))
669
679
  remote2local_path_dict = client_common.download_logs_from_api_server(
@@ -745,6 +755,7 @@ def start(
745
755
  f'{server_common.get_server_url()}/start',
746
756
  json=json.loads(body.model_dump_json()),
747
757
  timeout=5,
758
+ cookies=server_common.get_api_cookie_jar(),
748
759
  )
749
760
  return server_common.get_request_id(response)
750
761
 
@@ -790,6 +801,7 @@ def down(cluster_name: str, purge: bool = False) -> server_common.RequestId:
790
801
  f'{server_common.get_server_url()}/down',
791
802
  json=json.loads(body.model_dump_json()),
792
803
  timeout=5,
804
+ cookies=server_common.get_api_cookie_jar(),
793
805
  )
794
806
  return server_common.get_request_id(response)
795
807
 
@@ -838,6 +850,7 @@ def stop(cluster_name: str, purge: bool = False) -> server_common.RequestId:
838
850
  f'{server_common.get_server_url()}/stop',
839
851
  json=json.loads(body.model_dump_json()),
840
852
  timeout=5,
853
+ cookies=server_common.get_api_cookie_jar(),
841
854
  )
842
855
  return server_common.get_request_id(response)
843
856
 
@@ -907,6 +920,7 @@ def autostop(
907
920
  f'{server_common.get_server_url()}/autostop',
908
921
  json=json.loads(body.model_dump_json()),
909
922
  timeout=5,
923
+ cookies=server_common.get_api_cookie_jar(),
910
924
  )
911
925
  return server_common.get_request_id(response)
912
926
 
@@ -966,7 +980,8 @@ def queue(cluster_name: str,
966
980
  all_users=all_users,
967
981
  )
968
982
  response = requests.post(f'{server_common.get_server_url()}/queue',
969
- json=json.loads(body.model_dump_json()))
983
+ json=json.loads(body.model_dump_json()),
984
+ cookies=server_common.get_api_cookie_jar())
970
985
  return server_common.get_request_id(response)
971
986
 
972
987
 
@@ -1007,7 +1022,8 @@ def job_status(cluster_name: str,
1007
1022
  job_ids=job_ids,
1008
1023
  )
1009
1024
  response = requests.post(f'{server_common.get_server_url()}/job_status',
1010
- json=json.loads(body.model_dump_json()))
1025
+ json=json.loads(body.model_dump_json()),
1026
+ cookies=server_common.get_api_cookie_jar())
1011
1027
  return server_common.get_request_id(response)
1012
1028
 
1013
1029
 
@@ -1060,7 +1076,8 @@ def cancel(
1060
1076
  try_cancel_if_cluster_is_init=_try_cancel_if_cluster_is_init,
1061
1077
  )
1062
1078
  response = requests.post(f'{server_common.get_server_url()}/cancel',
1063
- json=json.loads(body.model_dump_json()))
1079
+ json=json.loads(body.model_dump_json()),
1080
+ cookies=server_common.get_api_cookie_jar())
1064
1081
  return server_common.get_request_id(response)
1065
1082
 
1066
1083
 
@@ -1155,7 +1172,8 @@ def status(
1155
1172
  all_users=all_users,
1156
1173
  )
1157
1174
  response = requests.post(f'{server_common.get_server_url()}/status',
1158
- json=json.loads(body.model_dump_json()))
1175
+ json=json.loads(body.model_dump_json()),
1176
+ cookies=server_common.get_api_cookie_jar())
1159
1177
  return server_common.get_request_id(response)
1160
1178
 
1161
1179
 
@@ -1189,7 +1207,8 @@ def endpoints(
1189
1207
  port=port,
1190
1208
  )
1191
1209
  response = requests.post(f'{server_common.get_server_url()}/endpoints',
1192
- json=json.loads(body.model_dump_json()))
1210
+ json=json.loads(body.model_dump_json()),
1211
+ cookies=server_common.get_api_cookie_jar())
1193
1212
  return server_common.get_request_id(response)
1194
1213
 
1195
1214
 
@@ -1227,7 +1246,8 @@ def cost_report() -> server_common.RequestId: # pylint: disable=redefined-built
1227
1246
  'total_cost': (float) cost given resources and usage intervals,
1228
1247
  }
1229
1248
  """
1230
- response = requests.get(f'{server_common.get_server_url()}/cost_report')
1249
+ response = requests.get(f'{server_common.get_server_url()}/cost_report',
1250
+ cookies=server_common.get_api_cookie_jar())
1231
1251
  return server_common.get_request_id(response)
1232
1252
 
1233
1253
 
@@ -1256,7 +1276,8 @@ def storage_ls() -> server_common.RequestId:
1256
1276
  }
1257
1277
  ]
1258
1278
  """
1259
- response = requests.get(f'{server_common.get_server_url()}/storage/ls')
1279
+ response = requests.get(f'{server_common.get_server_url()}/storage/ls',
1280
+ cookies=server_common.get_api_cookie_jar())
1260
1281
  return server_common.get_request_id(response)
1261
1282
 
1262
1283
 
@@ -1280,7 +1301,8 @@ def storage_delete(name: str) -> server_common.RequestId:
1280
1301
  """
1281
1302
  body = payloads.StorageBody(name=name)
1282
1303
  response = requests.post(f'{server_common.get_server_url()}/storage/delete',
1283
- json=json.loads(body.model_dump_json()))
1304
+ json=json.loads(body.model_dump_json()),
1305
+ cookies=server_common.get_api_cookie_jar())
1284
1306
  return server_common.get_request_id(response)
1285
1307
 
1286
1308
 
@@ -1318,7 +1340,8 @@ def local_up(gpus: bool,
1318
1340
  context_name=context_name,
1319
1341
  password=password)
1320
1342
  response = requests.post(f'{server_common.get_server_url()}/local_up',
1321
- json=json.loads(body.model_dump_json()))
1343
+ json=json.loads(body.model_dump_json()),
1344
+ cookies=server_common.get_api_cookie_jar())
1322
1345
  return server_common.get_request_id(response)
1323
1346
 
1324
1347
 
@@ -1334,7 +1357,8 @@ def local_down() -> server_common.RequestId:
1334
1357
  with ux_utils.print_exception_no_traceback():
1335
1358
  raise ValueError('sky local down is only supported when running '
1336
1359
  'SkyPilot locally.')
1337
- response = requests.post(f'{server_common.get_server_url()}/local_down')
1360
+ response = requests.post(f'{server_common.get_server_url()}/local_down',
1361
+ cookies=server_common.get_api_cookie_jar())
1338
1362
  return server_common.get_request_id(response)
1339
1363
 
1340
1364
 
@@ -1358,7 +1382,8 @@ def realtime_kubernetes_gpu_availability(
1358
1382
  response = requests.post(
1359
1383
  f'{server_common.get_server_url()}/'
1360
1384
  'realtime_kubernetes_gpu_availability',
1361
- json=json.loads(body.model_dump_json()))
1385
+ json=json.loads(body.model_dump_json()),
1386
+ cookies=server_common.get_api_cookie_jar())
1362
1387
  return server_common.get_request_id(response)
1363
1388
 
1364
1389
 
@@ -1389,7 +1414,8 @@ def kubernetes_node_info(
1389
1414
  body = payloads.KubernetesNodeInfoRequestBody(context=context)
1390
1415
  response = requests.post(
1391
1416
  f'{server_common.get_server_url()}/kubernetes_node_info',
1392
- json=json.loads(body.model_dump_json()))
1417
+ json=json.loads(body.model_dump_json()),
1418
+ cookies=server_common.get_api_cookie_jar())
1393
1419
  return server_common.get_request_id(response)
1394
1420
 
1395
1421
 
@@ -1418,7 +1444,8 @@ def status_kubernetes() -> server_common.RequestId:
1418
1444
  - context: Kubernetes context used to fetch the cluster information.
1419
1445
  """
1420
1446
  response = requests.get(
1421
- f'{server_common.get_server_url()}/status_kubernetes')
1447
+ f'{server_common.get_server_url()}/status_kubernetes',
1448
+ cookies=server_common.get_api_cookie_jar())
1422
1449
  return server_common.get_request_id(response)
1423
1450
 
1424
1451
 
@@ -1444,7 +1471,8 @@ def get(request_id: str) -> Any:
1444
1471
  response = requests.get(
1445
1472
  f'{server_common.get_server_url()}/api/get?request_id={request_id}',
1446
1473
  timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
1447
- None))
1474
+ None),
1475
+ cookies=server_common.get_api_cookie_jar())
1448
1476
  request_task = None
1449
1477
  if response.status_code == 200:
1450
1478
  request_task = requests_lib.Request.decode(
@@ -1523,7 +1551,8 @@ def stream_and_get(
1523
1551
  params=params,
1524
1552
  timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
1525
1553
  None),
1526
- stream=True)
1554
+ stream=True,
1555
+ cookies=server_common.get_api_cookie_jar())
1527
1556
  if response.status_code in [404, 400]:
1528
1557
  detail = response.json().get('detail')
1529
1558
  with ux_utils.print_exception_no_traceback():
@@ -1579,7 +1608,8 @@ def api_cancel(request_ids: Optional[Union[str, List[str]]] = None,
1579
1608
 
1580
1609
  response = requests.post(f'{server_common.get_server_url()}/api/cancel',
1581
1610
  json=json.loads(body.model_dump_json()),
1582
- timeout=5)
1611
+ timeout=5,
1612
+ cookies=server_common.get_api_cookie_jar())
1583
1613
  return server_common.get_request_id(response)
1584
1614
 
1585
1615
 
@@ -1607,7 +1637,8 @@ def api_status(
1607
1637
  f'{server_common.get_server_url()}/api/status',
1608
1638
  params=server_common.request_body_to_params(body),
1609
1639
  timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
1610
- None))
1640
+ None),
1641
+ cookies=server_common.get_api_cookie_jar())
1611
1642
  server_common.handle_request_error(response)
1612
1643
  return [
1613
1644
  requests_lib.RequestPayload(**request) for request in response.json()
@@ -1634,7 +1665,8 @@ def api_info() -> Dict[str, str]:
1634
1665
  }
1635
1666
 
1636
1667
  """
1637
- response = requests.get(f'{server_common.get_server_url()}/api/health')
1668
+ response = requests.get(f'{server_common.get_server_url()}/api/health',
1669
+ cookies=server_common.get_api_cookie_jar())
1638
1670
  response.raise_for_status()
1639
1671
  return response.json()
1640
1672
 
@@ -1780,7 +1812,8 @@ def api_login(endpoint: Optional[str] = None) -> None:
1780
1812
  server_common.check_server_healthy(endpoint)
1781
1813
 
1782
1814
  # Set the endpoint in the config file
1783
- config_path = pathlib.Path(skypilot_config.CONFIG_PATH).expanduser()
1815
+ config_path = pathlib.Path(
1816
+ skypilot_config.get_user_config_path()).expanduser()
1784
1817
  with filelock.FileLock(config_path.with_suffix('.lock')):
1785
1818
  if not skypilot_config.loaded():
1786
1819
  config_path.touch()