skypilot-nightly 1.0.0.dev20250408__py3-none-any.whl → 1.0.0.dev20250411__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/azure.py +1 -1
  3. sky/adaptors/nebius.py +5 -27
  4. sky/backends/backend.py +9 -7
  5. sky/backends/cloud_vm_ray_backend.py +7 -7
  6. sky/backends/local_docker_backend.py +3 -3
  7. sky/client/common.py +4 -2
  8. sky/client/sdk.py +58 -26
  9. sky/cloud_stores.py +0 -4
  10. sky/clouds/do.py +4 -5
  11. sky/clouds/gcp.py +5 -3
  12. sky/clouds/nebius.py +22 -12
  13. sky/clouds/service_catalog/data_fetchers/fetch_ibm.py +1 -2
  14. sky/clouds/service_catalog/gcp_catalog.py +37 -10
  15. sky/core.py +6 -6
  16. sky/data/data_utils.py +5 -9
  17. sky/data/mounting_utils.py +1 -1
  18. sky/data/storage.py +25 -31
  19. sky/data/storage_utils.py +27 -18
  20. sky/execution.py +11 -4
  21. sky/jobs/client/sdk.py +5 -0
  22. sky/jobs/server/server.py +5 -1
  23. sky/optimizer.py +1 -2
  24. sky/provision/do/utils.py +19 -16
  25. sky/provision/gcp/config.py +30 -20
  26. sky/serve/client/sdk.py +6 -0
  27. sky/server/common.py +16 -1
  28. sky/server/constants.py +5 -0
  29. sky/setup_files/dependencies.py +1 -1
  30. sky/skylet/log_lib.py +4 -0
  31. sky/skypilot_config.py +19 -30
  32. sky/task.py +27 -7
  33. sky/utils/schemas.py +25 -7
  34. {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/METADATA +2 -2
  35. {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/RECORD +39 -39
  36. {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/WHEEL +0 -0
  37. {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/entry_points.txt +0 -0
  38. {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/licenses/LICENSE +0 -0
  39. {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
 import urllib.request
 
 # Replaced with the current commit when building the wheels.
-_SKYPILOT_COMMIT_SHA = 'e0674be528e87191ade88961c44c6449d01232fa'
+_SKYPILOT_COMMIT_SHA = 'b4202948016cdf48a5939ea9bf1769a2d31f73bf'
 
 
 def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
 
 
 __commit__ = _get_git_commit()
-__version__ = '1.0.0.dev20250408'
+__version__ = '1.0.0.dev20250411'
 __root_dir__ = os.path.dirname(os.path.abspath(__file__))
 
 
sky/adaptors/azure.py CHANGED
@@ -232,7 +232,7 @@ def get_client(name: str,
                 'Must provide resource_group_name keyword '
                 'arguments for container client.')
             sky_logger.info(
-                'Failed to check the existance of the '
+                'Failed to check the existence of the '
                 f'container {container_url!r} due to '
                 'insufficient IAM role for storage '
                 f'account {storage_account_name!r}.')
sky/adaptors/nebius.py CHANGED
@@ -1,7 +1,6 @@
 """Nebius cloud adaptor."""
 import os
 import threading
-from typing import Optional
 
 from sky.adaptors import common
 from sky.utils import annotations
@@ -168,7 +167,7 @@ def session():
 
 
 @annotations.lru_cache(scope='global')
-def resource(resource_name: str, region: str = DEFAULT_REGION, **kwargs):
+def resource(resource_name: str, **kwargs):
     """Create a Nebius resource.
 
     Args:
@@ -181,21 +180,13 @@ def resource(resource_name: str, region: str = DEFAULT_REGION, **kwargs):
     # Reference: https://stackoverflow.com/a/59635814
 
     session_ = session()
-    nebius_credentials = get_nebius_credentials(session_)
-    endpoint = create_endpoint(region)
 
-    return session_.resource(
-        resource_name,
-        endpoint_url=endpoint,
-        aws_access_key_id=nebius_credentials.access_key,
-        aws_secret_access_key=nebius_credentials.secret_key,
-        region_name=region,
-        **kwargs)
+    return session_.resource(resource_name, **kwargs)
 
 
 @annotations.lru_cache(scope='global')
-def client(service_name: str, region):
-    """Create an Nebius client of a certain service.
+def client(service_name: str):
+    """Create Nebius client of a certain service.
 
     Args:
         service_name: Nebius service name (e.g., 's3').
@@ -207,14 +198,8 @@ def client(service_name: str, region):
     # Reference: https://stackoverflow.com/a/59635814
 
     session_ = session()
-    nebius_credentials = get_nebius_credentials(session_)
-    endpoint = create_endpoint(region)
 
-    return session_.client(service_name,
-                           endpoint_url=endpoint,
-                           aws_access_key_id=nebius_credentials.access_key,
-                           aws_secret_access_key=nebius_credentials.secret_key,
-                           region_name=region)
+    return session_.client(service_name)
 
 
 @common.load_lazy_modules(_LAZY_MODULES)
@@ -223,10 +208,3 @@ def botocore_exceptions():
     # pylint: disable=import-outside-toplevel
     from botocore import exceptions
     return exceptions
-
-
-def create_endpoint(region: Optional[str] = DEFAULT_REGION) -> str:
-    """Reads accountid necessary to interact with Nebius Object Storage"""
-    if region is None:
-        region = DEFAULT_REGION
-    return f'https://storage.{region}.nebius.cloud:443'
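With the region and credential plumbing removed, the adaptor's public surface shrinks to bare service names: endpoint and credentials are now resolved by the shared session (e.g., from the configured Nebius profile) rather than passed per call. A minimal sketch of the before/after call sites, assuming the adaptor is imported as shown:

    from sky.adaptors import nebius

    # Before this change, callers had to thread a region through:
    #   s3 = nebius.client('s3', region)
    #   rsrc = nebius.resource('s3', region=region)
    # After, the session supplies endpoint and credentials:
    s3 = nebius.client('s3')
    rsrc = nebius.resource('s3')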
sky/backends/backend.py CHANGED
@@ -1,6 +1,6 @@
 """Sky backend interface."""
 import typing
-from typing import Dict, Generic, Optional
+from typing import Dict, Generic, Optional, Tuple
 
 from sky.usage import usage_lib
 from sky.utils import cluster_utils
@@ -53,7 +53,7 @@ class Backend(Generic[_ResourceHandleType]):
             cluster_name: Optional[str] = None,
             retry_until_up: bool = False,
             skip_unnecessary_provisioning: bool = False,
-    ) -> Optional[_ResourceHandleType]:
+    ) -> Tuple[Optional[_ResourceHandleType], bool]:
         """Provisions resources for the given task.
 
         Args:
@@ -68,13 +68,15 @@ class Backend(Generic[_ResourceHandleType]):
                 the existing cluster will be reused and re-provisioned.
             retry_until_up: If True, retry provisioning until resources are
                 successfully launched.
-            skip_if_no_cluster_updates: If True, compare the cluster config to
-                the existing cluster_name's config. Skip provisioning if no
+            skip_unnecessary_provisioning: If True, compare the cluster config
+                to the existing cluster_name's config. Skip provisioning if no
                 updates are needed for the existing cluster.
 
         Returns:
-            A ResourceHandle object for the provisioned resources, or None if
-            dryrun is True.
+            - A ResourceHandle object for the provisioned resources, or None if
+              dryrun is True.
+            - A boolean that is True if the provisioning was skipped, and False
+              if provisioning actually happened. Dryrun always gives False.
         """
         if cluster_name is None:
             cluster_name = cluster_utils.generate_cluster_name()
@@ -159,7 +161,7 @@ class Backend(Generic[_ResourceHandleType]):
             cluster_name: str,
             retry_until_up: bool = False,
             skip_unnecessary_provisioning: bool = False,
-    ) -> Optional[_ResourceHandleType]:
+    ) -> Tuple[Optional[_ResourceHandleType], bool]:
         raise NotImplementedError
 
     def _sync_workdir(self, handle: _ResourceHandleType, workdir: Path) -> None:
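Because provision() now returns a pair rather than a bare handle, every call site has to unpack the skip flag. A hedged sketch of the new caller contract (argument names other than cluster_name, retry_until_up, and skip_unnecessary_provisioning are illustrative, not taken from this diff):

    # provision() now yields (handle, provisioning_skipped); dryrun
    # always reports False for the skip flag per the docstring above.
    handle, provisioning_skipped = backend.provision(
        task,
        to_provision,
        dryrun=False,
        stream_logs=True,
        cluster_name='my-cluster',
        skip_unnecessary_provisioning=True,
    )
    if provisioning_skipped:
        # The existing cluster's config matched; nothing was re-launched.
        ...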
sky/backends/cloud_vm_ray_backend.py CHANGED
@@ -2829,7 +2829,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
             cluster_name: str,
             retry_until_up: bool = False,
             skip_unnecessary_provisioning: bool = False,
-    ) -> Optional[CloudVmRayResourceHandle]:
+    ) -> Tuple[Optional[CloudVmRayResourceHandle], bool]:
         """Provisions the cluster, or re-provisions an existing cluster.
 
         Use the SKYPILOT provisioner if it's supported by the cloud, otherwise
@@ -2969,7 +2969,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
                         failover_history=e.failover_history) from None
         if dryrun:
             record = global_user_state.get_cluster_from_name(cluster_name)
-            return record['handle'] if record is not None else None
+            return record['handle'] if record is not None else None, False
 
         if config_dict['provisioning_skipped']:
             # Skip further provisioning.
@@ -2980,7 +2980,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
             record = global_user_state.get_cluster_from_name(cluster_name)
             assert record is not None and record['handle'] is not None, (
                 cluster_name, record)
-            return record['handle']
+            return record['handle'], True
 
         if 'provision_record' in config_dict:
             # New provisioner is used here.
@@ -3022,7 +3022,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
             self._update_after_cluster_provisioned(
                 handle, to_provision_config.prev_handle, task,
                 prev_cluster_status, lock_path, config_hash)
-            return handle
+            return handle, False
 
         cluster_config_file = config_dict['ray']
         handle = config_dict['handle']
@@ -3094,7 +3094,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
         self._update_after_cluster_provisioned(
             handle, to_provision_config.prev_handle, task,
             prev_cluster_status, lock_path, config_hash)
-        return handle
+        return handle, False
 
     def _open_ports(self, handle: CloudVmRayResourceHandle) -> None:
         cloud = handle.launched_resources.cloud
@@ -3435,7 +3435,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
         mkdir_code = (f'{cd} && mkdir -p {remote_log_dir} && '
                       f'touch {remote_log_path}')
         encoded_script = shlex.quote(codegen)
-        create_script_code = (f'{{ echo {encoded_script} > {script_path}; }}')
+        create_script_code = f'{{ echo {encoded_script} > {script_path}; }}'
         job_submit_cmd = (
             # JOB_CMD_IDENTIFIER is used for identifying the process retrieved
             # with pid is the same driver process.
@@ -4331,7 +4331,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
         cluster_name_on_cloud = handle.cluster_name_on_cloud
         cloud = handle.launched_resources.cloud
 
-        if (terminate and handle.launched_resources.is_image_managed is True):
+        if terminate and handle.launched_resources.is_image_managed is True:
             # Delete the image when terminating a "cloned" cluster, i.e.,
             # whose image is created by SkyPilot (--clone-disk-from)
             logger.debug(f'Deleting image {handle.launched_resources.image_id}')
sky/backends/local_docker_backend.py CHANGED
@@ -139,7 +139,7 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
             cluster_name: str,
             retry_until_up: bool = False,
             skip_unnecessary_provisioning: bool = False,
-    ) -> Optional[LocalDockerResourceHandle]:
+    ) -> Tuple[Optional[LocalDockerResourceHandle], bool]:
         """Builds docker image for the task and returns cluster name as handle.
 
         Since resource demands are ignored, There's no provisioning in local
@@ -149,7 +149,7 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
         assert task.name is not None, ('Task name cannot be None - have you '
                                        'specified a task name?')
         if dryrun:
-            return None
+            return None, False
         if retry_until_up:
             logger.warning(
                 f'Retrying until up is not supported in backend: {self.NAME}. '
@@ -175,7 +175,7 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
                                                requested_resources=set(
                                                    task.resources),
                                                ready=False)
-        return handle
+        return handle, False
 
     def _sync_workdir(self, handle: LocalDockerResourceHandle,
                       workdir: Path) -> None:
sky/client/common.py CHANGED
@@ -75,7 +75,8 @@ def download_logs_from_api_server(
     body = payloads.DownloadBody(folder_paths=list(paths_on_api_server),)
     response = requests.post(f'{server_common.get_server_url()}/download',
                              json=json.loads(body.model_dump_json()),
-                             stream=True)
+                             stream=True,
+                             cookies=server_common.get_api_cookie_jar())
     if response.status_code == 200:
         remote_home_path = response.headers.get('X-Home-Path')
         assert remote_home_path is not None, response.headers
@@ -176,7 +177,8 @@ def _upload_chunk_with_retry(params: UploadChunkParams) -> None:
         },
         content=FileChunkIterator(f, _UPLOAD_CHUNK_BYTES,
                                   params.chunk_index),
-        headers={'Content-Type': 'application/octet-stream'})
+        headers={'Content-Type': 'application/octet-stream'},
+        cookies=server_common.get_api_cookie_jar())
     if response.status_code == 200:
         data = response.json()
         status = data.get('status')
sky/client/sdk.py CHANGED
@@ -102,7 +102,8 @@ def check(clouds: Optional[Tuple[str]],
     """
     body = payloads.CheckBody(clouds=clouds, verbose=verbose)
     response = requests.post(f'{server_common.get_server_url()}/check',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -118,7 +119,8 @@ def enabled_clouds() -> server_common.RequestId:
     Request Returns:
         A list of enabled clouds in string format.
     """
-    response = requests.get(f'{server_common.get_server_url()}/enabled_clouds')
+    response = requests.get(f'{server_common.get_server_url()}/enabled_clouds',
+                            cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -168,7 +170,8 @@ def list_accelerators(gpus_only: bool = True,
     )
     response = requests.post(
         f'{server_common.get_server_url()}/list_accelerators',
-        json=json.loads(body.model_dump_json()))
+        json=json.loads(body.model_dump_json()),
+        cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -208,7 +211,8 @@ def list_accelerator_counts(
     )
     response = requests.post(
         f'{server_common.get_server_url()}/list_accelerator_counts',
-        json=json.loads(body.model_dump_json()))
+        json=json.loads(body.model_dump_json()),
+        cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -246,7 +250,8 @@ def optimize(
         minimize=minimize,
         request_options=admin_policy_request_options)
     response = requests.post(f'{server_common.get_server_url()}/optimize',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -281,7 +286,8 @@ def validate(
     body = payloads.ValidateBody(dag=dag_str,
                                  request_options=admin_policy_request_options)
     response = requests.post(f'{server_common.get_server_url()}/validate',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     if response.status_code == 400:
         with ux_utils.print_exception_no_traceback():
             raise exceptions.deserialize_exception(
@@ -493,6 +499,7 @@ def launch(
         f'{server_common.get_server_url()}/launch',
         json=json.loads(body.model_dump_json()),
         timeout=5,
+        cookies=server_common.get_api_cookie_jar(),
     )
     return server_common.get_request_id(response)
 
@@ -576,6 +583,7 @@ def exec( # pylint: disable=redefined-builtin
         f'{server_common.get_server_url()}/exec',
         json=json.loads(body.model_dump_json()),
         timeout=5,
+        cookies=server_common.get_api_cookie_jar(),
    )
     return server_common.get_request_id(response)
 
@@ -626,7 +634,8 @@ def tail_logs(cluster_name: str,
         json=json.loads(body.model_dump_json()),
         stream=True,
         timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
-                 None))
+                 None),
+        cookies=server_common.get_api_cookie_jar())
     request_id = server_common.get_request_id(response)
     return stream_response(request_id, response, output_stream)
 
@@ -663,7 +672,8 @@ def download_logs(cluster_name: str,
         job_ids=job_ids,
     )
     response = requests.post(f'{server_common.get_server_url()}/download_logs',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     job_id_remote_path_dict = stream_and_get(
         server_common.get_request_id(response))
     remote2local_path_dict = client_common.download_logs_from_api_server(
@@ -745,6 +755,7 @@ def start(
         f'{server_common.get_server_url()}/start',
         json=json.loads(body.model_dump_json()),
         timeout=5,
+        cookies=server_common.get_api_cookie_jar(),
     )
     return server_common.get_request_id(response)
 
@@ -790,6 +801,7 @@ def down(cluster_name: str, purge: bool = False) -> server_common.RequestId:
         f'{server_common.get_server_url()}/down',
         json=json.loads(body.model_dump_json()),
         timeout=5,
+        cookies=server_common.get_api_cookie_jar(),
     )
     return server_common.get_request_id(response)
 
@@ -838,6 +850,7 @@ def stop(cluster_name: str, purge: bool = False) -> server_common.RequestId:
         f'{server_common.get_server_url()}/stop',
         json=json.loads(body.model_dump_json()),
         timeout=5,
+        cookies=server_common.get_api_cookie_jar(),
     )
     return server_common.get_request_id(response)
 
@@ -907,6 +920,7 @@ def autostop(
         f'{server_common.get_server_url()}/autostop',
         json=json.loads(body.model_dump_json()),
         timeout=5,
+        cookies=server_common.get_api_cookie_jar(),
     )
     return server_common.get_request_id(response)
 
@@ -966,7 +980,8 @@ def queue(cluster_name: str,
         all_users=all_users,
     )
     response = requests.post(f'{server_common.get_server_url()}/queue',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1007,7 +1022,8 @@ def job_status(cluster_name: str,
         job_ids=job_ids,
     )
     response = requests.post(f'{server_common.get_server_url()}/job_status',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1060,7 +1076,8 @@ def cancel(
         try_cancel_if_cluster_is_init=_try_cancel_if_cluster_is_init,
     )
     response = requests.post(f'{server_common.get_server_url()}/cancel',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1155,7 +1172,8 @@ def status(
         all_users=all_users,
     )
     response = requests.post(f'{server_common.get_server_url()}/status',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1189,7 +1207,8 @@ def endpoints(
         port=port,
     )
     response = requests.post(f'{server_common.get_server_url()}/endpoints',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1227,7 +1246,8 @@ def cost_report() -> server_common.RequestId:  # pylint: disable=redefined-builtin
             'total_cost': (float) cost given resources and usage intervals,
         }
     """
-    response = requests.get(f'{server_common.get_server_url()}/cost_report')
+    response = requests.get(f'{server_common.get_server_url()}/cost_report',
+                            cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1256,7 +1276,8 @@ def storage_ls() -> server_common.RequestId:
         }
     ]
     """
-    response = requests.get(f'{server_common.get_server_url()}/storage/ls')
+    response = requests.get(f'{server_common.get_server_url()}/storage/ls',
+                            cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1280,7 +1301,8 @@ def storage_delete(name: str) -> server_common.RequestId:
     """
     body = payloads.StorageBody(name=name)
     response = requests.post(f'{server_common.get_server_url()}/storage/delete',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1318,7 +1340,8 @@ def local_up(gpus: bool,
                                  context_name=context_name,
                                  password=password)
     response = requests.post(f'{server_common.get_server_url()}/local_up',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1334,7 +1357,8 @@ def local_down() -> server_common.RequestId:
         with ux_utils.print_exception_no_traceback():
             raise ValueError('sky local down is only supported when running '
                              'SkyPilot locally.')
-    response = requests.post(f'{server_common.get_server_url()}/local_down')
+    response = requests.post(f'{server_common.get_server_url()}/local_down',
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1358,7 +1382,8 @@ def realtime_kubernetes_gpu_availability(
     response = requests.post(
         f'{server_common.get_server_url()}/'
         'realtime_kubernetes_gpu_availability',
-        json=json.loads(body.model_dump_json()))
+        json=json.loads(body.model_dump_json()),
+        cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1389,7 +1414,8 @@ def kubernetes_node_info(
     body = payloads.KubernetesNodeInfoRequestBody(context=context)
     response = requests.post(
         f'{server_common.get_server_url()}/kubernetes_node_info',
-        json=json.loads(body.model_dump_json()))
+        json=json.loads(body.model_dump_json()),
+        cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1418,7 +1444,8 @@ def status_kubernetes() -> server_common.RequestId:
         - context: Kubernetes context used to fetch the cluster information.
     """
     response = requests.get(
-        f'{server_common.get_server_url()}/status_kubernetes')
+        f'{server_common.get_server_url()}/status_kubernetes',
+        cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1444,7 +1471,8 @@ def get(request_id: str) -> Any:
     response = requests.get(
         f'{server_common.get_server_url()}/api/get?request_id={request_id}',
         timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
-                 None))
+                 None),
+        cookies=server_common.get_api_cookie_jar())
     request_task = None
     if response.status_code == 200:
         request_task = requests_lib.Request.decode(
@@ -1523,7 +1551,8 @@ def stream_and_get(
         params=params,
         timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
                  None),
-        stream=True)
+        stream=True,
+        cookies=server_common.get_api_cookie_jar())
     if response.status_code in [404, 400]:
         detail = response.json().get('detail')
         with ux_utils.print_exception_no_traceback():
@@ -1579,7 +1608,8 @@ def api_cancel(request_ids: Optional[Union[str, List[str]]] = None,
 
     response = requests.post(f'{server_common.get_server_url()}/api/cancel',
                              json=json.loads(body.model_dump_json()),
-                             timeout=5)
+                             timeout=5,
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1607,7 +1637,8 @@ def api_status(
         f'{server_common.get_server_url()}/api/status',
         params=server_common.request_body_to_params(body),
         timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
-                 None))
+                 None),
+        cookies=server_common.get_api_cookie_jar())
     server_common.handle_request_error(response)
     return [
         requests_lib.RequestPayload(**request) for request in response.json()
@@ -1634,7 +1665,8 @@ def api_info() -> Dict[str, str]:
     }
 
     """
-    response = requests.get(f'{server_common.get_server_url()}/api/health')
+    response = requests.get(f'{server_common.get_server_url()}/api/health',
+                            cookies=server_common.get_api_cookie_jar())
     response.raise_for_status()
     return response.json()
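The recurring edit in sky/client/sdk.py threads cookies=server_common.get_api_cookie_jar() through every request to the API server, so authenticated deployments keep their session across SDK calls. The jar's construction is not shown in this excerpt; a sketch of the resulting request pattern, mirroring the api_info() hunk above (how the jar is persisted, e.g. to a file under ~/.sky, is an assumption):

    import requests
    from sky.server import common as server_common

    # Every client->server call now carries the shared cookie jar.
    response = requests.get(f'{server_common.get_server_url()}/api/health',
                            cookies=server_common.get_api_cookie_jar())
    response.raise_for_status()
    print(response.json())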
sky/cloud_stores.py CHANGED
@@ -578,13 +578,11 @@ class NebiusCloudStorage(CloudStorage):
         # AWS Sync by default uses 10 threads to upload files to the bucket.
         # To increase parallelism, modify max_concurrent_requests in your
         # aws config file (Default path: ~/.aws/config).
-        endpoint_url = nebius.create_endpoint()
         assert 'nebius://' in source, 'nebius:// is not in source'
         source = source.replace('nebius://', 's3://')
         download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
                                'sync --no-follow-symlinks '
                                f'{source} {destination} '
-                               f'--endpoint {endpoint_url} '
                                f'--profile={nebius.NEBIUS_PROFILE_NAME}')
 
         all_commands = list(self._GET_AWSCLI)
@@ -593,12 +591,10 @@ class NebiusCloudStorage(CloudStorage):
 
     def make_sync_file_command(self, source: str, destination: str) -> str:
         """Downloads a file using AWS CLI."""
-        endpoint_url = nebius.create_endpoint()
         assert 'nebius://' in source, 'nebius:// is not in source'
         source = source.replace('nebius://', 's3://')
         download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
                                f'cp {source} {destination} '
-                               f'--endpoint {endpoint_url} '
                                f'--profile={nebius.NEBIUS_PROFILE_NAME}')
 
         all_commands = list(self._GET_AWSCLI)
sky/clouds/do.py CHANGED
@@ -280,13 +280,12 @@ class DO(clouds.Cloud):
         return True, None
 
     def get_credential_file_mounts(self) -> Dict[str, str]:
-        if do_utils.CREDENTIALS_PATH is None:
+        credential_path = do_utils.get_credentials_path()
+        if credential_path is None:
             return {}
-        if not os.path.exists(os.path.expanduser(do_utils.CREDENTIALS_PATH)):
+        if not os.path.exists(os.path.expanduser(credential_path)):
             return {}
-        return {
-            f'~/.config/doctl/{_CREDENTIAL_FILE}': do_utils.CREDENTIALS_PATH
-        }
+        return {f'~/.config/doctl/{_CREDENTIAL_FILE}': credential_path}
 
     @classmethod
     def get_current_user_identity(cls) -> Optional[List[str]]:
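This change swaps the import-time constant do_utils.CREDENTIALS_PATH for a do_utils.get_credentials_path() call, deferring credential discovery until it is actually needed. The accessor itself lives in sky/provision/do/utils.py (changed in this release but not shown in this excerpt); a hypothetical sketch of its shape, for orientation only:

    import os
    from typing import Optional

    # Hypothetical sketch, not the released implementation: resolve the
    # doctl config path lazily instead of fixing it at import time. The
    # candidate path is illustrative.
    def get_credentials_path() -> Optional[str]:
        path = os.path.expanduser('~/.config/doctl/config.yaml')
        return path if os.path.exists(path) else None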
sky/clouds/gcp.py CHANGED
@@ -688,9 +688,11 @@ class GCP(clouds.Cloud):
         cls,
         instance_type: str,
     ) -> Optional[Dict[str, Union[int, float]]]:
-        # GCP handles accelerators separately from regular instance types,
-        # hence return none here.
-        return None
+        # GCP handles accelerators separately from regular instance types.
+        # This method supports automatically inferring the GPU type for
+        # the instance type that come with GPUs pre-attached.
+        return service_catalog.get_accelerators_from_instance_type(
+            instance_type, clouds='gcp')
 
     @classmethod
     def get_vcpus_mem_from_instance_type(
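Previously this classmethod returned None unconditionally; it now consults the service catalog, so GCP instance types that bundle GPUs resolve to their accelerators. An illustrative expectation, assuming the fetched GCP catalog contains the A2 family (a2-highgpu-1g ships with one A100):

    # Illustrative: exact results depend on the fetched GCP catalog.
    accs = GCP.get_accelerators_from_instance_type('a2-highgpu-1g')
    # -> {'A100': 1}; before this change the method always returned None.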