skypilot-nightly 1.0.0.dev20250408__py3-none-any.whl → 1.0.0.dev20250411__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/azure.py +1 -1
- sky/adaptors/nebius.py +5 -27
- sky/backends/backend.py +9 -7
- sky/backends/cloud_vm_ray_backend.py +7 -7
- sky/backends/local_docker_backend.py +3 -3
- sky/client/common.py +4 -2
- sky/client/sdk.py +58 -26
- sky/cloud_stores.py +0 -4
- sky/clouds/do.py +4 -5
- sky/clouds/gcp.py +5 -3
- sky/clouds/nebius.py +22 -12
- sky/clouds/service_catalog/data_fetchers/fetch_ibm.py +1 -2
- sky/clouds/service_catalog/gcp_catalog.py +37 -10
- sky/core.py +6 -6
- sky/data/data_utils.py +5 -9
- sky/data/mounting_utils.py +1 -1
- sky/data/storage.py +25 -31
- sky/data/storage_utils.py +27 -18
- sky/execution.py +11 -4
- sky/jobs/client/sdk.py +5 -0
- sky/jobs/server/server.py +5 -1
- sky/optimizer.py +1 -2
- sky/provision/do/utils.py +19 -16
- sky/provision/gcp/config.py +30 -20
- sky/serve/client/sdk.py +6 -0
- sky/server/common.py +16 -1
- sky/server/constants.py +5 -0
- sky/setup_files/dependencies.py +1 -1
- sky/skylet/log_lib.py +4 -0
- sky/skypilot_config.py +19 -30
- sky/task.py +27 -7
- sky/utils/schemas.py +25 -7
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/METADATA +2 -2
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/RECORD +39 -39
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
 import urllib.request
 
 # Replaced with the current commit when building the wheels.
-_SKYPILOT_COMMIT_SHA = '
+_SKYPILOT_COMMIT_SHA = 'b4202948016cdf48a5939ea9bf1769a2d31f73bf'
 
 
 def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
 
 
 __commit__ = _get_git_commit()
-__version__ = '1.0.0.dev20250408'
+__version__ = '1.0.0.dev20250411'
 __root_dir__ = os.path.dirname(os.path.abspath(__file__))
 
 
sky/adaptors/azure.py
CHANGED
@@ -232,7 +232,7 @@ def get_client(name: str,
                     'Must provide resource_group_name keyword '
                     'arguments for container client.')
                 sky_logger.info(
-                    'Failed to check the
+                    'Failed to check the existence of the '
                     f'container {container_url!r} due to '
                     'insufficient IAM role for storage '
                     f'account {storage_account_name!r}.')
sky/adaptors/nebius.py
CHANGED
@@ -1,7 +1,6 @@
 """Nebius cloud adaptor."""
 import os
 import threading
-from typing import Optional
 
 from sky.adaptors import common
 from sky.utils import annotations
@@ -168,7 +167,7 @@ def session():
 
 
 @annotations.lru_cache(scope='global')
-def resource(resource_name: str, region: str = DEFAULT_REGION, **kwargs):
+def resource(resource_name: str, **kwargs):
     """Create a Nebius resource.
 
     Args:
@@ -181,21 +180,13 @@ def resource(resource_name: str, region: str = DEFAULT_REGION, **kwargs):
     # Reference: https://stackoverflow.com/a/59635814
 
     session_ = session()
-    nebius_credentials = get_nebius_credentials(session_)
-    endpoint = create_endpoint(region)
 
-    return session_.resource(
-        resource_name,
-        endpoint_url=endpoint,
-        aws_access_key_id=nebius_credentials.access_key,
-        aws_secret_access_key=nebius_credentials.secret_key,
-        region_name=region,
-        **kwargs)
+    return session_.resource(resource_name, **kwargs)
 
 
 @annotations.lru_cache(scope='global')
-def client(service_name: str, region):
-    """Create
+def client(service_name: str):
+    """Create Nebius client of a certain service.
 
     Args:
         service_name: Nebius service name (e.g., 's3').
@@ -207,14 +198,8 @@ def client(service_name: str, region):
     # Reference: https://stackoverflow.com/a/59635814
 
     session_ = session()
-    nebius_credentials = get_nebius_credentials(session_)
-    endpoint = create_endpoint(region)
 
-    return session_.client(service_name,
-                           endpoint_url=endpoint,
-                           aws_access_key_id=nebius_credentials.access_key,
-                           aws_secret_access_key=nebius_credentials.secret_key,
-                           region_name=region)
+    return session_.client(service_name)
 
 
 @common.load_lazy_modules(_LAZY_MODULES)
@@ -223,10 +208,3 @@ def botocore_exceptions():
     # pylint: disable=import-outside-toplevel
     from botocore import exceptions
     return exceptions
-
-
-def create_endpoint(region: Optional[str] = DEFAULT_REGION) -> str:
-    """Reads accountid necessary to interact with Nebius Object Storage"""
-    if region is None:
-        region = DEFAULT_REGION
-    return f'https://storage.{region}.nebius.cloud:443'
sky/backends/backend.py
CHANGED
@@ -1,6 +1,6 @@
 """Sky backend interface."""
 import typing
-from typing import Dict, Generic, Optional
+from typing import Dict, Generic, Optional, Tuple
 
 from sky.usage import usage_lib
 from sky.utils import cluster_utils
@@ -53,7 +53,7 @@ class Backend(Generic[_ResourceHandleType]):
             cluster_name: Optional[str] = None,
             retry_until_up: bool = False,
             skip_unnecessary_provisioning: bool = False,
-    ) -> Optional[_ResourceHandleType]:
+    ) -> Tuple[Optional[_ResourceHandleType], bool]:
         """Provisions resources for the given task.
 
         Args:
@@ -68,13 +68,15 @@ class Backend(Generic[_ResourceHandleType]):
                 the existing cluster will be reused and re-provisioned.
             retry_until_up: If True, retry provisioning until resources are
                 successfully launched.
-
-                the existing cluster_name's config. Skip provisioning if no
+            skip_unnecessary_provisioning: If True, compare the cluster config
+                to the existing cluster_name's config. Skip provisioning if no
                 updates are needed for the existing cluster.
 
         Returns:
-            A ResourceHandle object for the provisioned resources, or None if
-
+            - A ResourceHandle object for the provisioned resources, or None if
+              dryrun is True.
+            - A boolean that is True if the provisioning was skipped, and False
+              if provisioning actually happened. Dryrun always gives False.
         """
         if cluster_name is None:
             cluster_name = cluster_utils.generate_cluster_name()
@@ -159,7 +161,7 @@ class Backend(Generic[_ResourceHandleType]):
             cluster_name: str,
             retry_until_up: bool = False,
             skip_unnecessary_provisioning: bool = False,
-    ) -> Optional[_ResourceHandleType]:
+    ) -> Tuple[Optional[_ResourceHandleType], bool]:
         raise NotImplementedError
 
     def _sync_workdir(self, handle: _ResourceHandleType, workdir: Path) -> None:
sky/backends/cloud_vm_ray_backend.py
CHANGED
@@ -2829,7 +2829,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
             cluster_name: str,
             retry_until_up: bool = False,
             skip_unnecessary_provisioning: bool = False,
-    ) -> Optional[CloudVmRayResourceHandle]:
+    ) -> Tuple[Optional[CloudVmRayResourceHandle], bool]:
         """Provisions the cluster, or re-provisions an existing cluster.
 
         Use the SKYPILOT provisioner if it's supported by the cloud, otherwise
@@ -2969,7 +2969,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
                         failover_history=e.failover_history) from None
             if dryrun:
                 record = global_user_state.get_cluster_from_name(cluster_name)
-                return record['handle'] if record is not None else None
+                return record['handle'] if record is not None else None, False
 
             if config_dict['provisioning_skipped']:
                 # Skip further provisioning.
@@ -2980,7 +2980,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
                 record = global_user_state.get_cluster_from_name(cluster_name)
                 assert record is not None and record['handle'] is not None, (
                     cluster_name, record)
-                return record['handle']
+                return record['handle'], True
 
             if 'provision_record' in config_dict:
                 # New provisioner is used here.
@@ -3022,7 +3022,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
                 self._update_after_cluster_provisioned(
                     handle, to_provision_config.prev_handle, task,
                     prev_cluster_status, lock_path, config_hash)
-                return handle
+                return handle, False
 
             cluster_config_file = config_dict['ray']
             handle = config_dict['handle']
@@ -3094,7 +3094,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
             self._update_after_cluster_provisioned(
                 handle, to_provision_config.prev_handle, task,
                 prev_cluster_status, lock_path, config_hash)
-            return handle
+            return handle, False
 
     def _open_ports(self, handle: CloudVmRayResourceHandle) -> None:
         cloud = handle.launched_resources.cloud
@@ -3435,7 +3435,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
         mkdir_code = (f'{cd} && mkdir -p {remote_log_dir} && '
                       f'touch {remote_log_path}')
         encoded_script = shlex.quote(codegen)
-        create_script_code =
+        create_script_code = f'{{ echo {encoded_script} > {script_path}; }}'
         job_submit_cmd = (
             # JOB_CMD_IDENTIFIER is used for identifying the process retrieved
            # with pid is the same driver process.
@@ -4331,7 +4331,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
         cluster_name_on_cloud = handle.cluster_name_on_cloud
         cloud = handle.launched_resources.cloud
 
-        if
+        if terminate and handle.launched_resources.is_image_managed is True:
            # Delete the image when terminating a "cloned" cluster, i.e.,
            # whose image is created by SkyPilot (--clone-disk-from)
            logger.debug(f'Deleting image {handle.launched_resources.image_id}')
sky/backends/local_docker_backend.py
CHANGED
@@ -139,7 +139,7 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
             cluster_name: str,
             retry_until_up: bool = False,
             skip_unnecessary_provisioning: bool = False,
-    ) -> Optional[LocalDockerResourceHandle]:
+    ) -> Tuple[Optional[LocalDockerResourceHandle], bool]:
         """Builds docker image for the task and returns cluster name as handle.
 
         Since resource demands are ignored, There's no provisioning in local
@@ -149,7 +149,7 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
         assert task.name is not None, ('Task name cannot be None - have you '
                                        'specified a task name?')
         if dryrun:
-            return None
+            return None, False
         if retry_until_up:
             logger.warning(
                 f'Retrying until up is not supported in backend: {self.NAME}. '
@@ -175,7 +175,7 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
                                                 requested_resources=set(
                                                     task.resources),
                                                 ready=False)
-        return handle
+        return handle, False
 
     def _sync_workdir(self, handle: LocalDockerResourceHandle,
                       workdir: Path) -> None:
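Note: provision() now returns a two-tuple across all backends, so call sites must unpack it. A hedged sketch of the new contract (argument names other than those visible in the hunks above are assumptions):

    # Returns (handle, provisioning_skipped). Dryrun always yields False for
    # the second element; True means the existing cluster's config matched
    # and provisioning was skipped entirely.
    handle, provisioning_skipped = backend.provision(
        task,
        to_provision,  # assumed positional argument, not shown in the hunks
        dryrun=False,
        stream_logs=True,
        cluster_name='my-cluster',
        skip_unnecessary_provisioning=True)
    if provisioning_skipped:
        # The existing cluster config matched; no cloud calls were made.
        pass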
sky/client/common.py
CHANGED
@@ -75,7 +75,8 @@ def download_logs_from_api_server(
     body = payloads.DownloadBody(folder_paths=list(paths_on_api_server),)
     response = requests.post(f'{server_common.get_server_url()}/download',
                              json=json.loads(body.model_dump_json()),
-                             stream=True)
+                             stream=True,
+                             cookies=server_common.get_api_cookie_jar())
     if response.status_code == 200:
         remote_home_path = response.headers.get('X-Home-Path')
         assert remote_home_path is not None, response.headers
@@ -176,7 +177,8 @@ def _upload_chunk_with_retry(params: UploadChunkParams) -> None:
         },
         content=FileChunkIterator(f, _UPLOAD_CHUNK_BYTES,
                                   params.chunk_index),
-        headers={'Content-Type': 'application/octet-stream'})
+        headers={'Content-Type': 'application/octet-stream'},
+        cookies=server_common.get_api_cookie_jar())
     if response.status_code == 200:
         data = response.json()
         status = data.get('status')
sky/client/sdk.py
CHANGED
@@ -102,7 +102,8 @@ def check(clouds: Optional[Tuple[str]],
     """
     body = payloads.CheckBody(clouds=clouds, verbose=verbose)
     response = requests.post(f'{server_common.get_server_url()}/check',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -118,7 +119,8 @@ def enabled_clouds() -> server_common.RequestId:
     Request Returns:
         A list of enabled clouds in string format.
     """
-    response = requests.get(f'{server_common.get_server_url()}/enabled_clouds')
+    response = requests.get(f'{server_common.get_server_url()}/enabled_clouds',
+                            cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -168,7 +170,8 @@ def list_accelerators(gpus_only: bool = True,
     )
     response = requests.post(
         f'{server_common.get_server_url()}/list_accelerators',
-        json=json.loads(body.model_dump_json()))
+        json=json.loads(body.model_dump_json()),
+        cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -208,7 +211,8 @@ def list_accelerator_counts(
     )
     response = requests.post(
         f'{server_common.get_server_url()}/list_accelerator_counts',
-        json=json.loads(body.model_dump_json()))
+        json=json.loads(body.model_dump_json()),
+        cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -246,7 +250,8 @@ def optimize(
                                  minimize=minimize,
                                  request_options=admin_policy_request_options)
     response = requests.post(f'{server_common.get_server_url()}/optimize',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -281,7 +286,8 @@ def validate(
     body = payloads.ValidateBody(dag=dag_str,
                                  request_options=admin_policy_request_options)
     response = requests.post(f'{server_common.get_server_url()}/validate',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     if response.status_code == 400:
         with ux_utils.print_exception_no_traceback():
             raise exceptions.deserialize_exception(
@@ -493,6 +499,7 @@ def launch(
         f'{server_common.get_server_url()}/launch',
         json=json.loads(body.model_dump_json()),
         timeout=5,
+        cookies=server_common.get_api_cookie_jar(),
     )
     return server_common.get_request_id(response)
 
@@ -576,6 +583,7 @@ def exec( # pylint: disable=redefined-builtin
         f'{server_common.get_server_url()}/exec',
        json=json.loads(body.model_dump_json()),
        timeout=5,
+        cookies=server_common.get_api_cookie_jar(),
     )
     return server_common.get_request_id(response)
 
@@ -626,7 +634,8 @@ def tail_logs(cluster_name: str,
        json=json.loads(body.model_dump_json()),
        stream=True,
        timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
-                 None))
+                 None),
+        cookies=server_common.get_api_cookie_jar())
     request_id = server_common.get_request_id(response)
     return stream_response(request_id, response, output_stream)
 
@@ -663,7 +672,8 @@ def download_logs(cluster_name: str,
         job_ids=job_ids,
     )
     response = requests.post(f'{server_common.get_server_url()}/download_logs',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     job_id_remote_path_dict = stream_and_get(
         server_common.get_request_id(response))
     remote2local_path_dict = client_common.download_logs_from_api_server(
@@ -745,6 +755,7 @@ def start(
         f'{server_common.get_server_url()}/start',
         json=json.loads(body.model_dump_json()),
         timeout=5,
+        cookies=server_common.get_api_cookie_jar(),
     )
     return server_common.get_request_id(response)
 
@@ -790,6 +801,7 @@ def down(cluster_name: str, purge: bool = False) -> server_common.RequestId:
         f'{server_common.get_server_url()}/down',
         json=json.loads(body.model_dump_json()),
         timeout=5,
+        cookies=server_common.get_api_cookie_jar(),
     )
     return server_common.get_request_id(response)
 
@@ -838,6 +850,7 @@ def stop(cluster_name: str, purge: bool = False) -> server_common.RequestId:
         f'{server_common.get_server_url()}/stop',
         json=json.loads(body.model_dump_json()),
         timeout=5,
+        cookies=server_common.get_api_cookie_jar(),
     )
     return server_common.get_request_id(response)
 
@@ -907,6 +920,7 @@ def autostop(
         f'{server_common.get_server_url()}/autostop',
         json=json.loads(body.model_dump_json()),
         timeout=5,
+        cookies=server_common.get_api_cookie_jar(),
     )
     return server_common.get_request_id(response)
 
@@ -966,7 +980,8 @@ def queue(cluster_name: str,
         all_users=all_users,
     )
     response = requests.post(f'{server_common.get_server_url()}/queue',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1007,7 +1022,8 @@ def job_status(cluster_name: str,
         job_ids=job_ids,
     )
     response = requests.post(f'{server_common.get_server_url()}/job_status',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1060,7 +1076,8 @@ def cancel(
         try_cancel_if_cluster_is_init=_try_cancel_if_cluster_is_init,
     )
     response = requests.post(f'{server_common.get_server_url()}/cancel',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1155,7 +1172,8 @@ def status(
         all_users=all_users,
     )
     response = requests.post(f'{server_common.get_server_url()}/status',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1189,7 +1207,8 @@ def endpoints(
         port=port,
     )
     response = requests.post(f'{server_common.get_server_url()}/endpoints',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1227,7 +1246,8 @@ def cost_report() -> server_common.RequestId:  # pylint: disable=redefined-built
         'total_cost': (float) cost given resources and usage intervals,
         }
     """
-    response = requests.get(f'{server_common.get_server_url()}/cost_report')
+    response = requests.get(f'{server_common.get_server_url()}/cost_report',
+                            cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1256,7 +1276,8 @@ def storage_ls() -> server_common.RequestId:
         }
     ]
     """
-    response = requests.get(f'{server_common.get_server_url()}/storage/ls')
+    response = requests.get(f'{server_common.get_server_url()}/storage/ls',
+                            cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1280,7 +1301,8 @@ def storage_delete(name: str) -> server_common.RequestId:
     """
     body = payloads.StorageBody(name=name)
     response = requests.post(f'{server_common.get_server_url()}/storage/delete',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1318,7 +1340,8 @@ def local_up(gpus: bool,
                                 context_name=context_name,
                                 password=password)
     response = requests.post(f'{server_common.get_server_url()}/local_up',
-                             json=json.loads(body.model_dump_json()))
+                             json=json.loads(body.model_dump_json()),
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1334,7 +1357,8 @@ def local_down() -> server_common.RequestId:
         with ux_utils.print_exception_no_traceback():
             raise ValueError('sky local down is only supported when running '
                              'SkyPilot locally.')
-    response = requests.post(f'{server_common.get_server_url()}/local_down')
+    response = requests.post(f'{server_common.get_server_url()}/local_down',
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1358,7 +1382,8 @@ def realtime_kubernetes_gpu_availability(
     response = requests.post(
         f'{server_common.get_server_url()}/'
         'realtime_kubernetes_gpu_availability',
-        json=json.loads(body.model_dump_json()))
+        json=json.loads(body.model_dump_json()),
+        cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1389,7 +1414,8 @@ def kubernetes_node_info(
     body = payloads.KubernetesNodeInfoRequestBody(context=context)
     response = requests.post(
         f'{server_common.get_server_url()}/kubernetes_node_info',
-        json=json.loads(body.model_dump_json()))
+        json=json.loads(body.model_dump_json()),
+        cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1418,7 +1444,8 @@ def status_kubernetes() -> server_common.RequestId:
     - context: Kubernetes context used to fetch the cluster information.
     """
     response = requests.get(
-        f'{server_common.get_server_url()}/status_kubernetes')
+        f'{server_common.get_server_url()}/status_kubernetes',
+        cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1444,7 +1471,8 @@ def get(request_id: str) -> Any:
     response = requests.get(
         f'{server_common.get_server_url()}/api/get?request_id={request_id}',
         timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
-                 None))
+                 None),
+        cookies=server_common.get_api_cookie_jar())
     request_task = None
     if response.status_code == 200:
         request_task = requests_lib.Request.decode(
@@ -1523,7 +1551,8 @@ def stream_and_get(
         params=params,
         timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
                  None),
-        stream=True)
+        stream=True,
+        cookies=server_common.get_api_cookie_jar())
     if response.status_code in [404, 400]:
         detail = response.json().get('detail')
         with ux_utils.print_exception_no_traceback():
@@ -1579,7 +1608,8 @@ def api_cancel(request_ids: Optional[Union[str, List[str]]] = None,
 
     response = requests.post(f'{server_common.get_server_url()}/api/cancel',
                              json=json.loads(body.model_dump_json()),
-                             timeout=5)
+                             timeout=5,
+                             cookies=server_common.get_api_cookie_jar())
     return server_common.get_request_id(response)
 
 
@@ -1607,7 +1637,8 @@ def api_status(
         f'{server_common.get_server_url()}/api/status',
         params=server_common.request_body_to_params(body),
         timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
-                 None))
+                 None),
+        cookies=server_common.get_api_cookie_jar())
     server_common.handle_request_error(response)
     return [
         requests_lib.RequestPayload(**request) for request in response.json()
@@ -1634,7 +1665,8 @@ def api_info() -> Dict[str, str]:
     }
 
     """
-    response = requests.get(f'{server_common.get_server_url()}/api/health')
+    response = requests.get(f'{server_common.get_server_url()}/api/health',
+                            cookies=server_common.get_api_cookie_jar())
     response.raise_for_status()
     return response.json()
 
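Note: every client SDK request now attaches the API server cookie jar, so deployments that front the API server with cookie-based authentication keep working. The pattern, mirroring the api_info() hunk above (get_api_cookie_jar() is the helper named in the diff; how it loads and persists cookies is not shown here):

    import requests

    from sky.server import common as server_common

    # Same shape as every call in this file: send the locally persisted
    # cookie jar along with the request so an authenticated API server
    # accepts it.
    response = requests.get(f'{server_common.get_server_url()}/api/health',
                            cookies=server_common.get_api_cookie_jar())
    response.raise_for_status()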
sky/cloud_stores.py
CHANGED
@@ -578,13 +578,11 @@ class NebiusCloudStorage(CloudStorage):
         # AWS Sync by default uses 10 threads to upload files to the bucket.
         # To increase parallelism, modify max_concurrent_requests in your
         # aws config file (Default path: ~/.aws/config).
-        endpoint_url = nebius.create_endpoint()
         assert 'nebius://' in source, 'nebius:// is not in source'
         source = source.replace('nebius://', 's3://')
         download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
                                'sync --no-follow-symlinks '
                                f'{source} {destination} '
-                               f'--endpoint {endpoint_url} '
                                f'--profile={nebius.NEBIUS_PROFILE_NAME}')
 
         all_commands = list(self._GET_AWSCLI)
@@ -593,12 +591,10 @@ class NebiusCloudStorage(CloudStorage):
 
     def make_sync_file_command(self, source: str, destination: str) -> str:
         """Downloads a file using AWS CLI."""
-        endpoint_url = nebius.create_endpoint()
         assert 'nebius://' in source, 'nebius:// is not in source'
         source = source.replace('nebius://', 's3://')
         download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
                                f'cp {source} {destination} '
-                               f'--endpoint {endpoint_url} '
                                f'--profile={nebius.NEBIUS_PROFILE_NAME}')
 
         all_commands = list(self._GET_AWSCLI)
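Note: with nebius.create_endpoint() removed, the generated aws s3 commands no longer pass --endpoint; the endpoint is expected to come from the Nebius AWS profile itself. A hypothetical ~/.aws/config entry that would satisfy this, assuming the profile is named 'nebius' and reusing the URL format from the deleted helper with its default region:

    [profile nebius]
    # AWS CLI v2 reads endpoint_url from the profile, replacing the
    # explicit --endpoint flag that was removed above.
    endpoint_url = https://storage.eu-north1.nebius.cloud:443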
sky/clouds/do.py
CHANGED
@@ -280,13 +280,12 @@ class DO(clouds.Cloud):
         return True, None
 
     def get_credential_file_mounts(self) -> Dict[str, str]:
-
+        credential_path = do_utils.get_credentials_path()
+        if credential_path is None:
             return {}
-        if not os.path.exists(os.path.expanduser(
+        if not os.path.exists(os.path.expanduser(credential_path)):
             return {}
-        return {
-            f'~/.config/doctl/{_CREDENTIAL_FILE}': do_utils.CREDENTIALS_PATH
-        }
+        return {f'~/.config/doctl/{_CREDENTIAL_FILE}': credential_path}
 
     @classmethod
     def get_current_user_identity(cls) -> Optional[List[str]]:
sky/clouds/gcp.py
CHANGED
@@ -688,9 +688,11 @@ class GCP(clouds.Cloud):
         cls,
         instance_type: str,
     ) -> Optional[Dict[str, Union[int, float]]]:
-        # GCP handles accelerators separately from regular instance types
-        #
-
+        # GCP handles accelerators separately from regular instance types.
+        # This method supports automatically inferring the GPU type for
+        # the instance type that come with GPUs pre-attached.
+        return service_catalog.get_accelerators_from_instance_type(
+            instance_type, clouds='gcp')
 
     @classmethod
     def get_vcpus_mem_from_instance_type(
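Note: the GCP method now delegates to the service catalog instead of handling accelerators inline, so instance types with bundled GPUs (e.g., the A2 family) resolve their accelerators automatically. A hedged usage sketch (the method name is inferred from the catalog call above; the example instance type and return value are assumptions, not taken from the diff):

    from sky.clouds.gcp import GCP

    # For an instance type with pre-attached GPUs, the catalog lookup is
    # expected to return a mapping like {'A100': 1}; instance types without
    # bundled GPUs return None.
    accelerators = GCP.get_accelerators_from_instance_type('a2-highgpu-1g')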
|