skypilot-nightly 1.0.0.dev20250328__py3-none-any.whl → 1.0.0.dev20250329__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/clouds/do.py +2 -0
- sky/server/common.py +15 -7
- sky/server/requests/executor.py +1 -1
- sky/server/requests/queues/mp_queue.py +8 -1
- sky/server/server.py +1 -1
- {skypilot_nightly-1.0.0.dev20250328.dist-info → skypilot_nightly-1.0.0.dev20250329.dist-info}/METADATA +3 -2
- {skypilot_nightly-1.0.0.dev20250328.dist-info → skypilot_nightly-1.0.0.dev20250329.dist-info}/RECORD +12 -12
- {skypilot_nightly-1.0.0.dev20250328.dist-info → skypilot_nightly-1.0.0.dev20250329.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250328.dist-info → skypilot_nightly-1.0.0.dev20250329.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250328.dist-info → skypilot_nightly-1.0.0.dev20250329.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250328.dist-info → skypilot_nightly-1.0.0.dev20250329.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = '9e6cef22ae679a1fef4539cbfc02a6d5cdc405ed'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.dev20250328'
|
38
|
+
__version__ = '1.0.0.dev20250329'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/clouds/do.py
CHANGED
@@ -280,6 +280,8 @@ class DO(clouds.Cloud):
|
|
280
280
|
return True, None
|
281
281
|
|
282
282
|
def get_credential_file_mounts(self) -> Dict[str, str]:
|
283
|
+
if do_utils.CREDENTIALS_PATH is None:
|
284
|
+
return {}
|
283
285
|
if not os.path.exists(os.path.expanduser(do_utils.CREDENTIALS_PATH)):
|
284
286
|
return {}
|
285
287
|
return {
|
sky/server/common.py
CHANGED
@@ -51,6 +51,11 @@ API_SERVER_CMD = '-m sky.server.server'
|
|
51
51
|
API_SERVER_CLIENT_DIR = pathlib.Path('~/.sky/api_server/clients')
|
52
52
|
RETRY_COUNT_ON_TIMEOUT = 3
|
53
53
|
|
54
|
+
# The maximum time to wait for the API server to start, set to a conservative
|
55
|
+
# value that is unlikely to be reached, since the server might just be starting slowly
|
56
|
+
# (e.g. in a high-contention environment), and we exit eagerly if the server process exits.
|
57
|
+
WAIT_APISERVER_START_TIMEOUT_SEC = 60
|
58
|
+
|
54
59
|
SKY_API_VERSION_WARNING = (
|
55
60
|
f'{colorama.Fore.YELLOW}SkyPilot API server is too old: '
|
56
61
|
f'v{{server_version}} (client version is v{{client_version}}). '
|
@@ -179,7 +184,8 @@ def _start_api_server(deploy: bool = False,
|
|
179
184
|
server_url = get_server_url(host)
|
180
185
|
assert server_url in AVAILABLE_LOCAL_API_SERVER_URLS, (
|
181
186
|
f'server url {server_url} is not a local url')
|
182
|
-
with rich_utils.client_status('Starting SkyPilot API server'):
|
187
|
+
with rich_utils.client_status('Starting SkyPilot API server, '
|
188
|
+
f'view logs at {constants.API_SERVER_LOGS}'):
|
183
189
|
logger.info(f'{colorama.Style.DIM}Failed to connect to '
|
184
190
|
f'SkyPilot API server at {server_url}. '
|
185
191
|
'Starting a local server.'
|
@@ -216,14 +222,16 @@ def _start_api_server(deploy: bool = False,
|
|
216
222
|
# If this is called from a CLI invocation, we need
|
217
223
|
# start_new_session=True so that SIGINT on the CLI will not also kill
|
218
224
|
# the API server.
|
219
|
-
subprocess.Popen(cmd, shell=True, start_new_session=True)
|
225
|
+
proc = subprocess.Popen(cmd, shell=True, start_new_session=True)
|
220
226
|
|
221
|
-
# Wait for the server to start until timeout.
|
222
|
-
# Conservative upper time bound for starting the server based on
|
223
|
-
# profiling.
|
224
|
-
timeout_sec = 12
|
225
227
|
start_time = time.time()
|
226
228
|
while True:
|
229
|
+
# Check if process has exited
|
230
|
+
if proc.poll() is not None:
|
231
|
+
with ux_utils.print_exception_no_traceback():
|
232
|
+
raise RuntimeError(
|
233
|
+
'SkyPilot API server process exited unexpectedly.\n'
|
234
|
+
f'View logs at: {constants.API_SERVER_LOGS}')
|
227
235
|
api_server_info = get_api_server_status()
|
228
236
|
assert api_server_info.status != ApiServerStatus.VERSION_MISMATCH, (
|
229
237
|
f'API server version mismatch when starting the server. '
|
@@ -231,7 +239,7 @@ def _start_api_server(deploy: bool = False,
|
|
231
239
|
f'Client version: {server_constants.API_VERSION}')
|
232
240
|
if api_server_info.status == ApiServerStatus.HEALTHY:
|
233
241
|
break
|
234
|
-
elif time.time() - start_time >= timeout_sec:
|
242
|
+
elif time.time() - start_time >= WAIT_APISERVER_START_TIMEOUT_SEC:
|
235
243
|
with ux_utils.print_exception_no_traceback():
|
236
244
|
raise RuntimeError(
|
237
245
|
'Failed to start SkyPilot API server at '
|
sky/server/requests/executor.py
CHANGED
@@ -465,7 +465,7 @@ def start(deploy: bool) -> List[multiprocessing.Process]:
|
|
465
465
|
target=mp_queue.start_queue_manager, args=(queue_names, port))
|
466
466
|
queue_server.start()
|
467
467
|
sub_procs.append(queue_server)
|
468
|
-
mp_queue.wait_for_queues_to_be_ready(queue_names, port)
|
468
|
+
mp_queue.wait_for_queues_to_be_ready(queue_names, queue_server, port)
|
469
469
|
|
470
470
|
logger.info('Request queues created')
|
471
471
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
"""Shared queues for multiprocessing."""
|
2
|
+
import multiprocessing
|
2
3
|
from multiprocessing import managers
|
3
4
|
import queue
|
4
5
|
import time
|
@@ -57,10 +58,13 @@ def get_queue(queue_name: str,
|
|
57
58
|
|
58
59
|
|
59
60
|
def wait_for_queues_to_be_ready(queue_names: List[str],
|
61
|
+
queue_server: multiprocessing.Process,
|
60
62
|
port: int = DEFAULT_QUEUE_MANAGER_PORT) -> None:
|
61
63
|
"""Wait for the queues to be ready after queue manager is just started."""
|
62
64
|
initial_time = time.time()
|
63
|
-
|
65
|
+
# Wait for queue manager to be ready. Exit eagerly if the manager process
|
66
|
+
# exits; otherwise just wait for a long timeout that is unlikely to be reached.
|
67
|
+
max_wait_time = 60
|
64
68
|
while queue_names:
|
65
69
|
try:
|
66
70
|
get_queue(queue_names[0], port)
|
@@ -70,6 +74,9 @@ def wait_for_queues_to_be_ready(queue_names: List[str],
|
|
70
74
|
logger.info(f'Waiting for request queue, named {queue_names[0]!r}, '
|
71
75
|
f'to be ready...')
|
72
76
|
time.sleep(0.2)
|
77
|
+
if not queue_server.is_alive():
|
78
|
+
raise RuntimeError(
|
79
|
+
'Queue manager process exited unexpectedly.') from e
|
73
80
|
if time.time() - initial_time > max_wait_time:
|
74
81
|
raise RuntimeError(
|
75
82
|
f'Request queue, named {queue_names[0]!r}, '
|
sky/server/server.py
CHANGED
@@ -1116,7 +1116,7 @@ if __name__ == '__main__':
|
|
1116
1116
|
|
1117
1117
|
sub_procs = []
|
1118
1118
|
try:
|
1119
|
-
sub_procs = executor.start(cmd_args.deploy)
|
1119
|
+
sub_procs = executor.start(deploy=cmd_args.deploy)
|
1120
1120
|
logger.info(f'Starting SkyPilot API server, workers={num_workers}')
|
1121
1121
|
# We don't support reload for now, since it may cause leakage of request
|
1122
1122
|
# workers or interrupt running requests.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: skypilot-nightly
|
3
|
-
Version: 1.0.0.dev20250328
|
3
|
+
Version: 1.0.0.dev20250329
|
4
4
|
Summary: SkyPilot: An intercloud broker for the clouds
|
5
5
|
Author: SkyPilot Team
|
6
6
|
License: Apache 2.0
|
@@ -230,7 +230,7 @@ SkyPilot **is easy to use for AI users**:
|
|
230
230
|
- Easy job management: queue, run, and auto-recover many jobs
|
231
231
|
|
232
232
|
SkyPilot **unifies multiple clusters, clouds, and hardware**:
|
233
|
-
- One interface to use reserved GPUs, Kubernetes clusters, or
|
233
|
+
- One interface to use reserved GPUs, Kubernetes clusters, or 16+ clouds
|
234
234
|
- [Flexible provisioning](https://docs.skypilot.co/en/latest/examples/auto-failover.html) of GPUs, TPUs, CPUs, with auto-retry
|
235
235
|
- [Team deployment](https://docs.skypilot.co/en/latest/reference/api-server/api-server.html) and resource sharing
|
236
236
|
|
@@ -262,6 +262,7 @@ VMware vSphere, Nebius.
|
|
262
262
|
<p align="center">
|
263
263
|
<img alt="SkyPilot" src="https://raw.githubusercontent.com/skypilot-org/skypilot/master/docs/source/images/cloud-logos-light.png" width=85%>
|
264
264
|
</p>
|
265
|
+
<!-- source xcf file: https://drive.google.com/drive/folders/1S_acjRsAD3T14qMeEnf6FFrIwHu_Gs_f?usp=drive_link -->
|
265
266
|
|
266
267
|
|
267
268
|
## Getting started
|
{skypilot_nightly-1.0.0.dev20250328.dist-info → skypilot_nightly-1.0.0.dev20250329.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
sky/__init__.py,sha256=
|
1
|
+
sky/__init__.py,sha256=sMgwavp0z5P8piZvV57bnGkBJpsUPU6jlzZUqdzi3ZU,6428
|
2
2
|
sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
|
3
3
|
sky/authentication.py,sha256=ND011K_-Ud1dVZF37A9KrwYir_ihJXcHc7iDWmuBc8Q,22872
|
4
4
|
sky/check.py,sha256=iMGuM7yjUPRgDHc13Pf1_LSybBqIexq-6aXfnVcaI54,15898
|
@@ -51,7 +51,7 @@ sky/clouds/aws.py,sha256=-feXZ72UMUjuZz8dV4qMTjMMTC9pwfkQQT3KJDcIv5A,54633
|
|
51
51
|
sky/clouds/azure.py,sha256=Zpo6ftWz_B30mX7N-An7JVO-8v7aU3f9cw1iH9phvwE,32251
|
52
52
|
sky/clouds/cloud.py,sha256=OoSyFNYtby2Y0h2TpfMB_lEeolIZOQcfKgIn6AvRC68,36694
|
53
53
|
sky/clouds/cudo.py,sha256=_UkLEtwJsfDMKlmJfML5W3rA8VArba4x8YGIdnvgZoM,13226
|
54
|
-
sky/clouds/do.py,sha256=
|
54
|
+
sky/clouds/do.py,sha256=P38l4otp2AuDReUH9Ii621ht9s-NIyb7-R37jbtjHk8,11580
|
55
55
|
sky/clouds/fluidstack.py,sha256=jIqW1MLe55MVME1PATZm8e6_FsiTnJawW7OdytPW0aM,12666
|
56
56
|
sky/clouds/gcp.py,sha256=sUJ9LXUnMxYm6OYZ5P-z1dJHxgVILuC3OW3eFSTNCv8,56919
|
57
57
|
sky/clouds/ibm.py,sha256=XtuPN8QgrwJdb1qb_b-7KwAE2tf_N9wh9eEfi2tcg-s,22013
|
@@ -229,20 +229,20 @@ sky/serve/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
|
|
229
229
|
sky/serve/server/core.py,sha256=pRvFadEIH_WTUkTtSmuFoPBP4JFq8Obt68ifi9DWuog,36865
|
230
230
|
sky/serve/server/server.py,sha256=gQGVU9nHYdGbaLhGjIUNIYn4xwKjRASRJkiiTL5AI1Y,3283
|
231
231
|
sky/server/__init__.py,sha256=MPPBqFzXz6Jv5QSk6td_IcvnfXfNErDZVcizu4MLRow,27
|
232
|
-
sky/server/common.py,sha256=
|
232
|
+
sky/server/common.py,sha256=6FQ-2X4AagshuXNxKxjgFRKT_mtgTGh7kG1GOJ2tZIM,19185
|
233
233
|
sky/server/constants.py,sha256=_ZNrxYh8vmgbf3DmkGDduxjvO2y43ZSPTkH5rCNsVjU,770
|
234
|
-
sky/server/server.py,sha256=
|
234
|
+
sky/server/server.py,sha256=b0D1lxZgozC1ny7xDf1dVAMQaF5u386PT2QETa9dmIk,44479
|
235
235
|
sky/server/stream_utils.py,sha256=4JMHgtoXPpCT8JwtqyUcDQ9IdZFir9om0JaCRr8rvbQ,5849
|
236
236
|
sky/server/uvicorn.py,sha256=wajwPHJ3IEEP3GMNOCc0S81-1v2qT5F-ejUkLFVhUzk,2953
|
237
237
|
sky/server/html/log.html,sha256=TSGZktua9Ysl_ysg3w60rjxAxhH61AJnsYDHdtqrjmI,6929
|
238
238
|
sky/server/requests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
239
239
|
sky/server/requests/event_loop.py,sha256=OhpPbuce65bbjpGRlcJa78AVnYSm08SzFKt70ypCUuQ,1211
|
240
|
-
sky/server/requests/executor.py,sha256=
|
240
|
+
sky/server/requests/executor.py,sha256=wW-8s8APEakYRJKNiWIhHg2vrn2UUteseUTsqm9q3Fg,21693
|
241
241
|
sky/server/requests/payloads.py,sha256=K9CzEqRLDdU2R7s3yxPDD2vC-h6beD_RPdSRpjoQzu4,16529
|
242
242
|
sky/server/requests/preconditions.py,sha256=ipxIb_3JXG6S3-ymcOdqQNb7VDvoPqADxu9ZK7-nQWc,7179
|
243
243
|
sky/server/requests/requests.py,sha256=9ovdQE-zv_Mvc6IbGATHVyQlOxSKjg_OankZbgDVGeE,21338
|
244
244
|
sky/server/requests/queues/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
245
|
-
sky/server/requests/queues/mp_queue.py,sha256=
|
245
|
+
sky/server/requests/queues/mp_queue.py,sha256=jDqP4Jd28U3ibSFyMR1DF9I2OWZrPZqFJrG5S6RFpyw,3403
|
246
246
|
sky/server/requests/serializers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
247
247
|
sky/server/requests/serializers/decoders.py,sha256=0cpg80uAqkdK_LqcQPkpKswhcNUUztG9luDLm_0eUow,6811
|
248
248
|
sky/server/requests/serializers/encoders.py,sha256=i4SAb5Oyp00CyMkyidbdA9dtxAzxZl40KTpL_x6pH0w,5679
|
@@ -349,9 +349,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488
|
|
349
349
|
sky/utils/kubernetes/kubernetes_deploy_utils.py,sha256=HPVgNt-wbCVPd9dpDFiA7t2mzQLpjXHJ61eiwRbEr-c,10378
|
350
350
|
sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
|
351
351
|
sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=Kq1MDygF2IxFmu9FXpCxqucXLmeUrvs6OtRij6XTQbo,6554
|
352
|
-
skypilot_nightly-1.0.0.
|
353
|
-
skypilot_nightly-1.0.0.
|
354
|
-
skypilot_nightly-1.0.0.
|
355
|
-
skypilot_nightly-1.0.0.
|
356
|
-
skypilot_nightly-1.0.0.
|
357
|
-
skypilot_nightly-1.0.0.
|
352
|
+
skypilot_nightly-1.0.0.dev20250329.dist-info/licenses/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
|
353
|
+
skypilot_nightly-1.0.0.dev20250329.dist-info/METADATA,sha256=wsP6fph4IygZSON28HbzRno8ygmvOgFBPxEfuruaPNQ,18771
|
354
|
+
skypilot_nightly-1.0.0.dev20250329.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
355
|
+
skypilot_nightly-1.0.0.dev20250329.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
|
356
|
+
skypilot_nightly-1.0.0.dev20250329.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
|
357
|
+
skypilot_nightly-1.0.0.dev20250329.dist-info/RECORD,,
|
{skypilot_nightly-1.0.0.dev20250328.dist-info → skypilot_nightly-1.0.0.dev20250329.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|