skypilot-nightly 1.0.0.dev20250407__py3-none-any.whl → 1.0.0.dev20250408__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +1 -4
- sky/data/storage_utils.py +10 -3
- sky/server/requests/executor.py +204 -126
- sky/server/requests/process.py +212 -0
- sky/server/requests/queues/local_queue.py +16 -0
- sky/utils/atomic.py +52 -0
- sky/utils/common_utils.py +2 -2
- sky/utils/validator.py +1 -8
- {skypilot_nightly-1.0.0.dev20250407.dist-info → skypilot_nightly-1.0.0.dev20250408.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250407.dist-info → skypilot_nightly-1.0.0.dev20250408.dist-info}/RECORD +15 -12
- {skypilot_nightly-1.0.0.dev20250407.dist-info → skypilot_nightly-1.0.0.dev20250408.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250407.dist-info → skypilot_nightly-1.0.0.dev20250408.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250407.dist-info → skypilot_nightly-1.0.0.dev20250408.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250407.dist-info → skypilot_nightly-1.0.0.dev20250408.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'e0674be528e87191ade88961c44c6449d01232fa'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.
|
38
|
+
__version__ = '1.0.0.dev20250408'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
@@ -655,12 +655,9 @@ class RayCodeGen:
|
|
655
655
|
rclone_flush_script = {rclone_flush_script!r}
|
656
656
|
if run_fn is not None:
|
657
657
|
script = run_fn({gang_scheduling_id}, gang_scheduling_id_to_ip)
|
658
|
-
if script is not None:
|
659
|
-
script += rclone_flush_script
|
660
|
-
else:
|
661
|
-
script = rclone_flush_script
|
662
658
|
|
663
659
|
if script is not None:
|
660
|
+
script += rclone_flush_script
|
664
661
|
sky_env_vars_dict['{constants.SKYPILOT_NUM_GPUS_PER_NODE}'] = {int(math.ceil(num_gpus))!r}
|
665
662
|
# Backward compatibility: Environment starting with `SKY_` is
|
666
663
|
# deprecated. Remove it in v0.9.0.
|
sky/data/storage_utils.py
CHANGED
@@ -227,6 +227,9 @@ def get_excluded_files(src_dir_path: str) -> List[str]:
|
|
227
227
|
expand_src_dir_path = os.path.expanduser(src_dir_path)
|
228
228
|
skyignore_path = os.path.join(expand_src_dir_path,
|
229
229
|
constants.SKY_IGNORE_FILE)
|
230
|
+
# Fail fast if the source is a file.
|
231
|
+
if os.path.isfile(expand_src_dir_path):
|
232
|
+
raise ValueError(f'{src_dir_path} is a file, not a directory.')
|
230
233
|
if os.path.exists(skyignore_path):
|
231
234
|
logger.debug(f' {colorama.Style.DIM}'
|
232
235
|
f'Excluded files to sync to cluster based on '
|
@@ -267,11 +270,15 @@ def zip_files_and_folders(items: List[str],
|
|
267
270
|
item = os.path.expanduser(item)
|
268
271
|
if not os.path.isfile(item) and not os.path.isdir(item):
|
269
272
|
raise ValueError(f'{item} does not exist.')
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
+
if os.path.isfile(item):
|
274
|
+
# Add the file to the zip archive even if it matches
|
275
|
+
# patterns in dot ignore files, as it was explicitly
|
276
|
+
# specified by user.
|
273
277
|
zipf.write(item)
|
274
278
|
elif os.path.isdir(item):
|
279
|
+
excluded_files = set([
|
280
|
+
os.path.join(item, f) for f in get_excluded_files(item)
|
281
|
+
])
|
275
282
|
for root, dirs, files in os.walk(item, followlinks=False):
|
276
283
|
# Modify dirs in-place to control os.walk()'s traversal
|
277
284
|
# behavior. This filters out excluded directories BEFORE
|
sky/server/requests/executor.py
CHANGED
@@ -18,9 +18,7 @@ The number of the workers is determined by the system resources.
|
|
18
18
|
|
19
19
|
See the [README.md](../README.md) for detailed architecture of the executor.
|
20
20
|
"""
|
21
|
-
import concurrent.futures
|
22
21
|
import contextlib
|
23
|
-
import dataclasses
|
24
22
|
import enum
|
25
23
|
import multiprocessing
|
26
24
|
import os
|
@@ -42,7 +40,9 @@ from sky.server import common as server_common
|
|
42
40
|
from sky.server import constants as server_constants
|
43
41
|
from sky.server.requests import payloads
|
44
42
|
from sky.server.requests import preconditions
|
43
|
+
from sky.server.requests import process
|
45
44
|
from sky.server.requests import requests as api_requests
|
45
|
+
from sky.server.requests.queues import local_queue
|
46
46
|
from sky.server.requests.queues import mp_queue
|
47
47
|
from sky.skylet import constants
|
48
48
|
from sky.utils import annotations
|
@@ -101,22 +101,23 @@ _MIN_LONG_WORKERS = 1
|
|
101
101
|
# workers so at least 2 workers are needed to ensure responsiveness.
|
102
102
|
_MIN_SHORT_WORKERS = 2
|
103
103
|
|
104
|
+
# Default number of burstable workers for local API server. A heuristic number
|
105
|
+
# that is large enough for most local cases.
|
106
|
+
# TODO(aylei): the number of burstable workers should be auto-tuned based on the
|
107
|
+
# system usage stats.
|
108
|
+
_BURSTABLE_WORKERS_FOR_LOCAL = 1024
|
109
|
+
|
104
110
|
|
105
111
|
class QueueBackend(enum.Enum):
|
112
|
+
# Local queue backend serves queues in each process locally, which has
|
113
|
+
# lower resource usage but the consumer must be in the same process, i.e.
|
114
|
+
# this only works in single-process mode.
|
115
|
+
LOCAL = 'local'
|
116
|
+
# Multi-process queue backend starts a dedicated process for serving queues.
|
106
117
|
MULTIPROCESSING = 'multiprocessing'
|
107
118
|
# TODO(zhwu): we can add redis backend in the future.
|
108
119
|
|
109
120
|
|
110
|
-
@dataclasses.dataclass
|
111
|
-
class RequestWorker:
|
112
|
-
id: int
|
113
|
-
# The type of queue this worker works on.
|
114
|
-
schedule_type: api_requests.ScheduleType
|
115
|
-
|
116
|
-
def __str__(self) -> str:
|
117
|
-
return f'Worker(id={self.id}, schedule_type={self.schedule_type.value})'
|
118
|
-
|
119
|
-
|
120
121
|
class RequestQueue:
|
121
122
|
"""The queue for the requests, either redis or multiprocessing.
|
122
123
|
|
@@ -128,9 +129,12 @@ class RequestQueue:
|
|
128
129
|
backend: Optional[QueueBackend] = None) -> None:
|
129
130
|
self.name = schedule_type.value
|
130
131
|
self.backend = backend
|
131
|
-
|
132
|
-
|
133
|
-
|
132
|
+
if backend == QueueBackend.MULTIPROCESSING:
|
133
|
+
self.queue = mp_queue.get_queue(self.name)
|
134
|
+
elif backend == QueueBackend.LOCAL:
|
135
|
+
self.queue = local_queue.get_queue(self.name)
|
136
|
+
else:
|
137
|
+
raise RuntimeError(f'Invalid queue backend: {backend}')
|
134
138
|
|
135
139
|
def put(self, request: Tuple[str, bool]) -> None:
|
136
140
|
"""Put a request to the queue.
|
@@ -161,6 +165,104 @@ class RequestQueue:
|
|
161
165
|
queue_backend = QueueBackend.MULTIPROCESSING
|
162
166
|
|
163
167
|
|
168
|
+
def executor_initializer(proc_group: str):
|
169
|
+
setproctitle.setproctitle(f'SkyPilot:executor:{proc_group}:'
|
170
|
+
f'{multiprocessing.current_process().pid}')
|
171
|
+
|
172
|
+
|
173
|
+
class RequestWorker:
|
174
|
+
"""A worker that polls requests from the queue and runs them.
|
175
|
+
|
176
|
+
The worker can run at least `garanteed_parallelism` requests in parallel.
|
177
|
+
If there are more resources available, it can spin up extra workers up to
|
178
|
+
`garanteed_parallelism + burstable_parallelism`.
|
179
|
+
"""
|
180
|
+
|
181
|
+
# The type of queue this worker works on.
|
182
|
+
schedule_type: api_requests.ScheduleType
|
183
|
+
# The least number of requests that this worker can run in parallel.
|
184
|
+
garanteed_parallelism: int
|
185
|
+
# The extra number of requests that this worker can run in parallel
|
186
|
+
# if there are available CPU/memory resources.
|
187
|
+
burstable_parallelism: int = 0
|
188
|
+
|
189
|
+
def __init__(self,
|
190
|
+
schedule_type: api_requests.ScheduleType,
|
191
|
+
garanteed_parallelism: int,
|
192
|
+
burstable_parallelism: int = 0) -> None:
|
193
|
+
self.schedule_type = schedule_type
|
194
|
+
self.garanteed_parallelism = garanteed_parallelism
|
195
|
+
self.burstable_parallelism = burstable_parallelism
|
196
|
+
|
197
|
+
def __str__(self) -> str:
|
198
|
+
return f'Worker(schedule_type={self.schedule_type.value})'
|
199
|
+
|
200
|
+
def process_request(self, executor: process.BurstableExecutor,
|
201
|
+
queue: RequestQueue) -> None:
|
202
|
+
try:
|
203
|
+
request_element = queue.get()
|
204
|
+
if request_element is None:
|
205
|
+
time.sleep(0.1)
|
206
|
+
return
|
207
|
+
request_id, ignore_return_value = request_element
|
208
|
+
request = api_requests.get_request(request_id)
|
209
|
+
assert request is not None, f'Request with ID {request_id} is None'
|
210
|
+
if request.status == api_requests.RequestStatus.CANCELLED:
|
211
|
+
return
|
212
|
+
logger.info(f'[{self}] Submitting request: {request_id}')
|
213
|
+
# Start additional process to run the request, so that it can be
|
214
|
+
# cancelled when requested by a user.
|
215
|
+
# TODO(zhwu): since the executor is reusing the request process,
|
216
|
+
# multiple requests can share the same process pid, which may cause
|
217
|
+
# issues with SkyPilot core functions if they rely on the exit of
|
218
|
+
# the process, such as subprocess_daemon.py.
|
219
|
+
executor.submit_until_success(_request_execution_wrapper,
|
220
|
+
request_id, ignore_return_value)
|
221
|
+
|
222
|
+
logger.info(f'[{self}] Submitted request: {request_id}')
|
223
|
+
except (Exception, SystemExit) as e: # pylint: disable=broad-except
|
224
|
+
# Catch any other exceptions to avoid crashing the worker process.
|
225
|
+
logger.error(
|
226
|
+
f'[{self}] Error processing request: '
|
227
|
+
f'{request_id if "request_id" in locals() else ""} '
|
228
|
+
f'{common_utils.format_exception(e, use_bracket=True)}')
|
229
|
+
|
230
|
+
def run(self) -> None:
|
231
|
+
# Handle the SIGTERM signal to abort the executor process gracefully.
|
232
|
+
proc_group = f'{self.schedule_type.value}'
|
233
|
+
if threading.current_thread() is threading.main_thread():
|
234
|
+
signal.signal(signal.SIGTERM, _sigterm_handler)
|
235
|
+
setproctitle.setproctitle(f'SkyPilot:worker:{proc_group}')
|
236
|
+
queue = _get_queue(self.schedule_type)
|
237
|
+
|
238
|
+
# Use concurrent.futures.ProcessPoolExecutor instead of
|
239
|
+
# multiprocessing.Pool because the former is more efficient with the
|
240
|
+
# support of lazy creation of worker processes.
|
241
|
+
# We use executor instead of individual multiprocessing.Process to avoid
|
242
|
+
# the overhead of forking a new process for each request, which can be
|
243
|
+
# about 1s delay.
|
244
|
+
try:
|
245
|
+
executor = process.BurstableExecutor(
|
246
|
+
garanteed_workers=self.garanteed_parallelism,
|
247
|
+
burst_workers=self.burstable_parallelism,
|
248
|
+
initializer=executor_initializer,
|
249
|
+
initargs=(proc_group,))
|
250
|
+
while True:
|
251
|
+
self.process_request(executor, queue)
|
252
|
+
# TODO(aylei): distinguish between KeyboardInterrupt and SIGTERM.
|
253
|
+
except KeyboardInterrupt:
|
254
|
+
pass
|
255
|
+
finally:
|
256
|
+
# In most cases, here we receive either ctrl-c in foreground
|
257
|
+
# execution or SIGTERM on server exiting. Gracefully exit the
|
258
|
+
# worker process and the executor.
|
259
|
+
# TODO(aylei): worker may also be killed by system daemons like
|
260
|
+
# OOM killer, crash the API server or recreate the worker process
|
261
|
+
# to avoid broken state in such cases.
|
262
|
+
logger.info(f'[{self}] Worker process interrupted')
|
263
|
+
executor.shutdown()
|
264
|
+
|
265
|
+
|
164
266
|
@annotations.lru_cache(scope='global', maxsize=None)
|
165
267
|
def _get_queue(schedule_type: api_requests.ScheduleType) -> RequestQueue:
|
166
268
|
return RequestQueue(schedule_type, backend=queue_backend)
|
@@ -349,110 +451,77 @@ def schedule_request(
|
|
349
451
|
enqueue()
|
350
452
|
|
351
453
|
|
352
|
-
def executor_initializer(proc_group: str):
|
353
|
-
setproctitle.setproctitle(f'SkyPilot:executor:{proc_group}:'
|
354
|
-
f'{multiprocessing.current_process().pid}')
|
355
|
-
|
356
|
-
|
357
|
-
def request_worker(worker: RequestWorker, max_parallel_size: int) -> None:
|
358
|
-
"""Worker for the requests.
|
359
|
-
|
360
|
-
Args:
|
361
|
-
max_parallel_size: Maximum number of parallel jobs this worker can run.
|
362
|
-
"""
|
363
|
-
# Handle the SIGTERM signal to abort the executor process gracefully.
|
364
|
-
signal.signal(signal.SIGTERM, _sigterm_handler)
|
365
|
-
proc_group = f'{worker.schedule_type.value}-{worker.id}'
|
366
|
-
setproctitle.setproctitle(f'SkyPilot:worker:{proc_group}')
|
367
|
-
queue = _get_queue(worker.schedule_type)
|
368
|
-
|
369
|
-
def process_request(executor: concurrent.futures.ProcessPoolExecutor):
|
370
|
-
try:
|
371
|
-
request_element = queue.get()
|
372
|
-
if request_element is None:
|
373
|
-
time.sleep(0.1)
|
374
|
-
return
|
375
|
-
request_id, ignore_return_value = request_element
|
376
|
-
request = api_requests.get_request(request_id)
|
377
|
-
assert request is not None, f'Request with ID {request_id} is None'
|
378
|
-
if request.status == api_requests.RequestStatus.CANCELLED:
|
379
|
-
return
|
380
|
-
logger.info(f'[{worker}] Submitting request: {request_id}')
|
381
|
-
# Start additional process to run the request, so that it can be
|
382
|
-
# cancelled when requested by a user.
|
383
|
-
# TODO(zhwu): since the executor is reusing the request process,
|
384
|
-
# multiple requests can share the same process pid, which may cause
|
385
|
-
# issues with SkyPilot core functions if they rely on the exit of
|
386
|
-
# the process, such as subprocess_daemon.py.
|
387
|
-
future = executor.submit(_request_execution_wrapper, request_id,
|
388
|
-
ignore_return_value)
|
389
|
-
|
390
|
-
if worker.schedule_type == api_requests.ScheduleType.LONG:
|
391
|
-
try:
|
392
|
-
future.result(timeout=None)
|
393
|
-
except Exception as e: # pylint: disable=broad-except
|
394
|
-
logger.error(f'[{worker}] Request {request_id} failed: {e}')
|
395
|
-
logger.info(f'[{worker}] Finished request: {request_id}')
|
396
|
-
else:
|
397
|
-
logger.info(f'[{worker}] Submitted request: {request_id}')
|
398
|
-
except (Exception, SystemExit) as e: # pylint: disable=broad-except
|
399
|
-
# Catch any other exceptions to avoid crashing the worker process.
|
400
|
-
logger.error(
|
401
|
-
f'[{worker}] Error processing request: '
|
402
|
-
f'{request_id if "request_id" in locals() else ""} '
|
403
|
-
f'{common_utils.format_exception(e, use_bracket=True)}')
|
404
|
-
|
405
|
-
# Use concurrent.futures.ProcessPoolExecutor instead of multiprocessing.Pool
|
406
|
-
# because the former is more efficient with the support of lazy creation of
|
407
|
-
# worker processes.
|
408
|
-
# We use executor instead of individual multiprocessing.Process to avoid
|
409
|
-
# the overhead of forking a new process for each request, which can be about
|
410
|
-
# 1s delay.
|
411
|
-
try:
|
412
|
-
executor = concurrent.futures.ProcessPoolExecutor(
|
413
|
-
max_workers=max_parallel_size,
|
414
|
-
initializer=executor_initializer,
|
415
|
-
initargs=(proc_group,))
|
416
|
-
while True:
|
417
|
-
process_request(executor)
|
418
|
-
# TODO(aylei): better to distinct between KeyboardInterrupt and SIGTERM.
|
419
|
-
except KeyboardInterrupt:
|
420
|
-
pass
|
421
|
-
finally:
|
422
|
-
# In most cases, here we receive either ctrl-c in foreground execution
|
423
|
-
# or SIGTERM on server exiting. Gracefully exit the worker process and
|
424
|
-
# the executor.
|
425
|
-
# TODO(aylei): worker may also be killed by system daemons like OOM
|
426
|
-
# killer, crash the API server or recreate the worker process to avoid
|
427
|
-
# broken state in such cases.
|
428
|
-
logger.info(f'[{worker}] Worker process interrupted')
|
429
|
-
executor_processes = list(executor._processes.values()) # pylint: disable=protected-access,line-too-long
|
430
|
-
# Shutdown the executor so that executor process can exit once the
|
431
|
-
# running task is finished or interrupted.
|
432
|
-
executor.shutdown(wait=False)
|
433
|
-
# Proactively interrupt the running task to avoid indefinite waiting.
|
434
|
-
subprocess_utils.run_in_parallel(
|
435
|
-
subprocess_utils.kill_process_with_grace_period,
|
436
|
-
executor_processes,
|
437
|
-
num_threads=len(executor_processes))
|
438
|
-
|
439
|
-
|
440
454
|
def start(deploy: bool) -> List[multiprocessing.Process]:
|
441
|
-
"""Start the request workers.
|
455
|
+
"""Start the request workers.
|
456
|
+
|
457
|
+
Request workers run in background, schedule the requests and delegate the
|
458
|
+
request execution to executor processes. We have different assumptions for
|
459
|
+
the resources in different deployment modes, which leads to different
|
460
|
+
worker setups:
|
461
|
+
|
462
|
+
- Deployment mode (deploy=True), we assume the resources are dedicated to
|
463
|
+
the API server and the resources will be tuned for serious use cases, so:
|
464
|
+
- Use multiprocessing queue backend and dedicated workers processes to
|
465
|
+
avoid GIL contention.
|
466
|
+
- Parallelism (number of executor processes) is fixed and executor
|
467
|
+
processes have same lifecycle with the server, which ensures
|
468
|
+
best-effort cache reusing and stable resources consumption.
|
469
|
+
- Reject to start in low resource environments, to avoid flaky
|
470
|
+
deployments.
|
471
|
+
- Local mode (deploy=False), we assume the server is running in a shared
|
472
|
+
environment (e.g. laptop) and users typically do not pay attention to
|
473
|
+
the resource setup of the server. Moreover, existing users may expect
|
474
|
+
some consistent behaviors with old versions, i.e. before API server was
|
475
|
+
introduced, so:
|
476
|
+
- The max number of long-running executor processes are limited, to avoid
|
477
|
+
high memory consumption when the server is idle.
|
478
|
+
- Allow burstable workers to handle requests when all long-running
|
479
|
+
workers are busy, which mimics the behavior of local sky CLI before
|
480
|
+
API server was introduced.
|
481
|
+
- Works in low resources environments, and further reduce the memory
|
482
|
+
consumption in low resource environments.
|
483
|
+
|
484
|
+
Note that there is still significant overhead for SDK users when migrate to
|
485
|
+
local API server. Since the users are free to run sky operations in Threads
|
486
|
+
when using SDK but all client operations will occupy at least one worker
|
487
|
+
process after API server was introduced.
|
488
|
+
"""
|
442
489
|
# Determine the job capacity of the workers based on the system resources.
|
443
490
|
cpu_count = common_utils.get_cpu_count()
|
444
491
|
mem_size_gb = common_utils.get_mem_size_gb()
|
445
492
|
mem_size_gb = max(0, mem_size_gb - server_constants.MIN_AVAIL_MEM_GB)
|
493
|
+
# Runs in low resource mode if the available memory is less than
|
494
|
+
# server_constants.MIN_AVAIL_MEM_GB.
|
446
495
|
max_parallel_for_long = _max_long_worker_parallism(cpu_count,
|
447
496
|
mem_size_gb,
|
448
497
|
local=not deploy)
|
449
498
|
max_parallel_for_short = _max_short_worker_parallism(
|
450
499
|
mem_size_gb, max_parallel_for_long)
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
500
|
+
if mem_size_gb < server_constants.MIN_AVAIL_MEM_GB:
|
501
|
+
# Permanent worker process may have significant memory consumption
|
502
|
+
# (~350MB per worker) after running commands like `sky check`, so we
|
503
|
+
# don't start any permanent workers in low resource local mode. This
|
504
|
+
# mimics the behavior of local sky CLI before API server was
|
505
|
+
# introduced, where the CLI will start new process everytime and
|
506
|
+
# never reject to start due to resource constraints.
|
507
|
+
# Note that the refresh daemon will still occupy one worker
|
508
|
+
# permanently because it never exits.
|
509
|
+
max_parallel_for_long = 0
|
510
|
+
max_parallel_for_short = 0
|
511
|
+
logger.warning(
|
512
|
+
'SkyPilot API server will run in low resource mode because '
|
513
|
+
'the available memory is less than '
|
514
|
+
f'{server_constants.MIN_AVAIL_MEM_GB}GB.')
|
515
|
+
else:
|
516
|
+
logger.info(
|
517
|
+
f'SkyPilot API server will start {max_parallel_for_long} workers '
|
518
|
+
f'for long requests and will allow at max '
|
519
|
+
f'{max_parallel_for_short} short requests in parallel.')
|
520
|
+
if not deploy:
|
521
|
+
# For local mode, use local queue backend since we only run 1 uvicorn
|
522
|
+
# worker in local mode.
|
523
|
+
global queue_backend
|
524
|
+
queue_backend = QueueBackend.LOCAL
|
456
525
|
sub_procs = []
|
457
526
|
# Setup the queues.
|
458
527
|
if queue_backend == QueueBackend.MULTIPROCESSING:
|
@@ -471,28 +540,37 @@ def start(deploy: bool) -> List[multiprocessing.Process]:
|
|
471
540
|
target=mp_queue.start_queue_manager, args=(queue_names, port))
|
472
541
|
queue_server.start()
|
473
542
|
sub_procs.append(queue_server)
|
474
|
-
mp_queue.wait_for_queues_to_be_ready(queue_names,
|
543
|
+
mp_queue.wait_for_queues_to_be_ready(queue_names,
|
544
|
+
queue_server,
|
545
|
+
port=port)
|
546
|
+
elif queue_backend == QueueBackend.LOCAL:
|
547
|
+
# No setup is needed for local queue backend.
|
548
|
+
pass
|
549
|
+
else:
|
550
|
+
# Should be checked earlier, but just in case.
|
551
|
+
raise RuntimeError(f'Invalid queue backend: {queue_backend}')
|
475
552
|
|
476
553
|
logger.info('Request queues created')
|
477
554
|
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
555
|
+
def run_worker_in_background(worker: RequestWorker):
|
556
|
+
# Thread dispatcher is sufficient for current scale, refer to
|
557
|
+
# tests/load_tests/test_queue_dispatcher.py for more details.
|
558
|
+
# Use daemon thread for automatic cleanup.
|
559
|
+
thread = threading.Thread(target=worker.run, daemon=True)
|
560
|
+
thread.start()
|
561
|
+
|
562
|
+
burstable_parallelism = _BURSTABLE_WORKERS_FOR_LOCAL if not deploy else 0
|
563
|
+
# Start a worker for long requests.
|
564
|
+
long_worker = RequestWorker(schedule_type=api_requests.ScheduleType.LONG,
|
565
|
+
garanteed_parallelism=max_parallel_for_long,
|
566
|
+
burstable_parallelism=burstable_parallelism)
|
567
|
+
run_worker_in_background(long_worker)
|
489
568
|
|
490
569
|
# Start a worker for short requests.
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
sub_procs.append(worker_proc)
|
570
|
+
short_worker = RequestWorker(schedule_type=api_requests.ScheduleType.SHORT,
|
571
|
+
garanteed_parallelism=max_parallel_for_short,
|
572
|
+
burstable_parallelism=burstable_parallelism)
|
573
|
+
run_worker_in_background(short_worker)
|
496
574
|
return sub_procs
|
497
575
|
|
498
576
|
|
@@ -0,0 +1,212 @@
|
|
1
|
+
"""ProcessPoolExecutor with additional supports for skypilot."""
|
2
|
+
import concurrent.futures
|
3
|
+
import logging
|
4
|
+
import multiprocessing
|
5
|
+
import threading
|
6
|
+
import time
|
7
|
+
from typing import Callable, Dict, Optional, Tuple
|
8
|
+
|
9
|
+
from sky.utils import atomic
|
10
|
+
from sky.utils import subprocess_utils
|
11
|
+
|
12
|
+
logger = logging.getLogger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
class PoolExecutor(concurrent.futures.ProcessPoolExecutor):
|
16
|
+
"""A custom ProcessPoolExecutor with additional supports for skypilot.
|
17
|
+
|
18
|
+
The additional supports include:
|
19
|
+
1. Disposable workers: support control whether the worker process should
|
20
|
+
exit after complete a task.
|
21
|
+
2. Idle check: support check if there are any idle workers.
|
22
|
+
3. Proactive shutdown: SIGTERM worker processes when the executor is
|
23
|
+
shutting down instead of indefinitely waiting.
|
24
|
+
"""
|
25
|
+
|
26
|
+
def __init__(self, max_workers: int, **kwargs):
|
27
|
+
super().__init__(max_workers=max_workers, **kwargs)
|
28
|
+
self.max_workers: int = max_workers
|
29
|
+
# The number of workers that are handling tasks, atomicity across
|
30
|
+
# multiple threads is sufficient since the idleness check is
|
31
|
+
# best-effort and does not affect the correctness.
|
32
|
+
# E.g. the following case is totally fine:
|
33
|
+
# 1. Thread 1 checks running == max_workers
|
34
|
+
# 2. Thread 2 decrements running
|
35
|
+
# 3. Thread 1 schedules the task to other pool even if the pool is
|
36
|
+
# currently idle.
|
37
|
+
self.running: atomic.AtomicInt = atomic.AtomicInt(0)
|
38
|
+
|
39
|
+
def submit(self, fn, *args, **kwargs) -> concurrent.futures.Future:
|
40
|
+
"""Submit a task for execution.
|
41
|
+
|
42
|
+
Tracks the number of in-flight tasks so idle workers can be detected.
|
43
|
+
"""
|
44
|
+
self.running.increment()
|
45
|
+
future = super().submit(fn, *args, **kwargs)
|
46
|
+
future.add_done_callback(lambda _: self.running.decrement())
|
47
|
+
return future
|
48
|
+
|
49
|
+
def has_idle_workers(self) -> bool:
|
50
|
+
"""Check if there are any idle workers."""
|
51
|
+
return self.running.get() < self.max_workers
|
52
|
+
|
53
|
+
def shutdown(self, wait: bool = True) -> None:
|
54
|
+
"""Shutdown the executor."""
|
55
|
+
# Here wait means wait for the proactive cancellation to complete.
|
56
|
+
# TODO(aylei): we may support wait=False in the future if needed.
|
57
|
+
assert wait is True, 'wait=False is not supported'
|
58
|
+
executor_processes = list(self._processes.values())
|
59
|
+
# Shutdown the executor so that executor process can exit once the
|
60
|
+
# running task is finished or interrupted.
|
61
|
+
super().shutdown(wait=False)
|
62
|
+
# Proactively interrupt the running task to avoid indefinite waiting.
|
63
|
+
subprocess_utils.run_in_parallel(
|
64
|
+
subprocess_utils.kill_process_with_grace_period,
|
65
|
+
executor_processes,
|
66
|
+
num_threads=len(executor_processes))
|
67
|
+
|
68
|
+
|
69
|
+
# Define the worker function outside of the class to avoid pickling self
|
70
|
+
def _disposable_worker(fn, initializer: Optional[Callable], initargs: Tuple,
|
71
|
+
args, kwargs):
|
72
|
+
try:
|
73
|
+
if initializer is not None:
|
74
|
+
initializer(*initargs)
|
75
|
+
fn(*args, **kwargs)
|
76
|
+
except BaseException as e: # pylint: disable=broad-except
|
77
|
+
return e
|
78
|
+
|
79
|
+
|
80
|
+
class DisposableExecutor:
|
81
|
+
"""A simple wrapper that creates a new process for each task.
|
82
|
+
|
83
|
+
This is a workaround for Python 3.10 since `max_tasks_per_child` of
|
84
|
+
ProcessPoolExecutor was introduced in 3.11. There is no way to control
|
85
|
+
the worker lifetime in 3.10.
|
86
|
+
Ref: https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ProcessPoolExecutor # pylint: disable=line-too-long
|
87
|
+
TODO(aylei): use the official `max_tasks_per_child` when upgrade to 3.11
|
88
|
+
"""
|
89
|
+
|
90
|
+
def __init__(self,
|
91
|
+
max_workers: Optional[int] = None,
|
92
|
+
initializer: Optional[Callable] = None,
|
93
|
+
initargs: Tuple = ()):
|
94
|
+
self.max_workers: Optional[int] = max_workers
|
95
|
+
self.workers: Dict[int, multiprocessing.Process] = {}
|
96
|
+
self._shutdown: bool = False
|
97
|
+
self._lock: threading.Lock = threading.Lock()
|
98
|
+
self._initializer: Optional[Callable] = initializer
|
99
|
+
self._initargs: Tuple = initargs
|
100
|
+
|
101
|
+
def _monitor_worker(self, process: multiprocessing.Process) -> None:
|
102
|
+
"""Monitor the worker process and cleanup when it's done."""
|
103
|
+
process.join()
|
104
|
+
if process.pid:
|
105
|
+
with self._lock:
|
106
|
+
if process.pid in self.workers:
|
107
|
+
del self.workers[process.pid]
|
108
|
+
|
109
|
+
# Submit is not compatible with ProcessPoolExecutor because we do not
|
110
|
+
# bother to return a Future. Can be improved if needed.
|
111
|
+
def submit(self, fn, *args, **kwargs) -> bool:
|
112
|
+
"""Submit a task for execution."""
|
113
|
+
if self._shutdown:
|
114
|
+
return False
|
115
|
+
with self._lock:
|
116
|
+
if (self.max_workers is not None and
|
117
|
+
len(self.workers) >= self.max_workers):
|
118
|
+
return False
|
119
|
+
|
120
|
+
process = multiprocessing.Process(target=_disposable_worker,
|
121
|
+
args=(fn, self._initializer,
|
122
|
+
self._initargs, args, kwargs))
|
123
|
+
process.start()
|
124
|
+
|
125
|
+
with self._lock:
|
126
|
+
pid = process.pid or 0
|
127
|
+
if pid == 0:
|
128
|
+
raise RuntimeError('Failed to start process')
|
129
|
+
self.workers[pid] = process
|
130
|
+
|
131
|
+
# Start monitor thread to cleanup the worker process when it's done.
|
132
|
+
monitor_thread = threading.Thread(target=self._monitor_worker,
|
133
|
+
args=(process,),
|
134
|
+
daemon=True)
|
135
|
+
monitor_thread.start()
|
136
|
+
|
137
|
+
return True
|
138
|
+
|
139
|
+
def has_idle_workers(self) -> bool:
|
140
|
+
"""Check if there are any idle workers."""
|
141
|
+
if self.max_workers is None:
|
142
|
+
return True
|
143
|
+
with self._lock:
|
144
|
+
return len(self.workers) < self.max_workers
|
145
|
+
|
146
|
+
def shutdown(self):
|
147
|
+
"""Shutdown the executor."""
|
148
|
+
with self._lock:
|
149
|
+
self._shutdown = True
|
150
|
+
subprocess_utils.run_in_parallel(
|
151
|
+
subprocess_utils.kill_process_with_grace_period,
|
152
|
+
list(self.workers.values()), # Convert dict values to list
|
153
|
+
num_threads=len(self.workers))
|
154
|
+
|
155
|
+
|
156
|
+
class BurstableExecutor:
|
157
|
+
"""A multiprocessing executor that supports bursting worker processes."""
|
158
|
+
|
159
|
+
# _executor is a PoolExecutor that is used to run guaranteed requests.
|
160
|
+
_executor: Optional[PoolExecutor] = None
|
161
|
+
# _burst_executor is a DisposableExecutor that is used to run burst
|
162
|
+
# requests.
|
163
|
+
_burst_executor: Optional[DisposableExecutor] = None
|
164
|
+
|
165
|
+
def __init__(self,
|
166
|
+
garanteed_workers: int,
|
167
|
+
burst_workers: int = 0,
|
168
|
+
**kwargs):
|
169
|
+
if garanteed_workers > 0:
|
170
|
+
self._executor = PoolExecutor(max_workers=garanteed_workers,
|
171
|
+
**kwargs)
|
172
|
+
if burst_workers > 0:
|
173
|
+
self._burst_executor = DisposableExecutor(max_workers=burst_workers,
|
174
|
+
**kwargs)
|
175
|
+
|
176
|
+
def submit_until_success(self, fn, *args, **kwargs):
|
177
|
+
"""Submit a task for execution until success.
|
178
|
+
|
179
|
+
Prioritizes submitting to the guaranteed pool. If no idle workers
|
180
|
+
are available in the guaranteed pool, it will submit to the burst
|
181
|
+
pool.
|
182
|
+
TODO(aylei): this is coupled with executor.RequestWorker since we
|
183
|
+
know the worker is dedicated to request scheduling and it either
|
184
|
+
blocks on request polling or request submitting. So it is no harm
|
185
|
+
to make submit blocking here. But for general cases, we need an
|
186
|
+
internal queue to decouple submit and run.
|
187
|
+
"""
|
188
|
+
|
189
|
+
while True:
|
190
|
+
if self._executor is not None and self._executor.has_idle_workers():
|
191
|
+
self._executor.submit(fn, *args, **kwargs)
|
192
|
+
break
|
193
|
+
if (self._burst_executor is not None and
|
194
|
+
self._burst_executor.has_idle_workers()):
|
195
|
+
self._burst_executor.submit(fn, *args, **kwargs)
|
196
|
+
break
|
197
|
+
if self._executor is not None:
|
198
|
+
# No idle workers in either pool, still queue the request
|
199
|
+
# to the guaranteed pool to keep behavior consistent.
|
200
|
+
self._executor.submit(fn, *args, **kwargs)
|
201
|
+
break
|
202
|
+
logger.debug('No guaranteed pool set and the burst pool is full, '
|
203
|
+
'retry later.')
|
204
|
+
time.sleep(0.1)
|
205
|
+
|
206
|
+
def shutdown(self) -> None:
    """Shut down whichever pools exist, burst pool first.

    The burst pool's ``shutdown`` is called without arguments; the
    guaranteed pool's ``shutdown`` is called with ``wait=True``. A pool
    that was never created is skipped.
    """
    burst_pool = self._burst_executor
    if burst_pool is not None:
        burst_pool.shutdown()

    guaranteed_pool = self._executor
    if guaranteed_pool is not None:
        guaranteed_pool.shutdown(wait=True)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
"""Process-local queue implementation."""
|
2
|
+
import queue
|
3
|
+
import threading
|
4
|
+
from typing import Dict
|
5
|
+
|
6
|
+
# Global dict to store queues
|
7
|
+
_queues: Dict[str, queue.Queue] = {}
|
8
|
+
_lock = threading.Lock()
|
9
|
+
|
10
|
+
|
11
|
+
def get_queue(queue_name: str) -> queue.Queue:
    """Return the queue registered under ``queue_name``.

    The first call for a given name creates an empty ``queue.Queue`` and
    stores it in the module-level registry; later calls return that same
    object. Lookup and creation both happen while holding the module
    lock.

    Args:
        queue_name: Key identifying the queue in the registry.

    Returns:
        The queue associated with ``queue_name``.
    """
    with _lock:
        try:
            return _queues[queue_name]
        except KeyError:
            # First request for this name: create and register it.
            fresh_queue = queue.Queue()
            _queues[queue_name] = fresh_queue
            return fresh_queue
|
sky/utils/atomic.py
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
"""Atomic structures and utilities."""
|
2
|
+
|
3
|
+
import threading
|
4
|
+
|
5
|
+
|
6
|
+
class AtomicInt:
    """An integer counter whose reads and updates are serialized by a lock."""

    def __init__(self, initial_value: int = 0):
        # Every access to _value below goes through _lock.
        self._value = initial_value
        self._lock = threading.Lock()

    def get(self) -> int:
        """Read the counter while holding the lock.

        Returns:
            The value currently stored.
        """
        with self._lock:
            current = self._value
        return current

    def increment(self, delta: int = 1) -> int:
        """Add ``delta`` to the counter under the lock.

        Args:
            delta: Amount to add (default: 1).

        Returns:
            The value stored after the addition.
        """
        with self._lock:
            updated = self._value + delta
            self._value = updated
            return updated

    def decrement(self, delta: int = 1) -> int:
        """Subtract ``delta`` from the counter under the lock.

        Args:
            delta: Amount to subtract (default: 1).

        Returns:
            The value stored after the subtraction.
        """
        with self._lock:
            updated = self._value - delta
            self._value = updated
            return updated

    def __str__(self) -> str:
        current = self.get()
        return str(current)

    def __repr__(self) -> str:
        current = self.get()
        return f'AtomicInt({current})'
|
sky/utils/common_utils.py
CHANGED
@@ -17,6 +17,8 @@ import typing
|
|
17
17
|
from typing import Any, Callable, Dict, List, Optional, Union
|
18
18
|
import uuid
|
19
19
|
|
20
|
+
import jsonschema
|
21
|
+
|
20
22
|
from sky import exceptions
|
21
23
|
from sky import sky_logging
|
22
24
|
from sky.adaptors import common as adaptors_common
|
@@ -28,12 +30,10 @@ from sky.utils import validator
|
|
28
30
|
|
29
31
|
if typing.TYPE_CHECKING:
|
30
32
|
import jinja2
|
31
|
-
import jsonschema
|
32
33
|
import psutil
|
33
34
|
import yaml
|
34
35
|
else:
|
35
36
|
jinja2 = adaptors_common.LazyImport('jinja2')
|
36
|
-
jsonschema = adaptors_common.LazyImport('jsonschema')
|
37
37
|
psutil = adaptors_common.LazyImport('psutil')
|
38
38
|
yaml = adaptors_common.LazyImport('yaml')
|
39
39
|
|
sky/utils/validator.py
CHANGED
@@ -4,14 +4,7 @@ The main motivation behind extending the existing JSON Schema validator is to
|
|
4
4
|
allow for case-insensitive enum matching since this is currently not supported
|
5
5
|
by the JSON Schema specification.
|
6
6
|
"""
|
7
|
-
import
|
8
|
-
|
9
|
-
from sky.adaptors import common as adaptors_common
|
10
|
-
|
11
|
-
if typing.TYPE_CHECKING:
|
12
|
-
import jsonschema
|
13
|
-
else:
|
14
|
-
jsonschema = adaptors_common.LazyImport('jsonschema')
|
7
|
+
import jsonschema
|
15
8
|
|
16
9
|
|
17
10
|
def case_insensitive_enum(validator, enums, instance, schema):
|
{skypilot_nightly-1.0.0.dev20250407.dist-info → skypilot_nightly-1.0.0.dev20250408.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
sky/__init__.py,sha256
|
1
|
+
sky/__init__.py,sha256=q1bqMlklbkN76ppGuGrZUg38yFnoTcFONAreuXS5ffY,6428
|
2
2
|
sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
|
3
3
|
sky/authentication.py,sha256=ND011K_-Ud1dVZF37A9KrwYir_ihJXcHc7iDWmuBc8Q,22872
|
4
4
|
sky/check.py,sha256=PPNQnaaZBA9_aogJpN4gnG4XWnTqkd74c-rBYDkDRDY,16101
|
@@ -34,7 +34,7 @@ sky/adaptors/vsphere.py,sha256=zJP9SeObEoLrpgHW2VHvZE48EhgVf8GfAEIwBeaDMfM,2129
|
|
34
34
|
sky/backends/__init__.py,sha256=UDjwbUgpTRApbPJnNfR786GadUuwgRk3vsWoVu5RB_c,536
|
35
35
|
sky/backends/backend.py,sha256=4BOqKZ-bwBTpjNnZF4JAHX2m2Iga7EmEn8Ao3tEivaM,7527
|
36
36
|
sky/backends/backend_utils.py,sha256=ndY4IPs1F9QovyiKAnB1FNYGWm52_ylwf_K7wY50cv0,134922
|
37
|
-
sky/backends/cloud_vm_ray_backend.py,sha256=
|
37
|
+
sky/backends/cloud_vm_ray_backend.py,sha256=mjedyasnvINYz9pIFThBqscIvjqiXs1DKZyVD8twnc0,251926
|
38
38
|
sky/backends/docker_utils.py,sha256=Hyw1YY20EyghhEbYx6O2FIMDcGkNzBzV9TM7LFynei8,8358
|
39
39
|
sky/backends/local_docker_backend.py,sha256=nSYCjms3HOPjPNOrcCqsUKm1WV3AAovRFjEQ7hcEXW4,17021
|
40
40
|
sky/backends/wheel_utils.py,sha256=meypuMaygSXXjGdXfq6dhWl-OrpAybg9KVRoup4D0wU,9098
|
@@ -106,7 +106,7 @@ sky/data/data_transfer.py,sha256=-JcnVa_LT0kQejcSCnBwYtxhuuaNDPf_Q5oz62p186c,119
|
|
106
106
|
sky/data/data_utils.py,sha256=ryKUPgNBdeDmGIttqK-J7AKdfc70INTuYH5GOWm3C9g,33581
|
107
107
|
sky/data/mounting_utils.py,sha256=ph2p8cYB28FODgxK5ibiD4B4iMD7T3or99zNQaD9HLs,20162
|
108
108
|
sky/data/storage.py,sha256=85LcC64yxfd5bzTijGZVyMZV41NyzUhOn0xJZieK2Dc,236652
|
109
|
-
sky/data/storage_utils.py,sha256=
|
109
|
+
sky/data/storage_utils.py,sha256=_0NYCWPSjyEGiLNckOl8NzclO5Rd03jRS-hgbQMofBs,13597
|
110
110
|
sky/jobs/__init__.py,sha256=qoI53-xXE0-SOkrLWigvhgFXjk7dWE0OTqGPYIk-kmM,1458
|
111
111
|
sky/jobs/constants.py,sha256=1XiIqdR5dEgGgepLKWkZCRT3MYSsMBR-dO7N4RTsjwg,3088
|
112
112
|
sky/jobs/controller.py,sha256=d5qQYHadesfFgU7-dYtt2trZwyd5IzvlVJeNh5O8OiA,31386
|
@@ -238,11 +238,13 @@ sky/server/uvicorn.py,sha256=wajwPHJ3IEEP3GMNOCc0S81-1v2qT5F-ejUkLFVhUzk,2953
|
|
238
238
|
sky/server/html/log.html,sha256=TSGZktua9Ysl_ysg3w60rjxAxhH61AJnsYDHdtqrjmI,6929
|
239
239
|
sky/server/requests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
240
240
|
sky/server/requests/event_loop.py,sha256=OhpPbuce65bbjpGRlcJa78AVnYSm08SzFKt70ypCUuQ,1211
|
241
|
-
sky/server/requests/executor.py,sha256=
|
241
|
+
sky/server/requests/executor.py,sha256=z9DaLJOy__7BUddMhXCODmxqD3iAblo6-siEsmO9DiU,26495
|
242
242
|
sky/server/requests/payloads.py,sha256=3sF36Z9_PLzpEncW0AplJtOz-_nsn5PJaM5lS-3Y8bw,16558
|
243
243
|
sky/server/requests/preconditions.py,sha256=ipxIb_3JXG6S3-ymcOdqQNb7VDvoPqADxu9ZK7-nQWc,7179
|
244
|
+
sky/server/requests/process.py,sha256=uv6JmqdT1vR6S5j3a0CEmxz3fUoKQoZCryQsjZpZE7E,8734
|
244
245
|
sky/server/requests/requests.py,sha256=9ovdQE-zv_Mvc6IbGATHVyQlOxSKjg_OankZbgDVGeE,21338
|
245
246
|
sky/server/requests/queues/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
247
|
+
sky/server/requests/queues/local_queue.py,sha256=X6VkBiUmgd_kfqIK1hCtMWG1b8GiZbY70TBiBR6c6GY,416
|
246
248
|
sky/server/requests/queues/mp_queue.py,sha256=jDqP4Jd28U3ibSFyMR1DF9I2OWZrPZqFJrG5S6RFpyw,3403
|
247
249
|
sky/server/requests/serializers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
248
250
|
sky/server/requests/serializers/decoders.py,sha256=0cpg80uAqkdK_LqcQPkpKswhcNUUztG9luDLm_0eUow,6811
|
@@ -311,11 +313,12 @@ sky/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
311
313
|
sky/utils/accelerator_registry.py,sha256=rZniDbqqPAF-vjkrwxGwEErFSAp6puOimkRj3ppOSRY,3905
|
312
314
|
sky/utils/admin_policy_utils.py,sha256=y_do0VH6qh163EqSuRW1uGeKvTnJhiYNrHUs77uoOcA,6013
|
313
315
|
sky/utils/annotations.py,sha256=-rfacB30Sl0xkFriejGvxma3oKctGfXXLZkQPHG33eo,1626
|
316
|
+
sky/utils/atomic.py,sha256=vrw-7XCnckF0xCx-ttamao7evPdGtVsnjaTtgMlBXIE,1280
|
314
317
|
sky/utils/cluster_utils.py,sha256=s6DFRXktv6_gF_DnwDEXJ7CniifHp8CAPeGciRCbXgI,14432
|
315
318
|
sky/utils/command_runner.py,sha256=aEBs4Km8b6PqDklNc63tVYMK0w3PBGQEEP21_wmhG1k,39191
|
316
319
|
sky/utils/command_runner.pyi,sha256=mJOzCgcYZAfHwnY_6Wf1YwlTEJGb9ihzc2f0rE0Kw98,7751
|
317
320
|
sky/utils/common.py,sha256=P4oVXFATUYgkruHX92cN12SJBtfb8DiOOYZtbN1kvP0,1927
|
318
|
-
sky/utils/common_utils.py,sha256=
|
321
|
+
sky/utils/common_utils.py,sha256=UM2eSQNdXRvAzlbfC839E7-7DXC9BMMUkquLsmYpu8w,31619
|
319
322
|
sky/utils/config_utils.py,sha256=VQ2E3DQ2XysD-kul-diSrxn_pXWsDMfKAev91OiJQ1Q,9041
|
320
323
|
sky/utils/control_master_utils.py,sha256=iD4M0onjYOdZ2RuxjwMBl4KhafHXJzuHjvqlBUnu-VE,1450
|
321
324
|
sky/utils/controller_utils.py,sha256=mrmkerYyeu7gsCQ56cB3AjCz0r9WaN7teqXUItA47oQ,49805
|
@@ -334,7 +337,7 @@ sky/utils/status_lib.py,sha256=zn_MSuRYQdNKF8pnFOGQ54X_s_R7dyqWS6Q3a9zENw8,1512
|
|
334
337
|
sky/utils/subprocess_utils.py,sha256=yM2WumV49gSKuZs0v6E3R8XKl5Q9b6veIzi6us5ORU8,15927
|
335
338
|
sky/utils/timeline.py,sha256=ob6s3bc7nwAuSI76yLKBrSR5bzOHnOhbozz1avwoet4,4070
|
336
339
|
sky/utils/ux_utils.py,sha256=R-ddrqcwKngziZz5haHufxiUnABaMMbmRVsaUljrPBg,10181
|
337
|
-
sky/utils/validator.py,sha256=
|
340
|
+
sky/utils/validator.py,sha256=yo5cPUjGxqfa0ZxGyEYZMCWZ8O35G-k3VOEAtAoA_3w,856
|
338
341
|
sky/utils/cli_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
339
342
|
sky/utils/cli_utils/status_utils.py,sha256=LwGXzMgvnQeGR1fCC24q38hRLuAPeeSDkQ387eG6YSs,13495
|
340
343
|
sky/utils/kubernetes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -350,9 +353,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488
|
|
350
353
|
sky/utils/kubernetes/kubernetes_deploy_utils.py,sha256=HPVgNt-wbCVPd9dpDFiA7t2mzQLpjXHJ61eiwRbEr-c,10378
|
351
354
|
sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
|
352
355
|
sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=Kq1MDygF2IxFmu9FXpCxqucXLmeUrvs6OtRij6XTQbo,6554
|
353
|
-
skypilot_nightly-1.0.0.
|
354
|
-
skypilot_nightly-1.0.0.
|
355
|
-
skypilot_nightly-1.0.0.
|
356
|
-
skypilot_nightly-1.0.0.
|
357
|
-
skypilot_nightly-1.0.0.
|
358
|
-
skypilot_nightly-1.0.0.
|
356
|
+
skypilot_nightly-1.0.0.dev20250408.dist-info/licenses/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
|
357
|
+
skypilot_nightly-1.0.0.dev20250408.dist-info/METADATA,sha256=EO_QBlBiR_CcaunlS8EDv2fOBCqiy0SQACbeUa6Pd88,18552
|
358
|
+
skypilot_nightly-1.0.0.dev20250408.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
359
|
+
skypilot_nightly-1.0.0.dev20250408.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
|
360
|
+
skypilot_nightly-1.0.0.dev20250408.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
|
361
|
+
skypilot_nightly-1.0.0.dev20250408.dist-info/RECORD,,
|
{skypilot_nightly-1.0.0.dev20250407.dist-info → skypilot_nightly-1.0.0.dev20250408.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|