skypilot-nightly 1.0.0.dev20250206__py3-none-any.whl → 1.0.0.dev20250208__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/authentication.py +1 -1
- sky/backends/backend_utils.py +1 -6
- sky/backends/cloud_vm_ray_backend.py +6 -12
- sky/clouds/do.py +6 -0
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +6 -0
- sky/exceptions.py +8 -0
- sky/jobs/controller.py +3 -5
- sky/jobs/core.py +0 -21
- sky/jobs/recovery_strategy.py +2 -2
- sky/jobs/state.py +91 -26
- sky/jobs/utils.py +28 -50
- sky/provision/azure/instance.py +0 -31
- sky/skylet/constants.py +2 -2
- sky/utils/common_utils.py +1 -2
- sky/utils/controller_utils.py +1 -5
- sky/utils/dag_utils.py +2 -2
- sky/utils/log_utils.py +2 -1
- sky/utils/schemas.py +0 -19
- {skypilot_nightly-1.0.0.dev20250206.dist-info → skypilot_nightly-1.0.0.dev20250208.dist-info}/METADATA +2 -2
- {skypilot_nightly-1.0.0.dev20250206.dist-info → skypilot_nightly-1.0.0.dev20250208.dist-info}/RECORD +25 -25
- {skypilot_nightly-1.0.0.dev20250206.dist-info → skypilot_nightly-1.0.0.dev20250208.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250206.dist-info → skypilot_nightly-1.0.0.dev20250208.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250206.dist-info → skypilot_nightly-1.0.0.dev20250208.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250206.dist-info → skypilot_nightly-1.0.0.dev20250208.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = '54fe787d1fb31687cc78eb307db59d54c2d79076'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.
|
38
|
+
__version__ = '1.0.0.dev20250208'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/authentication.py
CHANGED
@@ -289,7 +289,7 @@ def setup_lambda_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
289
289
|
config['auth']['ssh_public_key'] = PUBLIC_SSH_KEY_PATH
|
290
290
|
|
291
291
|
# TODO(zhwu): we need to avoid uploading the public ssh key to the
|
292
|
-
# nodes, as that will cause problem when the node is used as
|
292
|
+
# nodes, as that will cause problem when the node is used as jobs
|
293
293
|
# controller, i.e., the public and private key on the node may
|
294
294
|
# not match.
|
295
295
|
file_mounts = config['file_mounts']
|
sky/backends/backend_utils.py
CHANGED
@@ -844,11 +844,6 @@ def write_cluster_config(
|
|
844
844
|
|
845
845
|
# User-supplied global instance tags from ~/.sky/config.yaml.
|
846
846
|
labels = skypilot_config.get_nested((str(cloud).lower(), 'labels'), {})
|
847
|
-
# Deprecated: instance_tags have been replaced by labels. For backward
|
848
|
-
# compatibility, we support them and the schema allows them only if
|
849
|
-
# `labels` are not specified. This should be removed after 0.8.0.
|
850
|
-
labels = skypilot_config.get_nested((str(cloud).lower(), 'instance_tags'),
|
851
|
-
labels)
|
852
847
|
# labels is a dict, which is guaranteed by the type check in
|
853
848
|
# schemas.py
|
854
849
|
assert isinstance(labels, dict), labels
|
@@ -997,7 +992,7 @@ def write_cluster_config(
|
|
997
992
|
|
998
993
|
# Read the cluster name from the tmp yaml file, to take the backward
|
999
994
|
# compatibility restoration above into account.
|
1000
|
-
# TODO: remove this after 2 minor releases, 0.
|
995
|
+
# TODO: remove this after 2 minor releases, 0.10.0.
|
1001
996
|
yaml_config = common_utils.read_yaml(tmp_yaml_path)
|
1002
997
|
config_dict['cluster_name_on_cloud'] = yaml_config['cluster_name']
|
1003
998
|
|
@@ -2446,7 +2446,7 @@ class CloudVmRayResourceHandle(backends.backend.ResourceHandle):
|
|
2446
2446
|
# can be None. We need to update it here, even when force_cached is
|
2447
2447
|
# set to True.
|
2448
2448
|
# TODO: We can remove `self.cached_external_ips is None` after
|
2449
|
-
#
|
2449
|
+
# all clouds moved to new provisioner.
|
2450
2450
|
if force_cached and self.cached_external_ips is None:
|
2451
2451
|
raise RuntimeError(
|
2452
2452
|
'Tried to use cached cluster info, but it\'s missing for '
|
@@ -3416,17 +3416,11 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
3416
3416
|
managed_job_code = managed_job_codegen.set_pending(
|
3417
3417
|
job_id, managed_job_dag)
|
3418
3418
|
# Set the managed job to PENDING state to make sure that this
|
3419
|
-
# managed job appears in the `sky jobs queue`,
|
3420
|
-
#
|
3421
|
-
#
|
3422
|
-
#
|
3423
|
-
#
|
3424
|
-
# controller process job has been queued, as our skylet on spot
|
3425
|
-
# controller will set the managed job in FAILED state if the
|
3426
|
-
# controller process job does not exist.
|
3427
|
-
# We cannot set the managed job to PENDING state in the codegen for
|
3428
|
-
# the controller process job, as it will stay in the job pending
|
3429
|
-
# table and not be executed until there is an empty slot.
|
3419
|
+
# managed job appears in the `sky jobs queue`, even if it needs to
|
3420
|
+
# wait to be submitted.
|
3421
|
+
# We cannot set the managed job to PENDING state in the job template
|
3422
|
+
# (jobs-controller.yaml.j2), as it may need to wait for the run
|
3423
|
+
# commands to be scheduled on the job controller in high-load cases.
|
3430
3424
|
job_submit_cmd = job_submit_cmd + ' && ' + managed_job_code
|
3431
3425
|
|
3432
3426
|
returncode, stdout, stderr = self.run_on_head(handle,
|
sky/clouds/do.py
CHANGED
@@ -257,6 +257,12 @@ class DO(clouds.Cloud):
|
|
257
257
|
@classmethod
|
258
258
|
def check_credentials(cls) -> Tuple[bool, Optional[str]]:
|
259
259
|
"""Verify that the user has valid credentials for DO."""
|
260
|
+
|
261
|
+
try:
|
262
|
+
do.exceptions()
|
263
|
+
except ImportError as err:
|
264
|
+
return False, str(err)
|
265
|
+
|
260
266
|
try:
|
261
267
|
# attempt to make a CURL request for listing instances
|
262
268
|
do_utils.client().droplets.list()
|
@@ -306,6 +306,12 @@ def _get_instance_types_df(region: str) -> Union[str, 'pd.DataFrame']:
|
|
306
306
|
assert find_num_in_name is not None, row['InstanceType']
|
307
307
|
num_in_name = find_num_in_name.group(1)
|
308
308
|
acc_count = int(num_in_name) // 2
|
309
|
+
if row['InstanceType'] == 'p5en.48xlarge':
|
310
|
+
# TODO(andyl): Check if this workaround is still needed after
|
311
|
+
# v0.10.0 released. Currently, the acc_name returned by the
|
312
|
+
# AWS API is 'NVIDIA', which is incorrect. See #4652.
|
313
|
+
acc_name = 'H200'
|
314
|
+
acc_count = 8
|
309
315
|
return pd.Series({
|
310
316
|
'AcceleratorName': acc_name,
|
311
317
|
'AcceleratorCount': acc_count,
|
sky/exceptions.py
CHANGED
@@ -81,6 +81,14 @@ class ManagedJobReachedMaxRetriesError(Exception):
|
|
81
81
|
pass
|
82
82
|
|
83
83
|
|
84
|
+
class ManagedJobStatusError(Exception):
|
85
|
+
"""Raised when a managed job task status update is invalid.
|
86
|
+
|
87
|
+
For instance, a RUNNING job cannot become SUBMITTED.
|
88
|
+
"""
|
89
|
+
pass
|
90
|
+
|
91
|
+
|
84
92
|
class ResourcesMismatchError(Exception):
|
85
93
|
"""Raised when resources are mismatched."""
|
86
94
|
pass
|
sky/jobs/controller.py
CHANGED
@@ -68,7 +68,7 @@ class JobsController:
|
|
68
68
|
else:
|
69
69
|
assert task.name is not None, task
|
70
70
|
task_name = task.name
|
71
|
-
# This is guaranteed by the
|
71
|
+
# This is guaranteed by the jobs.launch API, where we fill in
|
72
72
|
# the task.name with
|
73
73
|
# dag_utils.maybe_infer_and_fill_dag_and_task_names.
|
74
74
|
assert task_name is not None, self._dag
|
@@ -137,8 +137,8 @@ class JobsController:
|
|
137
137
|
1. The optimizer cannot find a feasible solution.
|
138
138
|
2. Precheck errors: invalid cluster name, failure in getting
|
139
139
|
cloud user identity, or unsupported feature.
|
140
|
-
exceptions.
|
141
|
-
all prechecks passed but the maximum number of retries is
|
140
|
+
exceptions.ManagedJobReachedMaxRetriesError: This will be raised
|
141
|
+
when all prechecks passed but the maximum number of retries is
|
142
142
|
reached for `sky.launch`. The failure of `sky.launch` can be
|
143
143
|
due to:
|
144
144
|
1. Any of the underlying failover exceptions is due to resources
|
@@ -482,8 +482,6 @@ def _cleanup(job_id: int, dag_yaml: str):
|
|
482
482
|
when reaching here, as we currently only support chain DAGs, and only
|
483
483
|
one task is executed at a time.
|
484
484
|
"""
|
485
|
-
# NOTE: The code to get cluster name is same as what we did in the spot
|
486
|
-
# controller, we should keep it in sync with JobsController.__init__()
|
487
485
|
dag, _ = _get_dag_and_name(dag_yaml)
|
488
486
|
for task in dag.tasks:
|
489
487
|
assert task.name is not None, task
|
sky/jobs/core.py
CHANGED
@@ -472,24 +472,3 @@ def sync_down_logs(
|
|
472
472
|
job_name=name,
|
473
473
|
controller=controller,
|
474
474
|
local_dir=local_dir)
|
475
|
-
|
476
|
-
|
477
|
-
spot_launch = common_utils.deprecated_function(
|
478
|
-
launch,
|
479
|
-
name='sky.jobs.launch',
|
480
|
-
deprecated_name='spot_launch',
|
481
|
-
removing_version='0.8.0',
|
482
|
-
override_argument={'use_spot': True})
|
483
|
-
spot_queue = common_utils.deprecated_function(queue,
|
484
|
-
name='sky.jobs.queue',
|
485
|
-
deprecated_name='spot_queue',
|
486
|
-
removing_version='0.8.0')
|
487
|
-
spot_cancel = common_utils.deprecated_function(cancel,
|
488
|
-
name='sky.jobs.cancel',
|
489
|
-
deprecated_name='spot_cancel',
|
490
|
-
removing_version='0.8.0')
|
491
|
-
spot_tail_logs = common_utils.deprecated_function(
|
492
|
-
tail_logs,
|
493
|
-
name='sky.jobs.tail_logs',
|
494
|
-
deprecated_name='spot_tail_logs',
|
495
|
-
removing_version='0.8.0')
|
sky/jobs/recovery_strategy.py
CHANGED
@@ -263,8 +263,8 @@ class StrategyExecutor:
|
|
263
263
|
1. The optimizer cannot find a feasible solution.
|
264
264
|
2. Precheck errors: invalid cluster name, failure in getting
|
265
265
|
cloud user identity, or unsupported feature.
|
266
|
-
exceptions.
|
267
|
-
all prechecks passed but the maximum number of retries is
|
266
|
+
exceptions.ManagedJobReachedMaxRetriesError: This will be raised
|
267
|
+
when all prechecks passed but the maximum number of retries is
|
268
268
|
reached for `sky.launch`. The failure of `sky.launch` can be
|
269
269
|
due to:
|
270
270
|
1. Any of the underlying failover exceptions is due to resources
|
sky/jobs/state.py
CHANGED
@@ -11,6 +11,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
11
11
|
|
12
12
|
import colorama
|
13
13
|
|
14
|
+
from sky import exceptions
|
14
15
|
from sky import sky_logging
|
15
16
|
from sky.utils import common_utils
|
16
17
|
from sky.utils import db_utils
|
@@ -32,7 +33,7 @@ logger = sky_logging.init_logger(__name__)
|
|
32
33
|
# the same content as the `task_name` column.
|
33
34
|
# The `job_id` is now not really a job id, but only a unique
|
34
35
|
# identifier/primary key for all the tasks. We will use `spot_job_id`
|
35
|
-
# to identify the
|
36
|
+
# to identify the job.
|
36
37
|
# TODO(zhwu): schema migration may be needed.
|
37
38
|
def create_table(cursor, conn):
|
38
39
|
# Enable WAL mode to avoid locking issues.
|
@@ -420,9 +421,16 @@ def set_submitted(job_id: int, task_id: int, run_timestamp: str,
|
|
420
421
|
run_timestamp=(?),
|
421
422
|
specs=(?)
|
422
423
|
WHERE spot_job_id=(?) AND
|
423
|
-
task_id=(?)
|
424
|
+
task_id=(?) AND
|
425
|
+
status=(?) AND
|
426
|
+
end_at IS null""",
|
424
427
|
(resources_str, submit_time, ManagedJobStatus.SUBMITTED.value,
|
425
|
-
run_timestamp, json.dumps(specs), job_id, task_id
|
428
|
+
run_timestamp, json.dumps(specs), job_id, task_id,
|
429
|
+
ManagedJobStatus.PENDING.value))
|
430
|
+
if cursor.rowcount != 1:
|
431
|
+
raise exceptions.ManagedJobStatusError(
|
432
|
+
f'Failed to set the task to submitted. '
|
433
|
+
f'({cursor.rowcount} rows updated)')
|
426
434
|
callback_func('SUBMITTED')
|
427
435
|
|
428
436
|
|
@@ -434,7 +442,14 @@ def set_starting(job_id: int, task_id: int, callback_func: CallbackType):
|
|
434
442
|
"""\
|
435
443
|
UPDATE spot SET status=(?)
|
436
444
|
WHERE spot_job_id=(?) AND
|
437
|
-
task_id=(?)
|
445
|
+
task_id=(?) AND
|
446
|
+
status=(?) AND
|
447
|
+
end_at IS null""", (ManagedJobStatus.STARTING.value, job_id,
|
448
|
+
task_id, ManagedJobStatus.SUBMITTED.value))
|
449
|
+
if cursor.rowcount != 1:
|
450
|
+
raise exceptions.ManagedJobStatusError(
|
451
|
+
f'Failed to set the task to starting. '
|
452
|
+
f'({cursor.rowcount} rows updated)')
|
438
453
|
callback_func('STARTING')
|
439
454
|
|
440
455
|
|
@@ -447,15 +462,25 @@ def set_started(job_id: int, task_id: int, start_time: float,
|
|
447
462
|
"""\
|
448
463
|
UPDATE spot SET status=(?), start_at=(?), last_recovered_at=(?)
|
449
464
|
WHERE spot_job_id=(?) AND
|
450
|
-
task_id=(?)
|
465
|
+
task_id=(?) AND
|
466
|
+
status IN (?, ?) AND
|
467
|
+
end_at IS null""",
|
451
468
|
(
|
452
469
|
ManagedJobStatus.RUNNING.value,
|
453
470
|
start_time,
|
454
471
|
start_time,
|
455
472
|
job_id,
|
456
473
|
task_id,
|
474
|
+
ManagedJobStatus.STARTING.value,
|
475
|
+
# If the task is empty, we will jump straight from PENDING to
|
476
|
+
# RUNNING
|
477
|
+
ManagedJobStatus.PENDING.value,
|
457
478
|
),
|
458
479
|
)
|
480
|
+
if cursor.rowcount != 1:
|
481
|
+
raise exceptions.ManagedJobStatusError(
|
482
|
+
f'Failed to set the task to started. '
|
483
|
+
f'({cursor.rowcount} rows updated)')
|
459
484
|
callback_func('STARTED')
|
460
485
|
|
461
486
|
|
@@ -468,8 +493,15 @@ def set_recovering(job_id: int, task_id: int, callback_func: CallbackType):
|
|
468
493
|
UPDATE spot SET
|
469
494
|
status=(?), job_duration=job_duration+(?)-last_recovered_at
|
470
495
|
WHERE spot_job_id=(?) AND
|
471
|
-
task_id=(?)
|
472
|
-
|
496
|
+
task_id=(?) AND
|
497
|
+
status=(?) AND
|
498
|
+
end_at IS null""",
|
499
|
+
(ManagedJobStatus.RECOVERING.value, time.time(), job_id, task_id,
|
500
|
+
ManagedJobStatus.RUNNING.value))
|
501
|
+
if cursor.rowcount != 1:
|
502
|
+
raise exceptions.ManagedJobStatusError(
|
503
|
+
f'Failed to set the task to recovering. '
|
504
|
+
f'({cursor.rowcount} rows updated)')
|
473
505
|
callback_func('RECOVERING')
|
474
506
|
|
475
507
|
|
@@ -482,8 +514,15 @@ def set_recovered(job_id: int, task_id: int, recovered_time: float,
|
|
482
514
|
UPDATE spot SET
|
483
515
|
status=(?), last_recovered_at=(?), recovery_count=recovery_count+1
|
484
516
|
WHERE spot_job_id=(?) AND
|
485
|
-
task_id=(?)
|
486
|
-
(
|
517
|
+
task_id=(?) AND
|
518
|
+
status=(?) AND
|
519
|
+
end_at IS null""",
|
520
|
+
(ManagedJobStatus.RUNNING.value, recovered_time, job_id, task_id,
|
521
|
+
ManagedJobStatus.RECOVERING.value))
|
522
|
+
if cursor.rowcount != 1:
|
523
|
+
raise exceptions.ManagedJobStatusError(
|
524
|
+
f'Failed to set the task to recovered. '
|
525
|
+
f'({cursor.rowcount} rows updated)')
|
487
526
|
logger.info('==== Recovered. ====')
|
488
527
|
callback_func('RECOVERED')
|
489
528
|
|
@@ -496,10 +535,16 @@ def set_succeeded(job_id: int, task_id: int, end_time: float,
|
|
496
535
|
"""\
|
497
536
|
UPDATE spot SET
|
498
537
|
status=(?), end_at=(?)
|
499
|
-
WHERE spot_job_id=(?) AND
|
500
|
-
AND
|
501
|
-
(
|
502
|
-
|
538
|
+
WHERE spot_job_id=(?) AND
|
539
|
+
task_id=(?) AND
|
540
|
+
status=(?) AND
|
541
|
+
end_at IS null""",
|
542
|
+
(ManagedJobStatus.SUCCEEDED.value, end_time, job_id, task_id,
|
543
|
+
ManagedJobStatus.RUNNING.value))
|
544
|
+
if cursor.rowcount != 1:
|
545
|
+
raise exceptions.ManagedJobStatusError(
|
546
|
+
f'Failed to set the task to succeeded. '
|
547
|
+
f'({cursor.rowcount} rows updated)')
|
503
548
|
callback_func('SUCCEEDED')
|
504
549
|
logger.info('Job succeeded.')
|
505
550
|
|
@@ -571,7 +616,9 @@ def set_failed(
|
|
571
616
|
{set_str}
|
572
617
|
WHERE spot_job_id=(?) {task_query_str} AND end_at IS null""",
|
573
618
|
(end_time, *list(fields_to_set.values()), job_id, *task_value))
|
574
|
-
|
619
|
+
|
620
|
+
updated = cursor.rowcount > 0
|
621
|
+
if callback_func and updated:
|
575
622
|
callback_func('FAILED')
|
576
623
|
logger.info(failure_reason)
|
577
624
|
|
@@ -586,12 +633,15 @@ def set_cancelling(job_id: int, callback_func: CallbackType):
|
|
586
633
|
rows = cursor.execute(
|
587
634
|
"""\
|
588
635
|
UPDATE spot SET
|
589
|
-
status=(?)
|
636
|
+
status=(?)
|
590
637
|
WHERE spot_job_id=(?) AND end_at IS null""",
|
591
|
-
(ManagedJobStatus.CANCELLING.value,
|
592
|
-
|
593
|
-
|
594
|
-
|
638
|
+
(ManagedJobStatus.CANCELLING.value, job_id))
|
639
|
+
updated = rows.rowcount > 0
|
640
|
+
if updated:
|
641
|
+
logger.info('Cancelling the job...')
|
642
|
+
callback_func('CANCELLING')
|
643
|
+
else:
|
644
|
+
logger.info('Cancellation skipped, job is already terminal')
|
595
645
|
|
596
646
|
|
597
647
|
def set_cancelled(job_id: int, callback_func: CallbackType):
|
@@ -607,9 +657,12 @@ def set_cancelled(job_id: int, callback_func: CallbackType):
|
|
607
657
|
WHERE spot_job_id=(?) AND status=(?)""",
|
608
658
|
(ManagedJobStatus.CANCELLED.value, time.time(), job_id,
|
609
659
|
ManagedJobStatus.CANCELLING.value))
|
610
|
-
|
611
|
-
|
612
|
-
|
660
|
+
updated = rows.rowcount > 0
|
661
|
+
if updated:
|
662
|
+
logger.info('Job cancelled.')
|
663
|
+
callback_func('CANCELLED')
|
664
|
+
else:
|
665
|
+
logger.info('Cancellation skipped, job is not CANCELLING')
|
613
666
|
|
614
667
|
|
615
668
|
def set_local_log_file(job_id: int, task_id: Optional[int],
|
@@ -705,8 +758,9 @@ def get_jobs_to_check_status(job_id: Optional[int] = None) -> List[int]:
|
|
705
758
|
job_id: Optional job ID to check. If None, checks all jobs.
|
706
759
|
|
707
760
|
Returns a list of job_ids, including the following:
|
708
|
-
-
|
709
|
-
-
|
761
|
+
- Jobs that have a schedule_state that is not DONE
|
762
|
+
- Jobs that have schedule_state DONE but are in a non-terminal status
|
763
|
+
- Legacy jobs (that is, no schedule state) that are in non-terminal status
|
710
764
|
"""
|
711
765
|
job_filter = '' if job_id is None else 'AND spot.spot_job_id=(?)'
|
712
766
|
job_value = () if job_id is None else (job_id,)
|
@@ -719,7 +773,9 @@ def get_jobs_to_check_status(job_id: Optional[int] = None) -> List[int]:
|
|
719
773
|
|
720
774
|
# Get jobs that are either:
|
721
775
|
# 1. Have schedule state that is not DONE, or
|
722
|
-
# 2. Have
|
776
|
+
# 2. Have schedule state DONE AND are in non-terminal status (unexpected
|
777
|
+
# inconsistent state), or
|
778
|
+
# 3. Have no schedule state (legacy) AND are in non-terminal status
|
723
779
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
724
780
|
rows = cursor.execute(
|
725
781
|
f"""\
|
@@ -728,14 +784,23 @@ def get_jobs_to_check_status(job_id: Optional[int] = None) -> List[int]:
|
|
728
784
|
LEFT OUTER JOIN job_info
|
729
785
|
ON spot.spot_job_id=job_info.spot_job_id
|
730
786
|
WHERE (
|
787
|
+
-- non-legacy jobs that are not DONE
|
731
788
|
(job_info.schedule_state IS NOT NULL AND
|
732
789
|
job_info.schedule_state IS NOT ?)
|
733
790
|
OR
|
734
|
-
|
791
|
+
-- legacy or that are in non-terminal status or
|
792
|
+
-- DONE jobs that are in non-terminal status
|
793
|
+
((-- legacy jobs
|
794
|
+
job_info.schedule_state IS NULL OR
|
795
|
+
-- non-legacy DONE jobs
|
796
|
+
job_info.schedule_state IS ?
|
797
|
+
) AND
|
798
|
+
-- non-terminal
|
735
799
|
status NOT IN ({status_filter_str}))
|
736
800
|
)
|
737
801
|
{job_filter}
|
738
802
|
ORDER BY spot.spot_job_id DESC""", [
|
803
|
+
ManagedJobScheduleState.DONE.value,
|
739
804
|
ManagedJobScheduleState.DONE.value, *terminal_status_values,
|
740
805
|
*job_value
|
741
806
|
]).fetchall()
|
sky/jobs/utils.py
CHANGED
@@ -6,11 +6,9 @@ ManagedJobCodeGen.
|
|
6
6
|
"""
|
7
7
|
import collections
|
8
8
|
import enum
|
9
|
-
import inspect
|
10
9
|
import os
|
11
10
|
import pathlib
|
12
11
|
import shlex
|
13
|
-
import shutil
|
14
12
|
import textwrap
|
15
13
|
import time
|
16
14
|
import traceback
|
@@ -53,7 +51,6 @@ JOB_CONTROLLER_NAME: str = (
|
|
53
51
|
LEGACY_JOB_CONTROLLER_NAME: str = (
|
54
52
|
f'sky-spot-controller-{common_utils.get_user_hash()}')
|
55
53
|
SIGNAL_FILE_PREFIX = '/tmp/sky_jobs_controller_signal_{}'
|
56
|
-
LEGACY_SIGNAL_FILE_PREFIX = '/tmp/sky_spot_controller_signal_{}'
|
57
54
|
# Controller checks its job's status every this many seconds.
|
58
55
|
JOB_STATUS_CHECK_GAP_SECONDS = 20
|
59
56
|
|
@@ -248,16 +245,35 @@ def update_managed_jobs_statuses(job_id: Optional[int] = None):
|
|
248
245
|
schedule_state = tasks[0]['schedule_state']
|
249
246
|
|
250
247
|
# Backwards compatibility: this job was submitted when ray was still
|
251
|
-
# used for managing the parallelism of job controllers.
|
248
|
+
# used for managing the parallelism of job controllers, before #4485.
|
252
249
|
# TODO(cooperc): Remove before 0.11.0.
|
253
250
|
if (schedule_state is
|
254
251
|
managed_job_state.ManagedJobScheduleState.INVALID):
|
255
252
|
_handle_legacy_job(job_id)
|
256
253
|
continue
|
257
254
|
|
258
|
-
#
|
255
|
+
# Handle jobs with schedule state (non-legacy jobs):
|
259
256
|
pid = tasks[0]['controller_pid']
|
260
|
-
if
|
257
|
+
if schedule_state == managed_job_state.ManagedJobScheduleState.DONE:
|
258
|
+
# There are two cases where we could get a job that is DONE.
|
259
|
+
# 1. At query time (get_jobs_to_check_status), the job was not yet
|
260
|
+
# DONE, but since then (before get_managed_jobs is called) it has
|
261
|
+
# hit a terminal status, marked itself done, and exited. This is
|
262
|
+
# fine.
|
263
|
+
# 2. The job is DONE, but in a non-terminal status. This is
|
264
|
+
# unexpected. For instance, the task status is RUNNING, but the
|
265
|
+
# job schedule_state is DONE.
|
266
|
+
if all(task['status'].is_terminal() for task in tasks):
|
267
|
+
# Turns out this job is fine, even though it got pulled by
|
268
|
+
# get_jobs_to_check_status. Probably case #1 above.
|
269
|
+
continue
|
270
|
+
|
271
|
+
logger.error(f'Job {job_id} has DONE schedule state, but some '
|
272
|
+
f'tasks are not terminal. Task statuses: '
|
273
|
+
f'{", ".join(task["status"].value for task in tasks)}')
|
274
|
+
failure_reason = ('Inconsistent internal job state. This is a bug.')
|
275
|
+
elif pid is None:
|
276
|
+
# Non-legacy job and controller process has not yet started.
|
261
277
|
if schedule_state in (
|
262
278
|
managed_job_state.ManagedJobScheduleState.INACTIVE,
|
263
279
|
managed_job_state.ManagedJobScheduleState.WAITING):
|
@@ -458,17 +474,12 @@ def cancel_jobs_by_id(job_ids: Optional[List[int]]) -> str:
|
|
458
474
|
|
459
475
|
# Send the signal to the jobs controller.
|
460
476
|
signal_file = pathlib.Path(SIGNAL_FILE_PREFIX.format(job_id))
|
461
|
-
legacy_signal_file = pathlib.Path(
|
462
|
-
LEGACY_SIGNAL_FILE_PREFIX.format(job_id))
|
463
477
|
# Filelock is needed to prevent race condition between signal
|
464
478
|
# check/removal and signal writing.
|
465
479
|
with filelock.FileLock(str(signal_file) + '.lock'):
|
466
480
|
with signal_file.open('w', encoding='utf-8') as f:
|
467
481
|
f.write(UserSignal.CANCEL.value)
|
468
482
|
f.flush()
|
469
|
-
# Backward compatibility for managed jobs launched before #3419. It
|
470
|
-
# can be removed in the future 0.8.0 release.
|
471
|
-
shutil.copy(str(signal_file), str(legacy_signal_file))
|
472
483
|
cancelled_job_ids.append(job_id)
|
473
484
|
|
474
485
|
if not cancelled_job_ids:
|
@@ -1116,28 +1127,15 @@ class ManagedJobCodeGen:
|
|
1116
1127
|
|
1117
1128
|
>> codegen = ManagedJobCodeGen.show_jobs(...)
|
1118
1129
|
"""
|
1119
|
-
# TODO: the try..except.. block is for backward compatibility. Remove it in
|
1120
|
-
# v0.8.0.
|
1121
1130
|
_PREFIX = textwrap.dedent("""\
|
1122
|
-
|
1123
|
-
|
1124
|
-
from sky.jobs import utils
|
1125
|
-
from sky.jobs import constants as managed_job_constants
|
1126
|
-
from sky.jobs import state as managed_job_state
|
1127
|
-
|
1128
|
-
managed_job_version = managed_job_constants.MANAGED_JOBS_VERSION
|
1129
|
-
except ImportError:
|
1130
|
-
from sky.spot import spot_state as managed_job_state
|
1131
|
-
from sky.spot import spot_utils as utils
|
1131
|
+
from sky.jobs import utils
|
1132
|
+
from sky.jobs import state as managed_job_state
|
1132
1133
|
""")
|
1133
1134
|
|
1134
1135
|
@classmethod
|
1135
1136
|
def get_job_table(cls) -> str:
|
1136
1137
|
code = textwrap.dedent("""\
|
1137
|
-
|
1138
|
-
job_table = utils.dump_spot_job_queue()
|
1139
|
-
else:
|
1140
|
-
job_table = utils.dump_managed_job_queue()
|
1138
|
+
job_table = utils.dump_managed_job_queue()
|
1141
1139
|
print(job_table, flush=True)
|
1142
1140
|
""")
|
1143
1141
|
return cls._build(code)
|
@@ -1173,29 +1171,9 @@ class ManagedJobCodeGen:
|
|
1173
1171
|
job_id: Optional[int],
|
1174
1172
|
follow: bool = True,
|
1175
1173
|
controller: bool = False) -> str:
|
1176
|
-
|
1177
|
-
|
1178
|
-
|
1179
|
-
# Import libraries required by `stream_logs`. The try...except... block
|
1180
|
-
# should be removed in v0.8.0.
|
1181
|
-
code = textwrap.dedent("""\
|
1182
|
-
import os
|
1183
|
-
import time
|
1184
|
-
|
1185
|
-
from sky.skylet import job_lib, log_lib
|
1186
|
-
from sky.skylet import constants
|
1187
|
-
from sky.utils import ux_utils
|
1188
|
-
try:
|
1189
|
-
from sky.jobs.utils import stream_logs_by_id
|
1190
|
-
except ImportError:
|
1191
|
-
from sky.spot.spot_utils import stream_logs_by_id
|
1192
|
-
from typing import Optional
|
1193
|
-
""")
|
1194
|
-
code += inspect.getsource(stream_logs)
|
1195
|
-
code += textwrap.dedent(f"""\
|
1196
|
-
|
1197
|
-
msg = stream_logs({job_id!r}, {job_name!r},
|
1198
|
-
follow={follow}, controller={controller})
|
1174
|
+
code = textwrap.dedent(f"""\
|
1175
|
+
msg = utils.stream_logs({job_id!r}, {job_name!r},
|
1176
|
+
follow={follow}, controller={controller})
|
1199
1177
|
print(msg, flush=True)
|
1200
1178
|
""")
|
1201
1179
|
return cls._build(code)
|
sky/provision/azure/instance.py
CHANGED
@@ -15,7 +15,6 @@ from sky import status_lib
|
|
15
15
|
from sky.adaptors import azure
|
16
16
|
from sky.provision import common
|
17
17
|
from sky.provision import constants
|
18
|
-
from sky.provision.azure import config as config_lib
|
19
18
|
from sky.utils import common_utils
|
20
19
|
from sky.utils import subprocess_utils
|
21
20
|
from sky.utils import ux_utils
|
@@ -52,10 +51,6 @@ _RESOURCE_NETWORK_INTERFACE_TYPE = 'Microsoft.Network/networkInterfaces'
|
|
52
51
|
|
53
52
|
_RESOURCE_GROUP_NOT_FOUND_ERROR_MESSAGE = 'ResourceGroupNotFound'
|
54
53
|
_POLL_INTERVAL = 1
|
55
|
-
# TODO(Doyoung): _LEGACY_NSG_NAME can be remove this after 0.8.0 to ignore
|
56
|
-
# legacy nsg names.
|
57
|
-
_LEGACY_NSG_NAME = 'ray-{cluster_name_on_cloud}-nsg'
|
58
|
-
_SECOND_LEGACY_NSG_NAME = 'sky-{cluster_name_on_cloud}-nsg'
|
59
54
|
|
60
55
|
|
61
56
|
class AzureInstanceStatus(enum.Enum):
|
@@ -987,32 +982,6 @@ def query_instances(
|
|
987
982
|
return statuses
|
988
983
|
|
989
984
|
|
990
|
-
# TODO(Doyoung): _get_cluster_nsg can be remove this after 0.8.0 to ignore
|
991
|
-
# legacy nsg names.
|
992
|
-
def _get_cluster_nsg(network_client: Client, resource_group: str,
|
993
|
-
cluster_name_on_cloud: str) -> NetworkSecurityGroup:
|
994
|
-
"""Retrieve the NSG associated with the given name of the cluster."""
|
995
|
-
list_network_security_groups = _get_azure_sdk_function(
|
996
|
-
client=network_client.network_security_groups, function_name='list')
|
997
|
-
legacy_nsg_name = _LEGACY_NSG_NAME.format(
|
998
|
-
cluster_name_on_cloud=cluster_name_on_cloud)
|
999
|
-
second_legacy_nsg_name = _SECOND_LEGACY_NSG_NAME.format(
|
1000
|
-
cluster_name_on_cloud=cluster_name_on_cloud)
|
1001
|
-
_, nsg_name = config_lib.get_cluster_id_and_nsg_name(
|
1002
|
-
resource_group=resource_group,
|
1003
|
-
cluster_name_on_cloud=cluster_name_on_cloud)
|
1004
|
-
possible_nsg_names = [nsg_name, legacy_nsg_name, second_legacy_nsg_name]
|
1005
|
-
for nsg in list_network_security_groups(resource_group):
|
1006
|
-
if nsg.name in possible_nsg_names:
|
1007
|
-
return nsg
|
1008
|
-
|
1009
|
-
# Raise an error if no matching NSG is found
|
1010
|
-
raise ValueError('Failed to find a matching NSG for cluster '
|
1011
|
-
f'{cluster_name_on_cloud!r} in resource group '
|
1012
|
-
f'{resource_group!r}. Expected NSG names were: '
|
1013
|
-
f'{possible_nsg_names}.')
|
1014
|
-
|
1015
|
-
|
1016
985
|
def open_ports(
|
1017
986
|
cluster_name_on_cloud: str,
|
1018
987
|
ports: List[str],
|
sky/skylet/constants.py
CHANGED
@@ -86,7 +86,7 @@ TASK_ID_LIST_ENV_VAR = 'SKYPILOT_TASK_IDS'
|
|
86
86
|
# cluster yaml is updated.
|
87
87
|
#
|
88
88
|
# TODO(zongheng,zhanghao): make the upgrading of skylet automatic?
|
89
|
-
SKYLET_VERSION = '
|
89
|
+
SKYLET_VERSION = '12'
|
90
90
|
# The version of the lib files that skylet/jobs use. Whenever there is an API
|
91
91
|
# change for the job_lib or log_lib, we need to bump this version, so that the
|
92
92
|
# user can be notified to update their SkyPilot version on the remote cluster.
|
@@ -278,7 +278,7 @@ FILE_MOUNTS_WORKDIR_SUBPATH = 'job-{run_id}/workdir'
|
|
278
278
|
FILE_MOUNTS_SUBPATH = 'job-{run_id}/local-file-mounts/{i}'
|
279
279
|
FILE_MOUNTS_TMP_SUBPATH = 'job-{run_id}/tmp-files'
|
280
280
|
|
281
|
-
# The default idle timeout for SkyPilot controllers. This include
|
281
|
+
# The default idle timeout for SkyPilot controllers. This includes jobs
|
282
282
|
# controller and sky serve controller.
|
283
283
|
# TODO(tian): Refactor to controller_utils. Current blocker: circular import.
|
284
284
|
CONTROLLER_IDLE_MINUTES_TO_AUTOSTOP = 10
|
sky/utils/common_utils.py
CHANGED
@@ -28,7 +28,6 @@ from sky.utils import validator
|
|
28
28
|
|
29
29
|
_USER_HASH_FILE = os.path.expanduser('~/.sky/user_hash')
|
30
30
|
USER_HASH_LENGTH = 8
|
31
|
-
USER_HASH_LENGTH_IN_CLUSTER_NAME = 4
|
32
31
|
|
33
32
|
# We are using base36 to reduce the length of the hash. 2 chars -> 36^2 = 1296
|
34
33
|
# possibilities. considering the final cluster name contains the prefix as well,
|
@@ -182,7 +181,7 @@ def make_cluster_name_on_cloud(display_name: str,
|
|
182
181
|
f'on the cloud, we convert it to {cluster_name_on_cloud}.')
|
183
182
|
user_hash = ''
|
184
183
|
if add_user_hash:
|
185
|
-
user_hash = get_user_hash()
|
184
|
+
user_hash = get_user_hash()
|
186
185
|
user_hash = f'-{user_hash}'
|
187
186
|
user_hash_length = len(user_hash)
|
188
187
|
|
sky/utils/controller_utils.py
CHANGED
@@ -458,10 +458,6 @@ def get_controller_resources(
|
|
458
458
|
if custom_controller_resources_config is not None:
|
459
459
|
controller_resources_config_copied.update(
|
460
460
|
custom_controller_resources_config)
|
461
|
-
elif controller == Controllers.JOBS_CONTROLLER:
|
462
|
-
controller_resources_config_copied.update(
|
463
|
-
skypilot_config.get_nested(('spot', 'controller', 'resources'),
|
464
|
-
{}))
|
465
461
|
|
466
462
|
try:
|
467
463
|
controller_resources = resources.Resources.from_yaml_config(
|
@@ -938,7 +934,7 @@ def maybe_translate_local_file_mounts_and_sync_up(task: 'task_lib.Task',
|
|
938
934
|
if (storage_obj.source is not None and
|
939
935
|
not data_utils.is_cloud_store_url(storage_obj.source)):
|
940
936
|
# Need to replace the local path with bucket URI, and remove the
|
941
|
-
# name field, so that the storage mount can work on the
|
937
|
+
# name field, so that the storage mount can work on the jobs
|
942
938
|
# controller.
|
943
939
|
store_types = list(storage_obj.stores.keys())
|
944
940
|
assert len(store_types) == 1, (
|
sky/utils/dag_utils.py
CHANGED
@@ -12,7 +12,7 @@ from sky.utils import ux_utils
|
|
12
12
|
|
13
13
|
logger = sky_logging.init_logger(__name__)
|
14
14
|
|
15
|
-
# Message thrown when APIs sky.{exec,launch,
|
15
|
+
# Message thrown when APIs sky.{exec,launch,jobs.launch}() received a string
|
16
16
|
# instead of a Dag. CLI (cli.py) is implemented by us so should not trigger
|
17
17
|
# this.
|
18
18
|
_ENTRYPOINT_STRING_AS_DAG_MESSAGE = """\
|
@@ -31,7 +31,7 @@ The command can then be run as:
|
|
31
31
|
|
32
32
|
sky.launch(task, ...)
|
33
33
|
|
34
|
-
sky.
|
34
|
+
sky.jobs.launch(task, ...)
|
35
35
|
""".strip()
|
36
36
|
|
37
37
|
|
sky/utils/log_utils.py
CHANGED
@@ -253,7 +253,8 @@ def readable_time_duration(start: Optional[float],
|
|
253
253
|
e.g. "1h 2m 23s"
|
254
254
|
"""
|
255
255
|
# start < 0 means that the starting time is not specified yet.
|
256
|
-
# It is only used in
|
256
|
+
# It is only used in jobs_utils.format_job_table() for job duration
|
257
|
+
# calculation.
|
257
258
|
if start is None or start < 0:
|
258
259
|
return '-'
|
259
260
|
if end == start == 0:
|
sky/utils/schemas.py
CHANGED
@@ -86,11 +86,6 @@ def _get_single_resources_schema():
|
|
86
86
|
'use_spot': {
|
87
87
|
'type': 'boolean',
|
88
88
|
},
|
89
|
-
# Deprecated: use 'job_recovery' instead. This is for backward
|
90
|
-
# compatibility, and can be removed in 0.8.0.
|
91
|
-
'spot_recovery': {
|
92
|
-
'type': 'string',
|
93
|
-
},
|
94
89
|
'job_recovery': {
|
95
90
|
# Either a string or a dict.
|
96
91
|
'anyOf': [{
|
@@ -256,8 +251,6 @@ def get_resources_schema():
|
|
256
251
|
'items': multi_resources_schema,
|
257
252
|
}
|
258
253
|
},
|
259
|
-
# Avoid job_recovery and spot_recovery being present at the same time.
|
260
|
-
**_check_not_both_fields_present('job_recovery', 'spot_recovery')
|
261
254
|
}
|
262
255
|
|
263
256
|
|
@@ -631,15 +624,6 @@ _NETWORK_CONFIG_SCHEMA = {
|
|
631
624
|
}
|
632
625
|
|
633
626
|
_LABELS_SCHEMA = {
|
634
|
-
# Deprecated: 'instance_tags' is replaced by 'labels'. Keeping for backward
|
635
|
-
# compatibility. Will be removed after 0.8.0.
|
636
|
-
'instance_tags': {
|
637
|
-
'type': 'object',
|
638
|
-
'required': [],
|
639
|
-
'additionalProperties': {
|
640
|
-
'type': 'string',
|
641
|
-
},
|
642
|
-
},
|
643
627
|
'labels': {
|
644
628
|
'type': 'object',
|
645
629
|
'required': [],
|
@@ -974,7 +958,6 @@ def get_config_schema():
|
|
974
958
|
'additionalProperties': False,
|
975
959
|
'properties': {
|
976
960
|
'jobs': controller_resources_schema,
|
977
|
-
'spot': controller_resources_schema,
|
978
961
|
'serve': controller_resources_schema,
|
979
962
|
'allowed_clouds': allowed_clouds,
|
980
963
|
'admin_policy': admin_policy_schema,
|
@@ -982,6 +965,4 @@ def get_config_schema():
|
|
982
965
|
'nvidia_gpus': gpu_configs,
|
983
966
|
**cloud_configs,
|
984
967
|
},
|
985
|
-
# Avoid spot and jobs being present at the same time.
|
986
|
-
**_check_not_both_fields_present('spot', 'jobs')
|
987
968
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: skypilot-nightly
|
3
|
-
Version: 1.0.0.dev20250206
|
3
|
+
Version: 1.0.0.dev20250208
|
4
4
|
Summary: SkyPilot: An intercloud broker for the clouds
|
5
5
|
Author: SkyPilot Team
|
6
6
|
License: Apache 2.0
|
@@ -244,7 +244,7 @@ To get the latest features and fixes, use the nightly build or [install from sou
|
|
244
244
|
pip install "skypilot-nightly[kubernetes,aws,gcp,azure,oci,lambda,runpod,fluidstack,paperspace,cudo,ibm,scp]"
|
245
245
|
```
|
246
246
|
|
247
|
-
[Current supported infra](https://docs.skypilot.co/en/latest/getting-started/installation.html) (Kubernetes; AWS, GCP, Azure, OCI, Lambda Cloud, Fluidstack, RunPod, Cudo, Paperspace, Cloudflare, Samsung, IBM, VMware vSphere):
|
247
|
+
[Current supported infra](https://docs.skypilot.co/en/latest/getting-started/installation.html) (Kubernetes; AWS, GCP, Azure, OCI, Lambda Cloud, Fluidstack, RunPod, Cudo, Digital Ocean, Paperspace, Cloudflare, Samsung, IBM, Vast.ai, VMware vSphere):
|
248
248
|
<p align="center">
|
249
249
|
<img alt="SkyPilot" src="https://raw.githubusercontent.com/skypilot-org/skypilot/master/docs/source/images/cloud-logos-light.png" width=85%>
|
250
250
|
</p>
|
{skypilot_nightly-1.0.0.dev20250206.dist-info → skypilot_nightly-1.0.0.dev20250208.dist-info}/RECORD
RENAMED
@@ -1,12 +1,12 @@
|
|
1
|
-
sky/__init__.py,sha256=
|
1
|
+
sky/__init__.py,sha256=2uSBZnSUInYN4UYP61AUqexpEV1-oycfpMPovSO_Vn8,5560
|
2
2
|
sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
|
3
|
-
sky/authentication.py,sha256=
|
3
|
+
sky/authentication.py,sha256=MNc9uHnvQ1EsEl8SsrYcYCGbxcnDbR6gaRCXVNd5RZE,22338
|
4
4
|
sky/check.py,sha256=xzLlxUkBCrzpOho8lw65EvKLPl_b9lA2nteF5MSYbDQ,10885
|
5
5
|
sky/cli.py,sha256=B-YWYiKnfSGdSOXtAY8SRGOGhneUeNPBjXFZ0FuLZ8w,214131
|
6
6
|
sky/cloud_stores.py,sha256=PcLT57_8SZy7o6paAluElfBynaLkbaOq3l-8dNg1AVM,23672
|
7
7
|
sky/core.py,sha256=fE1rn4Ku94S0XmWTO5-6t6eT6aaJImNczRqEnTe8v7Q,38742
|
8
8
|
sky/dag.py,sha256=f3sJlkH4bE6Uuz3ozNtsMhcBpRx7KmC9Sa4seDKt4hU,3104
|
9
|
-
sky/exceptions.py,sha256=
|
9
|
+
sky/exceptions.py,sha256=SEhRubPlk-crkflPC5P_Z085iLrSd3UScYwc790QwYw,9378
|
10
10
|
sky/execution.py,sha256=dpbk1kGRkGHT0FCJKGvjqeV3qIGEN2K20NDZbVrcAvI,28483
|
11
11
|
sky/global_user_state.py,sha256=cTwltMCDIIBaapuGgARxFwpDJDCiKKyVW-PP_qtWuCA,30241
|
12
12
|
sky/optimizer.py,sha256=d5BPAEZVrS3a2oBclSwo8MWkHQKQ3u4tcyawOANN0_0,59836
|
@@ -32,8 +32,8 @@ sky/adaptors/vast.py,sha256=tpvmHi7IkQNzbbHVkeo04kUSajoEpSzXr2XgeO_I1LU,695
|
|
32
32
|
sky/adaptors/vsphere.py,sha256=zJP9SeObEoLrpgHW2VHvZE48EhgVf8GfAEIwBeaDMfM,2129
|
33
33
|
sky/backends/__init__.py,sha256=UDjwbUgpTRApbPJnNfR786GadUuwgRk3vsWoVu5RB_c,536
|
34
34
|
sky/backends/backend.py,sha256=iBs5gnMaaUoH2OIQ3xhAjWdrJWqj8T61Za9TGsBFpvQ,7515
|
35
|
-
sky/backends/backend_utils.py,sha256=
|
36
|
-
sky/backends/cloud_vm_ray_backend.py,sha256=
|
35
|
+
sky/backends/backend_utils.py,sha256=7NDw_wVvtPR62rc7LYalPfLo37SjJfaY4gmbKeJFDuw,137185
|
36
|
+
sky/backends/cloud_vm_ray_backend.py,sha256=wdQhS8nwhNMNJZ1L7ozECNt23xwXXaEvzS51ZfUWwdM,246768
|
37
37
|
sky/backends/docker_utils.py,sha256=Hyw1YY20EyghhEbYx6O2FIMDcGkNzBzV9TM7LFynei8,8358
|
38
38
|
sky/backends/local_docker_backend.py,sha256=nSYCjms3HOPjPNOrcCqsUKm1WV3AAovRFjEQ7hcEXW4,17021
|
39
39
|
sky/backends/wheel_utils.py,sha256=5BUzBqfYz7p1ME6_0PXGmcsAkLVb8NrFt317p7a4X8s,8278
|
@@ -47,7 +47,7 @@ sky/clouds/azure.py,sha256=UlBsgVH3dTV1INFLwHCntpXrdZ4ByTkdXQmjvVdGyQo,31608
|
|
47
47
|
sky/clouds/cloud.py,sha256=5_ZduUcyCEY1JnX_h0PrJ5xwtPP4oor4jf6cICgSArc,35370
|
48
48
|
sky/clouds/cloud_registry.py,sha256=oLoYFjm_SDTgdHokY7b6A5Utq80HXRQNxV0fLjDdVsQ,2361
|
49
49
|
sky/clouds/cudo.py,sha256=TjlgTklsNhbzpTaqEZ5TudnH7YW9aJpkt4xyAhyaKj0,13094
|
50
|
-
sky/clouds/do.py,sha256=
|
50
|
+
sky/clouds/do.py,sha256=od4gMTrs2W5IkaDDr_oexOSdIOqn94vKq2U_QZcrpRk,11311
|
51
51
|
sky/clouds/fluidstack.py,sha256=u2I6jXEtTqgqRWi2EafMsKqc8VkUq1cR6CSDUvk72_U,12407
|
52
52
|
sky/clouds/gcp.py,sha256=6QOnefFsYiLCcnajjduLHsayqJ641bBu42jPTpvy7Mc,55007
|
53
53
|
sky/clouds/ibm.py,sha256=0ArRTQx1_DpTNGByFhukzFedEDzmVjBsGiiques1bQ0,21447
|
@@ -80,7 +80,7 @@ sky/clouds/service_catalog/vast_catalog.py,sha256=3QfbFx7b2UIjrMbvjPyhuc7ppaKC3h
|
|
80
80
|
sky/clouds/service_catalog/vsphere_catalog.py,sha256=OV3Czi3vwRSW4lqVPHxU_GND0ox322gmhv3kb11Q8AM,4412
|
81
81
|
sky/clouds/service_catalog/data_fetchers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
82
82
|
sky/clouds/service_catalog/data_fetchers/analyze.py,sha256=VdksJQs3asFE8H5T3ZV1FJas2xD9WEX6c-V5p7y-wp4,2084
|
83
|
-
sky/clouds/service_catalog/data_fetchers/fetch_aws.py,sha256=
|
83
|
+
sky/clouds/service_catalog/data_fetchers/fetch_aws.py,sha256=Zj4bqWPiDcT_ZFyHxQw_k7ORxWZrx91euar9kL0VPaI,23343
|
84
84
|
sky/clouds/service_catalog/data_fetchers/fetch_azure.py,sha256=7YVnoGDGGZI2TK02bj_LOoD4E5J5CFl6eqz2XlR4Vy8,12790
|
85
85
|
sky/clouds/service_catalog/data_fetchers/fetch_cudo.py,sha256=52P48lvWN0s1ArjeLPeLemPRpxjSRcHincRle0nqdm4,3440
|
86
86
|
sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py,sha256=yKuAFbjBRNz_e2RNNDT_aHHAuKQ86Ac7GKgIie5O6Pg,7273
|
@@ -102,12 +102,12 @@ sky/data/storage.py,sha256=CWVKnHhdzXw1biPbRqYizkyVexL_OCELuJCqtd4hit4,204094
|
|
102
102
|
sky/data/storage_utils.py,sha256=cM3kxlffYE7PnJySDu8huyUsMX_JYsf9uer8r5OYsjo,9556
|
103
103
|
sky/jobs/__init__.py,sha256=ObZcz3lL1ip8JcmR6gbfZ4RMMfXJJdsnuU2zLQUb8jY,1546
|
104
104
|
sky/jobs/constants.py,sha256=6RphkJ6pmafQ7XYW5qwId1Zvqb99HJelA9kgrgfNR7o,1421
|
105
|
-
sky/jobs/controller.py,sha256=
|
106
|
-
sky/jobs/core.py,sha256=
|
107
|
-
sky/jobs/recovery_strategy.py,sha256=
|
105
|
+
sky/jobs/controller.py,sha256=cX8kGplwa-0Te_ihUfzzOr-TRs_Fw6UdFPm6mrtSE0c,28548
|
106
|
+
sky/jobs/core.py,sha256=b9aJB90AxUdhoasSxsWBoD-mQY1MmC05FbPbtyFMzHI,19154
|
107
|
+
sky/jobs/recovery_strategy.py,sha256=49H1ca5N4bIJ3W4iqurxzSvJE0dIihPt2XnstboxUm4,26370
|
108
108
|
sky/jobs/scheduler.py,sha256=IUW0a_69Pkvs4jqsWCXkeMDIZn-TTuPNyZvPLGRUYUM,12306
|
109
|
-
sky/jobs/state.py,sha256=
|
110
|
-
sky/jobs/utils.py,sha256
|
109
|
+
sky/jobs/state.py,sha256=y9X1JGWpCokWBIwHZGB55fx39ZsilB1NZTbN_U8mPjA,43528
|
110
|
+
sky/jobs/utils.py,sha256=-xojxXXfv_Sh31YXY0pMFSQLMt65G-QEZe9ITGad56k,51943
|
111
111
|
sky/jobs/dashboard/dashboard.py,sha256=ZMysaI6m5vtGvT4OPUdStLY7Gkieefyzh1l9o_WILqY,7896
|
112
112
|
sky/jobs/dashboard/static/favicon.ico,sha256=uYlvgxSM7gjBmXpZ8wydvZUPAbJiiix-rc2Xe5mma9s,15086
|
113
113
|
sky/jobs/dashboard/templates/index.html,sha256=tz95q8O2pF7IvfY6yv0rnPyhj4DX8WX4RIVVxqFKV1Y,28519
|
@@ -126,7 +126,7 @@ sky/provision/aws/utils.py,sha256=m49pS-SHGW7Au3bhDeTPsL8N5iRzbwOXzyEWRCc1Vho,32
|
|
126
126
|
sky/provision/azure/__init__.py,sha256=87cgk1_Ws7n9rqaDDPv-HpfrkVeSQMdFQnhnXwyx9g4,548
|
127
127
|
sky/provision/azure/azure-config-template.json,sha256=jrjAgOtpe0e6FSg3vsVqHKQqJe0w-HeWOFT1HuwzS2c,4712
|
128
128
|
sky/provision/azure/config.py,sha256=V5-0Zelt4Xo0vcqnD6PpsnaCS7vc3xosDelILDAKSW4,8885
|
129
|
-
sky/provision/azure/instance.py,sha256=
|
129
|
+
sky/provision/azure/instance.py,sha256=w8HDiweytYbKLfiaRd7HUVn4ZsCbRDSHFylCdgwBcMY,47489
|
130
130
|
sky/provision/cudo/__init__.py,sha256=KAEl26MVPsk7IoP9Gg-MOJJRIV6-X9B0fbyHdyJWdLo,741
|
131
131
|
sky/provision/cudo/config.py,sha256=RYOVkV0MoUqVBJRZiKhBZhjFygeyFs7eUdVMdPg1vds,327
|
132
132
|
sky/provision/cudo/cudo_machine_type.py,sha256=_VNXWPELmlFXbtdcnPvkuLuyE9CZ923BUCdiac-ClDY,696
|
@@ -215,7 +215,7 @@ sky/skylet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
215
215
|
sky/skylet/attempt_skylet.py,sha256=GZ6ITjjA0m-da3IxXXfoHR6n4pjp3X3TOXUqVvSrV0k,2136
|
216
216
|
sky/skylet/autostop_lib.py,sha256=JPDHmByuhoNYXSUHl-OnyeJUkOFWn7gDM1FrS7Kr3E8,4478
|
217
217
|
sky/skylet/configs.py,sha256=UtnpmEL0F9hH6PSjhsps7xgjGZ6qzPOfW1p2yj9tSng,1887
|
218
|
-
sky/skylet/constants.py,sha256=
|
218
|
+
sky/skylet/constants.py,sha256=EUSW4yH59eqBDLMIdmQWIYd3nAJBFoUeo5v9MGiginI,16057
|
219
219
|
sky/skylet/events.py,sha256=0bOjUYpphuAficD9wDB5NOan2vwJDaRqdnm4sl0RK0U,12535
|
220
220
|
sky/skylet/job_lib.py,sha256=Rk-C069cusJIRXsks8xqCb016JSt7GlpU7LrpX0qFJk,42785
|
221
221
|
sky/skylet/log_lib.py,sha256=oFEBd85vDYFrIyyZKekH30yc4rRYILC0F0o-COQ64oE,20445
|
@@ -270,17 +270,17 @@ sky/utils/admin_policy_utils.py,sha256=_Vt_jTTYCXmMdryj0vrrumFPewa93qHnzUqBDXjAh
|
|
270
270
|
sky/utils/cluster_yaml_utils.py,sha256=1wRRYqI1kI-eFs1pMW4r_FFjHJ0zamq6v2RRI-Gtx5E,849
|
271
271
|
sky/utils/command_runner.py,sha256=ewDjFxcCOv0OeG2aUOIfVWmTls65up9DvSnAXURvGfM,36696
|
272
272
|
sky/utils/command_runner.pyi,sha256=mJOzCgcYZAfHwnY_6Wf1YwlTEJGb9ihzc2f0rE0Kw98,7751
|
273
|
-
sky/utils/common_utils.py,sha256=
|
273
|
+
sky/utils/common_utils.py,sha256=sAN4d105BxDjJts4nveGOUq61gQA6NVmTrXtp6H97hg,25298
|
274
274
|
sky/utils/control_master_utils.py,sha256=90hnxiAUP20gbJ9e3MERh7rb04ZO_I3LsljNjR26H5I,1416
|
275
|
-
sky/utils/controller_utils.py,sha256=
|
276
|
-
sky/utils/dag_utils.py,sha256=
|
275
|
+
sky/utils/controller_utils.py,sha256=SUrhK46ouBH2rm7azfFLIWr-T9-voYAdiXl2z5fG4Qw,45948
|
276
|
+
sky/utils/dag_utils.py,sha256=l_0O3RUfe9OdQ9mtbhdlHpJVD4VAF_HQ3A75dgsYIjM,6099
|
277
277
|
sky/utils/db_utils.py,sha256=K2-OHPg0FeHCarevMdWe0IWzm6wWumViEeYeJuGoFUE,3747
|
278
278
|
sky/utils/env_options.py,sha256=E5iwRFBUY2Iq6e0y0c1Mv5OSQ4MRNdk0-p38xUyVerc,1366
|
279
279
|
sky/utils/kubernetes_enums.py,sha256=imGqHSa8O07zD_6xH1SDMM7dBU5lF5fzFFlQuQy00QM,1384
|
280
|
-
sky/utils/log_utils.py,sha256=
|
280
|
+
sky/utils/log_utils.py,sha256=AjkgSrk0GVOUbnnCEC2f4lsf2HOIXkZETCxR0BJw2-U,14152
|
281
281
|
sky/utils/resources_utils.py,sha256=06Kx6AfbBdwBYGmIYFEY_qm6OBc2a5esZMPvIX7gCvc,7787
|
282
282
|
sky/utils/rich_utils.py,sha256=hmnI1X5dKvRIQzB7EyNb34FT97qFNve-0QHqM5r0mVk,3066
|
283
|
-
sky/utils/schemas.py,sha256=
|
283
|
+
sky/utils/schemas.py,sha256=z3wiu6Lzvdov9mLkM6lwp_RKsLnuE5_fuAuAQWU-IFk,29736
|
284
284
|
sky/utils/subprocess_utils.py,sha256=YhtxqHoaZLw2M9TikTH56dTboZN3Qu2RsGeWo4uwJVA,12054
|
285
285
|
sky/utils/timeline.py,sha256=ebHxKJK2HX0utGArrUgSezTPkcwav3VETa_AQS34t-E,3925
|
286
286
|
sky/utils/ux_utils.py,sha256=CqyIFGDuSE8fQasPkna_loZMwtboC9KedR09WEQ7qz0,6502
|
@@ -298,9 +298,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=k0TBoQ4zgf79-sVkixKSGYFHQ7Z
|
|
298
298
|
sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
|
299
299
|
sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
|
300
300
|
sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=Kq1MDygF2IxFmu9FXpCxqucXLmeUrvs6OtRij6XTQbo,6554
|
301
|
-
skypilot_nightly-1.0.0.
|
302
|
-
skypilot_nightly-1.0.0.
|
303
|
-
skypilot_nightly-1.0.0.
|
304
|
-
skypilot_nightly-1.0.0.
|
305
|
-
skypilot_nightly-1.0.0.
|
306
|
-
skypilot_nightly-1.0.0.
|
301
|
+
skypilot_nightly-1.0.0.dev20250208.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
|
302
|
+
skypilot_nightly-1.0.0.dev20250208.dist-info/METADATA,sha256=NOzgyFXK0rfDlLfzYPGbhY49z1XTCIDKuFWYuTeI2Mc,21397
|
303
|
+
skypilot_nightly-1.0.0.dev20250208.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
304
|
+
skypilot_nightly-1.0.0.dev20250208.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
|
305
|
+
skypilot_nightly-1.0.0.dev20250208.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
|
306
|
+
skypilot_nightly-1.0.0.dev20250208.dist-info/RECORD,,
|
File without changes
|
{skypilot_nightly-1.0.0.dev20250206.dist-info → skypilot_nightly-1.0.0.dev20250208.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|