skypilot-nightly 1.0.0.dev20241029__py3-none-any.whl → 1.0.0.dev20241030__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +13 -3
- sky/execution.py +5 -4
- sky/jobs/controller.py +38 -22
- sky/jobs/recovery_strategy.py +30 -5
- sky/jobs/state.py +33 -5
- sky/jobs/utils.py +28 -4
- sky/resources.py +25 -8
- sky/setup_files/setup.py +4 -3
- sky/skylet/job_lib.py +34 -42
- sky/utils/dag_utils.py +14 -4
- sky/utils/schemas.py +21 -1
- {skypilot_nightly-1.0.0.dev20241029.dist-info → skypilot_nightly-1.0.0.dev20241030.dist-info}/METADATA +13 -11
- {skypilot_nightly-1.0.0.dev20241029.dist-info → skypilot_nightly-1.0.0.dev20241030.dist-info}/RECORD +18 -18
- {skypilot_nightly-1.0.0.dev20241029.dist-info → skypilot_nightly-1.0.0.dev20241030.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241029.dist-info → skypilot_nightly-1.0.0.dev20241030.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20241029.dist-info → skypilot_nightly-1.0.0.dev20241030.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241029.dist-info → skypilot_nightly-1.0.0.dev20241030.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = '9d50f192b262d5f6cc74b5b6644f3a9e3ea31f2f'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.
|
38
|
+
__version__ = '1.0.0.dev20241030'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
@@ -3175,9 +3175,19 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
|
|
3175
3175
|
returncode = _run_setup(f'{create_script_code} && {setup_cmd}',)
|
3176
3176
|
if returncode == 255:
|
3177
3177
|
is_message_too_long = False
|
3178
|
-
|
3179
|
-
|
3180
|
-
|
3178
|
+
try:
|
3179
|
+
with open(os.path.expanduser(setup_log_path),
|
3180
|
+
'r',
|
3181
|
+
encoding='utf-8') as f:
|
3182
|
+
if 'too long' in f.read():
|
3183
|
+
is_message_too_long = True
|
3184
|
+
except Exception as e: # pylint: disable=broad-except
|
3185
|
+
# We don't crash the setup if we cannot read the log file.
|
3186
|
+
# Instead, we should retry the setup with dumping the script
|
3187
|
+
# to a file to be safe.
|
3188
|
+
logger.debug('Failed to read setup log file '
|
3189
|
+
f'{setup_log_path}: {e}')
|
3190
|
+
is_message_too_long = True
|
3181
3191
|
|
3182
3192
|
if is_message_too_long:
|
3183
3193
|
# If the setup script is too long, we retry it with dumping
|
sky/execution.py
CHANGED
@@ -171,10 +171,11 @@ def _execute(
|
|
171
171
|
task = dag.tasks[0]
|
172
172
|
|
173
173
|
if any(r.job_recovery is not None for r in task.resources):
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
174
|
+
logger.warning(
|
175
|
+
f'{colorama.Style.DIM}The task has `job_recovery` specified, '
|
176
|
+
'but is launched as an unmanaged job. It will be ignored.'
|
177
|
+
'To enable job recovery, use managed jobs: sky jobs launch.'
|
178
|
+
f'{colorama.Style.RESET_ALL}')
|
178
179
|
|
179
180
|
cluster_exists = False
|
180
181
|
if cluster_name is not None:
|
sky/jobs/controller.py
CHANGED
@@ -160,6 +160,11 @@ class JobsController:
|
|
160
160
|
if task_id == 0:
|
161
161
|
submitted_at = backend_utils.get_timestamp_from_run_timestamp(
|
162
162
|
self._backend.run_timestamp)
|
163
|
+
assert task.name is not None, task
|
164
|
+
cluster_name = managed_job_utils.generate_managed_job_cluster_name(
|
165
|
+
task.name, self._job_id)
|
166
|
+
self._strategy_executor = recovery_strategy.StrategyExecutor.make(
|
167
|
+
cluster_name, self._backend, task, self._retry_until_up)
|
163
168
|
managed_job_state.set_submitted(
|
164
169
|
self._job_id,
|
165
170
|
task_id,
|
@@ -167,15 +172,14 @@ class JobsController:
|
|
167
172
|
submitted_at,
|
168
173
|
resources_str=backend_utils.get_task_resources_str(
|
169
174
|
task, is_managed_job=True),
|
175
|
+
specs={
|
176
|
+
'max_restarts_on_errors':
|
177
|
+
self._strategy_executor.max_restarts_on_errors
|
178
|
+
},
|
170
179
|
callback_func=callback_func)
|
171
180
|
logger.info(
|
172
181
|
f'Submitted managed job {self._job_id} (task: {task_id}, name: '
|
173
182
|
f'{task.name!r}); {constants.TASK_ID_ENV_VAR}: {task_id_env_var}')
|
174
|
-
assert task.name is not None, task
|
175
|
-
cluster_name = managed_job_utils.generate_managed_job_cluster_name(
|
176
|
-
task.name, self._job_id)
|
177
|
-
self._strategy_executor = recovery_strategy.StrategyExecutor.make(
|
178
|
-
cluster_name, self._backend, task, self._retry_until_up)
|
179
183
|
|
180
184
|
logger.info('Started monitoring.')
|
181
185
|
managed_job_state.set_starting(job_id=self._job_id,
|
@@ -283,23 +287,35 @@ class JobsController:
|
|
283
287
|
failure_reason = (
|
284
288
|
'To see the details, run: '
|
285
289
|
f'sky jobs logs --controller {self._job_id}')
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
290
|
+
should_restart_on_failure = (
|
291
|
+
self._strategy_executor.should_restart_on_failure())
|
292
|
+
if should_restart_on_failure:
|
293
|
+
max_restarts = (
|
294
|
+
self._strategy_executor.max_restarts_on_errors)
|
295
|
+
logger.info(
|
296
|
+
f'User program crashed '
|
297
|
+
f'({managed_job_status.value}). '
|
298
|
+
f'Retry the job as max_restarts_on_errors is '
|
299
|
+
f'set to {max_restarts}. '
|
300
|
+
f'[{self._strategy_executor.restart_cnt_on_failure}'
|
301
|
+
f'/{max_restarts}]')
|
302
|
+
else:
|
303
|
+
managed_job_state.set_failed(
|
304
|
+
self._job_id,
|
305
|
+
task_id,
|
306
|
+
failure_type=managed_job_status,
|
307
|
+
failure_reason=failure_reason,
|
308
|
+
end_time=end_time,
|
309
|
+
callback_func=callback_func)
|
310
|
+
return False
|
311
|
+
else:
|
312
|
+
# Although the cluster is healthy, we fail to access the
|
313
|
+
# job status. Try to recover the job (will not restart the
|
314
|
+
# cluster, if the cluster is healthy).
|
315
|
+
assert job_status is None, job_status
|
316
|
+
logger.info('Failed to fetch the job status while the '
|
317
|
+
'cluster is healthy. Try to recover the job '
|
318
|
+
'(the cluster will not be restarted).')
|
303
319
|
# When the handle is None, the cluster should be cleaned up already.
|
304
320
|
if handle is not None:
|
305
321
|
resources = handle.launched_resources
|
sky/jobs/recovery_strategy.py
CHANGED
@@ -66,7 +66,8 @@ class StrategyExecutor:
|
|
66
66
|
RETRY_INIT_GAP_SECONDS = 60
|
67
67
|
|
68
68
|
def __init__(self, cluster_name: str, backend: 'backends.Backend',
|
69
|
-
task: 'task_lib.Task', retry_until_up: bool
|
69
|
+
task: 'task_lib.Task', retry_until_up: bool,
|
70
|
+
max_restarts_on_errors: int) -> None:
|
70
71
|
"""Initialize the strategy executor.
|
71
72
|
|
72
73
|
Args:
|
@@ -82,6 +83,8 @@ class StrategyExecutor:
|
|
82
83
|
self.cluster_name = cluster_name
|
83
84
|
self.backend = backend
|
84
85
|
self.retry_until_up = retry_until_up
|
86
|
+
self.max_restarts_on_errors = max_restarts_on_errors
|
87
|
+
self.restart_cnt_on_failure = 0
|
85
88
|
|
86
89
|
def __init_subclass__(cls, name: str, default: bool = False):
|
87
90
|
RECOVERY_STRATEGIES[name] = cls
|
@@ -109,8 +112,17 @@ class StrategyExecutor:
|
|
109
112
|
# set the new_task_resources to be the same type (list or set) as the
|
110
113
|
# original task.resources
|
111
114
|
task.set_resources(type(task.resources)(new_resources_list))
|
112
|
-
|
113
|
-
|
115
|
+
if isinstance(job_recovery, dict):
|
116
|
+
job_recovery_name = job_recovery.pop('strategy',
|
117
|
+
DEFAULT_RECOVERY_STRATEGY)
|
118
|
+
max_restarts_on_errors = job_recovery.pop('max_restarts_on_errors',
|
119
|
+
0)
|
120
|
+
else:
|
121
|
+
job_recovery_name = job_recovery
|
122
|
+
max_restarts_on_errors = 0
|
123
|
+
return RECOVERY_STRATEGIES[job_recovery_name](cluster_name, backend,
|
124
|
+
task, retry_until_up,
|
125
|
+
max_restarts_on_errors)
|
114
126
|
|
115
127
|
def launch(self) -> float:
|
116
128
|
"""Launch the cluster for the first time.
|
@@ -368,6 +380,17 @@ class StrategyExecutor:
|
|
368
380
|
f'{gap_seconds:.1f} seconds.')
|
369
381
|
time.sleep(gap_seconds)
|
370
382
|
|
383
|
+
def should_restart_on_failure(self) -> bool:
|
384
|
+
"""Increments counter & checks if job should be restarted on a failure.
|
385
|
+
|
386
|
+
Returns:
|
387
|
+
True if the job should be restarted, otherwise False.
|
388
|
+
"""
|
389
|
+
self.restart_cnt_on_failure += 1
|
390
|
+
if self.restart_cnt_on_failure > self.max_restarts_on_errors:
|
391
|
+
return False
|
392
|
+
return True
|
393
|
+
|
371
394
|
|
372
395
|
class FailoverStrategyExecutor(StrategyExecutor, name='FAILOVER',
|
373
396
|
default=False):
|
@@ -376,8 +399,10 @@ class FailoverStrategyExecutor(StrategyExecutor, name='FAILOVER',
|
|
376
399
|
_MAX_RETRY_CNT = 240 # Retry for 4 hours.
|
377
400
|
|
378
401
|
def __init__(self, cluster_name: str, backend: 'backends.Backend',
|
379
|
-
task: 'task_lib.Task', retry_until_up: bool
|
380
|
-
|
402
|
+
task: 'task_lib.Task', retry_until_up: bool,
|
403
|
+
max_restarts_on_errors: int) -> None:
|
404
|
+
super().__init__(cluster_name, backend, task, retry_until_up,
|
405
|
+
max_restarts_on_errors)
|
381
406
|
# Note down the cloud/region of the launched cluster, so that we can
|
382
407
|
# first retry in the same cloud/region. (Inside recover() we may not
|
383
408
|
# rely on cluster handle, as it can be None if the cluster is
|
sky/jobs/state.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
# TODO(zhwu): maybe use file based status instead of database, so
|
3
3
|
# that we can easily switch to a s3-based storage.
|
4
4
|
import enum
|
5
|
+
import json
|
5
6
|
import pathlib
|
6
7
|
import sqlite3
|
7
8
|
import time
|
@@ -65,7 +66,8 @@ _CURSOR.execute("""\
|
|
65
66
|
failure_reason TEXT,
|
66
67
|
spot_job_id INTEGER,
|
67
68
|
task_id INTEGER DEFAULT 0,
|
68
|
-
task_name TEXT
|
69
|
+
task_name TEXT,
|
70
|
+
specs TEXT)""")
|
69
71
|
_CONN.commit()
|
70
72
|
|
71
73
|
db_utils.add_column_to_table(_CURSOR, _CONN, 'spot', 'failure_reason', 'TEXT')
|
@@ -92,6 +94,17 @@ db_utils.add_column_to_table(_CURSOR,
|
|
92
94
|
'TEXT',
|
93
95
|
copy_from='job_name')
|
94
96
|
|
97
|
+
# Specs is some useful information about the task, e.g., the
|
98
|
+
# max_restarts_on_errors value. It is stored in JSON format.
|
99
|
+
db_utils.add_column_to_table(_CURSOR,
|
100
|
+
_CONN,
|
101
|
+
'spot',
|
102
|
+
'specs',
|
103
|
+
'TEXT',
|
104
|
+
value_to_replace_existing_entries=json.dumps({
|
105
|
+
'max_restarts_on_errors': 0,
|
106
|
+
}))
|
107
|
+
|
95
108
|
# `job_info` contains the mapping from job_id to the job_name.
|
96
109
|
# In the future, it may contain more information about each job.
|
97
110
|
_CURSOR.execute("""\
|
@@ -130,7 +143,8 @@ columns = [
|
|
130
143
|
'task_name',
|
131
144
|
# columns from the job_info table
|
132
145
|
'_job_info_job_id', # This should be the same as job_id
|
133
|
-
'job_name'
|
146
|
+
'job_name',
|
147
|
+
'specs',
|
134
148
|
]
|
135
149
|
|
136
150
|
|
@@ -283,7 +297,8 @@ def set_pending(job_id: int, task_id: int, task_name: str, resources_str: str):
|
|
283
297
|
|
284
298
|
def set_submitted(job_id: int, task_id: int, run_timestamp: str,
|
285
299
|
submit_time: float, resources_str: str,
|
286
|
-
|
300
|
+
specs: Dict[str, Union[str,
|
301
|
+
int]], callback_func: CallbackType):
|
287
302
|
"""Set the task to submitted.
|
288
303
|
|
289
304
|
Args:
|
@@ -293,6 +308,8 @@ def set_submitted(job_id: int, task_id: int, run_timestamp: str,
|
|
293
308
|
determine the log directory of the managed task.
|
294
309
|
submit_time: The time when the managed task is submitted.
|
295
310
|
resources_str: The resources string of the managed task.
|
311
|
+
specs: The specs of the managed task.
|
312
|
+
callback_func: The callback function.
|
296
313
|
"""
|
297
314
|
# Use the timestamp in the `run_timestamp` ('sky-2022-10...'), to make
|
298
315
|
# the log directory and submission time align with each other, so as to
|
@@ -306,11 +323,12 @@ def set_submitted(job_id: int, task_id: int, run_timestamp: str,
|
|
306
323
|
resources=(?),
|
307
324
|
submitted_at=(?),
|
308
325
|
status=(?),
|
309
|
-
run_timestamp=(?)
|
326
|
+
run_timestamp=(?),
|
327
|
+
specs=(?)
|
310
328
|
WHERE spot_job_id=(?) AND
|
311
329
|
task_id=(?)""",
|
312
330
|
(resources_str, submit_time, ManagedJobStatus.SUBMITTED.value,
|
313
|
-
run_timestamp, job_id, task_id))
|
331
|
+
run_timestamp, json.dumps(specs), job_id, task_id))
|
314
332
|
callback_func('SUBMITTED')
|
315
333
|
|
316
334
|
|
@@ -619,3 +637,13 @@ def get_latest_job_id() -> Optional[int]:
|
|
619
637
|
for (job_id,) in rows:
|
620
638
|
return job_id
|
621
639
|
return None
|
640
|
+
|
641
|
+
|
642
|
+
def get_task_specs(job_id: int, task_id: int) -> Dict[str, Any]:
|
643
|
+
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
644
|
+
task_specs = cursor.execute(
|
645
|
+
"""\
|
646
|
+
SELECT specs FROM spot
|
647
|
+
WHERE spot_job_id=(?) AND task_id=(?)""",
|
648
|
+
(job_id, task_id)).fetchone()
|
649
|
+
return json.loads(task_specs[0])
|
sky/jobs/utils.py
CHANGED
@@ -70,7 +70,7 @@ _JOB_CANCELLED_MESSAGE = (
|
|
70
70
|
# state, after the job finished. This is a safeguard to avoid the case where
|
71
71
|
# the managed job status fails to be updated and keep the `sky jobs logs`
|
72
72
|
# blocking for a long time.
|
73
|
-
_FINAL_JOB_STATUS_WAIT_TIMEOUT_SECONDS =
|
73
|
+
_FINAL_JOB_STATUS_WAIT_TIMEOUT_SECONDS = 25
|
74
74
|
|
75
75
|
|
76
76
|
class UserSignal(enum.Enum):
|
@@ -392,8 +392,12 @@ def stream_logs_by_id(job_id: int, follow: bool = True) -> str:
|
|
392
392
|
f'INFO: Log for the current task ({task_id}) '
|
393
393
|
'is finished. Waiting for the next task\'s log '
|
394
394
|
'to be started.')
|
395
|
-
|
396
|
-
|
395
|
+
# Add a newline to avoid the status display below
|
396
|
+
# removing the last line of the task output.
|
397
|
+
print()
|
398
|
+
status_display.update(
|
399
|
+
ux_utils.spinner_message(
|
400
|
+
f'Waiting for the next task: {task_id + 1}'))
|
397
401
|
status_display.start()
|
398
402
|
original_task_id = task_id
|
399
403
|
while True:
|
@@ -405,7 +409,27 @@ def stream_logs_by_id(job_id: int, follow: bool = True) -> str:
|
|
405
409
|
time.sleep(JOB_STATUS_CHECK_GAP_SECONDS)
|
406
410
|
continue
|
407
411
|
else:
|
408
|
-
|
412
|
+
task_specs = managed_job_state.get_task_specs(
|
413
|
+
job_id, task_id)
|
414
|
+
if task_specs.get('max_restarts_on_errors', 0) == 0:
|
415
|
+
# We don't need to wait for the managed job status
|
416
|
+
# update, as the job is guaranteed to be in terminal
|
417
|
+
# state afterwards.
|
418
|
+
break
|
419
|
+
print()
|
420
|
+
status_display.update(
|
421
|
+
ux_utils.spinner_message(
|
422
|
+
'Waiting for next restart for the failed task'))
|
423
|
+
status_display.start()
|
424
|
+
while True:
|
425
|
+
_, managed_job_status = (
|
426
|
+
managed_job_state.get_latest_task_id_status(
|
427
|
+
job_id))
|
428
|
+
if (managed_job_status !=
|
429
|
+
managed_job_state.ManagedJobStatus.RUNNING):
|
430
|
+
break
|
431
|
+
time.sleep(JOB_STATUS_CHECK_GAP_SECONDS)
|
432
|
+
continue
|
409
433
|
# The job can be cancelled by the user or the controller (when
|
410
434
|
# the cluster is partially preempted).
|
411
435
|
logger.debug(
|
sky/resources.py
CHANGED
@@ -55,7 +55,7 @@ class Resources:
|
|
55
55
|
accelerators: Union[None, str, Dict[str, int]] = None,
|
56
56
|
accelerator_args: Optional[Dict[str, str]] = None,
|
57
57
|
use_spot: Optional[bool] = None,
|
58
|
-
job_recovery: Optional[str] = None,
|
58
|
+
job_recovery: Optional[Union[Dict[str, Union[str, int]], str]] = None,
|
59
59
|
region: Optional[str] = None,
|
60
60
|
zone: Optional[str] = None,
|
61
61
|
image_id: Union[Dict[str, str], str, None] = None,
|
@@ -111,6 +111,12 @@ class Resources:
|
|
111
111
|
job to recover the cluster from preemption. Refer to
|
112
112
|
`recovery_strategy module <https://github.com/skypilot-org/skypilot/blob/master/sky/jobs/recovery_strategy.py>`__ # pylint: disable=line-too-long
|
113
113
|
for more details.
|
114
|
+
When a dict is provided, it can have the following fields:
|
115
|
+
|
116
|
+
- strategy: the recovery strategy to use.
|
117
|
+
- max_restarts_on_errors: the max number of restarts on user code
|
118
|
+
errors.
|
119
|
+
|
114
120
|
region: the region to use.
|
115
121
|
zone: the zone to use.
|
116
122
|
image_id: the image ID to use. If a str, must be a string
|
@@ -161,10 +167,20 @@ class Resources:
|
|
161
167
|
|
162
168
|
self._use_spot_specified = use_spot is not None
|
163
169
|
self._use_spot = use_spot if use_spot is not None else False
|
164
|
-
self._job_recovery = None
|
170
|
+
self._job_recovery: Optional[Dict[str, Union[str, int]]] = None
|
165
171
|
if job_recovery is not None:
|
166
|
-
if job_recovery
|
167
|
-
|
172
|
+
if isinstance(job_recovery, str):
|
173
|
+
job_recovery = {'strategy': job_recovery}
|
174
|
+
if 'strategy' not in job_recovery:
|
175
|
+
job_recovery['strategy'] = None
|
176
|
+
|
177
|
+
strategy_name = job_recovery['strategy']
|
178
|
+
if strategy_name == 'none':
|
179
|
+
self._job_recovery = None
|
180
|
+
else:
|
181
|
+
if strategy_name is not None:
|
182
|
+
job_recovery['strategy'] = strategy_name.upper()
|
183
|
+
self._job_recovery = job_recovery
|
168
184
|
|
169
185
|
if disk_size is not None:
|
170
186
|
if round(disk_size) != disk_size:
|
@@ -419,7 +435,7 @@ class Resources:
|
|
419
435
|
return self._use_spot_specified
|
420
436
|
|
421
437
|
@property
|
422
|
-
def job_recovery(self) -> Optional[str]:
|
438
|
+
def job_recovery(self) -> Optional[Dict[str, Union[str, int]]]:
|
423
439
|
return self._job_recovery
|
424
440
|
|
425
441
|
@property
|
@@ -814,12 +830,13 @@ class Resources:
|
|
814
830
|
Raises:
|
815
831
|
ValueError: if the attributes are invalid.
|
816
832
|
"""
|
817
|
-
if self._job_recovery is None:
|
833
|
+
if self._job_recovery is None or self._job_recovery['strategy'] is None:
|
818
834
|
return
|
819
|
-
if self._job_recovery
|
835
|
+
if (self._job_recovery['strategy']
|
836
|
+
not in managed_jobs.RECOVERY_STRATEGIES):
|
820
837
|
with ux_utils.print_exception_no_traceback():
|
821
838
|
raise ValueError(
|
822
|
-
f'Spot recovery strategy {self._job_recovery} '
|
839
|
+
f'Spot recovery strategy {self._job_recovery["strategy"]} '
|
823
840
|
'is not supported. The strategy should be among '
|
824
841
|
f'{list(managed_jobs.RECOVERY_STRATEGIES.keys())}')
|
825
842
|
|
sky/setup_files/setup.py
CHANGED
@@ -153,7 +153,7 @@ install_requires = [
|
|
153
153
|
'tabulate',
|
154
154
|
# Light weight requirement, can be replaced with "typing" once
|
155
155
|
# we deprecate Python 3.7 (this will take a while).
|
156
|
-
|
156
|
+
'typing_extensions',
|
157
157
|
'filelock >= 3.6.0',
|
158
158
|
'packaging',
|
159
159
|
'psutil',
|
@@ -216,8 +216,9 @@ extras_require: Dict[str, List[str]] = {
|
|
216
216
|
# We need azure-identity>=1.13.0 to enable the customization of the
|
217
217
|
# timeout of AzureCliCredential.
|
218
218
|
'azure': [
|
219
|
-
'azure-cli>=2.
|
220
|
-
'azure-mgmt-network', 'azure-
|
219
|
+
'azure-cli>=2.65.0', 'azure-core>=1.31.0', 'azure-identity>=1.19.0',
|
220
|
+
'azure-mgmt-network>=27.0.0', 'azure-mgmt-compute>=33.0.0',
|
221
|
+
'azure-storage-blob>=12.23.1', 'msgraph-sdk'
|
221
222
|
] + local_ray,
|
222
223
|
# We need google-api-python-client>=2.69.0 to enable 'discardLocalSsd'
|
223
224
|
# parameter for stopping instances.
|
sky/skylet/job_lib.py
CHANGED
@@ -512,16 +512,13 @@ def _get_jobs_by_ids(job_ids: List[int]) -> List[Dict[str, Any]]:
|
|
512
512
|
return records
|
513
513
|
|
514
514
|
|
515
|
-
def
|
516
|
-
rows = _CURSOR.execute(
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
'submit': submit
|
523
|
-
} for job_id, created_time, submit in rows
|
524
|
-
}
|
515
|
+
def _get_pending_job(job_id: int) -> Optional[Dict[str, Any]]:
|
516
|
+
rows = _CURSOR.execute('SELECT created_time, submit FROM pending_jobs '
|
517
|
+
f'WHERE job_id={job_id!r}')
|
518
|
+
for row in rows:
|
519
|
+
created_time, submit = row
|
520
|
+
return {'created_time': created_time, 'submit': submit}
|
521
|
+
return None
|
525
522
|
|
526
523
|
|
527
524
|
def update_job_status(job_ids: List[int],
|
@@ -535,7 +532,7 @@ def update_job_status(job_ids: List[int],
|
|
535
532
|
during job cancelling, we still need this to handle the staleness problem,
|
536
533
|
caused by instance restarting and other corner cases (if any).
|
537
534
|
|
538
|
-
This function should only be run on the remote instance with ray
|
535
|
+
This function should only be run on the remote instance with ray>=2.4.0.
|
539
536
|
"""
|
540
537
|
if len(job_ids) == 0:
|
541
538
|
return []
|
@@ -547,50 +544,45 @@ def update_job_status(job_ids: List[int],
|
|
547
544
|
|
548
545
|
# In ray 2.4.0, job_client.list_jobs returns a list of JobDetails,
|
549
546
|
# which contains the job status (str) and submission_id (str).
|
547
|
+
ray_job_query_time = time.time()
|
550
548
|
job_detail_lists: List['ray_pydantic.JobDetails'] = job_client.list_jobs()
|
551
549
|
|
552
|
-
pending_jobs = _get_pending_jobs()
|
553
550
|
job_details = {}
|
554
551
|
ray_job_ids_set = set(ray_job_ids)
|
555
552
|
for job_detail in job_detail_lists:
|
556
553
|
if job_detail.submission_id in ray_job_ids_set:
|
557
554
|
job_details[job_detail.submission_id] = job_detail
|
558
|
-
job_statuses: List[Optional[JobStatus]] = [None] * len(ray_job_ids)
|
559
|
-
for i, ray_job_id in enumerate(ray_job_ids):
|
560
|
-
job_id = job_ids[i]
|
561
|
-
if ray_job_id in job_details:
|
562
|
-
ray_status = job_details[ray_job_id].status
|
563
|
-
job_statuses[i] = _RAY_TO_JOB_STATUS_MAP[ray_status]
|
564
|
-
if job_id in pending_jobs:
|
565
|
-
if pending_jobs[job_id]['created_time'] < psutil.boot_time():
|
566
|
-
logger.info(
|
567
|
-
f'Job {job_id} is stale, setting to FAILED: '
|
568
|
-
f'created_time={pending_jobs[job_id]["created_time"]}, '
|
569
|
-
f'boot_time={psutil.boot_time()}')
|
570
|
-
# The job is stale as it is created before the instance
|
571
|
-
# is booted, e.g. the instance is rebooted.
|
572
|
-
job_statuses[i] = JobStatus.FAILED
|
573
|
-
# Gives a 60 second grace period between job being submit from
|
574
|
-
# the pending table until appearing in ray jobs.
|
575
|
-
if (pending_jobs[job_id]['submit'] > 0 and
|
576
|
-
pending_jobs[job_id]['submit'] <
|
577
|
-
time.time() - _PENDING_SUBMIT_GRACE_PERIOD):
|
578
|
-
# For jobs submitted outside of the grace period, we will
|
579
|
-
# consider the ray job status.
|
580
|
-
continue
|
581
|
-
else:
|
582
|
-
# Reset the job status to PENDING even though it may not appear
|
583
|
-
# in the ray jobs, so that it will not be considered as stale.
|
584
|
-
job_statuses[i] = JobStatus.PENDING
|
585
|
-
|
586
|
-
assert len(job_statuses) == len(job_ids), (job_statuses, job_ids)
|
587
555
|
|
588
556
|
statuses = []
|
589
|
-
for job_id,
|
557
|
+
for job_id, ray_job_id in zip(job_ids, ray_job_ids):
|
590
558
|
# Per-job status lock is required because between the job status
|
591
559
|
# query and the job status update, the job status in the databse
|
592
560
|
# can be modified by the generated ray program.
|
593
561
|
with filelock.FileLock(_get_lock_path(job_id)):
|
562
|
+
status = None
|
563
|
+
if ray_job_id in job_details:
|
564
|
+
ray_status = job_details[ray_job_id].status
|
565
|
+
status = _RAY_TO_JOB_STATUS_MAP[ray_status]
|
566
|
+
pending_job = _get_pending_job(job_id)
|
567
|
+
if pending_job is not None:
|
568
|
+
if pending_job['created_time'] < psutil.boot_time():
|
569
|
+
logger.info(f'Job {job_id} is stale, setting to FAILED: '
|
570
|
+
f'created_time={pending_job["created_time"]}, '
|
571
|
+
f'boot_time={psutil.boot_time()}')
|
572
|
+
# The job is stale as it is created before the instance
|
573
|
+
# is booted, e.g. the instance is rebooted.
|
574
|
+
status = JobStatus.FAILED
|
575
|
+
# Gives a 60 second grace period between job being submit from
|
576
|
+
# the pending table until appearing in ray jobs. For jobs
|
577
|
+
# submitted outside of the grace period, we will consider the
|
578
|
+
# ray job status.
|
579
|
+
if not (pending_job['submit'] > 0 and pending_job['submit'] <
|
580
|
+
ray_job_query_time - _PENDING_SUBMIT_GRACE_PERIOD):
|
581
|
+
# Reset the job status to PENDING even though it may not
|
582
|
+
# appear in the ray jobs, so that it will not be considered
|
583
|
+
# as stale.
|
584
|
+
status = JobStatus.PENDING
|
585
|
+
|
594
586
|
original_status = get_status_no_lock(job_id)
|
595
587
|
assert original_status is not None, (job_id, status)
|
596
588
|
if status is None:
|
sky/utils/dag_utils.py
CHANGED
@@ -143,11 +143,21 @@ def fill_default_config_in_dag_for_job_launch(dag: dag_lib.Dag) -> None:
|
|
143
143
|
for task_ in dag.tasks:
|
144
144
|
|
145
145
|
new_resources_list = []
|
146
|
+
default_strategy = jobs.DEFAULT_RECOVERY_STRATEGY
|
147
|
+
assert default_strategy is not None
|
146
148
|
for resources in list(task_.resources):
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
149
|
+
original_job_recovery = resources.job_recovery
|
150
|
+
job_recovery = {'strategy': default_strategy}
|
151
|
+
if isinstance(original_job_recovery, str):
|
152
|
+
job_recovery['strategy'] = original_job_recovery
|
153
|
+
elif isinstance(original_job_recovery, dict):
|
154
|
+
job_recovery.update(original_job_recovery)
|
155
|
+
strategy = job_recovery.get('strategy')
|
156
|
+
if strategy is None:
|
157
|
+
job_recovery['strategy'] = default_strategy
|
158
|
+
change_default_value: Dict[str, Any] = {
|
159
|
+
'job_recovery': job_recovery
|
160
|
+
}
|
151
161
|
|
152
162
|
new_resources = resources.copy(**change_default_value)
|
153
163
|
new_resources_list.append(new_resources)
|
sky/utils/schemas.py
CHANGED
@@ -92,7 +92,27 @@ def _get_single_resources_schema():
|
|
92
92
|
'type': 'string',
|
93
93
|
},
|
94
94
|
'job_recovery': {
|
95
|
-
|
95
|
+
# Either a string or a dict.
|
96
|
+
'anyOf': [{
|
97
|
+
'type': 'string',
|
98
|
+
}, {
|
99
|
+
'type': 'object',
|
100
|
+
'required': [],
|
101
|
+
'additionalProperties': False,
|
102
|
+
'properties': {
|
103
|
+
'strategy': {
|
104
|
+
'anyOf': [{
|
105
|
+
'type': 'string',
|
106
|
+
}, {
|
107
|
+
'type': 'null',
|
108
|
+
}],
|
109
|
+
},
|
110
|
+
'max_restarts_on_errors': {
|
111
|
+
'type': 'integer',
|
112
|
+
'minimum': 0,
|
113
|
+
},
|
114
|
+
}
|
115
|
+
}],
|
96
116
|
},
|
97
117
|
'disk_size': {
|
98
118
|
'type': 'integer',
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: skypilot-nightly
|
3
|
-
Version: 1.0.0.
|
3
|
+
Version: 1.0.0.dev20241030
|
4
4
|
Summary: SkyPilot: An intercloud broker for the clouds
|
5
5
|
Author: SkyPilot Team
|
6
6
|
License: Apache 2.0
|
@@ -46,11 +46,12 @@ Requires-Dist: awscli>=1.27.10; extra == "all"
|
|
46
46
|
Requires-Dist: botocore>=1.29.10; extra == "all"
|
47
47
|
Requires-Dist: boto3>=1.26.1; extra == "all"
|
48
48
|
Requires-Dist: colorama<0.4.5; extra == "all"
|
49
|
-
Requires-Dist: azure-cli>=2.
|
50
|
-
Requires-Dist: azure-core; extra == "all"
|
51
|
-
Requires-Dist: azure-identity>=1.
|
52
|
-
Requires-Dist: azure-mgmt-network; extra == "all"
|
53
|
-
Requires-Dist: azure-
|
49
|
+
Requires-Dist: azure-cli>=2.65.0; extra == "all"
|
50
|
+
Requires-Dist: azure-core>=1.31.0; extra == "all"
|
51
|
+
Requires-Dist: azure-identity>=1.19.0; extra == "all"
|
52
|
+
Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
|
53
|
+
Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
|
54
|
+
Requires-Dist: azure-storage-blob>=12.23.1; extra == "all"
|
54
55
|
Requires-Dist: msgraph-sdk; extra == "all"
|
55
56
|
Requires-Dist: ray[default]!=2.6.0,>=2.2.0; extra == "all"
|
56
57
|
Requires-Dist: google-api-python-client>=2.69.0; extra == "all"
|
@@ -78,11 +79,12 @@ Requires-Dist: botocore>=1.29.10; extra == "aws"
|
|
78
79
|
Requires-Dist: boto3>=1.26.1; extra == "aws"
|
79
80
|
Requires-Dist: colorama<0.4.5; extra == "aws"
|
80
81
|
Provides-Extra: azure
|
81
|
-
Requires-Dist: azure-cli>=2.
|
82
|
-
Requires-Dist: azure-core; extra == "azure"
|
83
|
-
Requires-Dist: azure-identity>=1.
|
84
|
-
Requires-Dist: azure-mgmt-network; extra == "azure"
|
85
|
-
Requires-Dist: azure-
|
82
|
+
Requires-Dist: azure-cli>=2.65.0; extra == "azure"
|
83
|
+
Requires-Dist: azure-core>=1.31.0; extra == "azure"
|
84
|
+
Requires-Dist: azure-identity>=1.19.0; extra == "azure"
|
85
|
+
Requires-Dist: azure-mgmt-network>=27.0.0; extra == "azure"
|
86
|
+
Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "azure"
|
87
|
+
Requires-Dist: azure-storage-blob>=12.23.1; extra == "azure"
|
86
88
|
Requires-Dist: msgraph-sdk; extra == "azure"
|
87
89
|
Requires-Dist: ray[default]!=2.6.0,>=2.2.0; extra == "azure"
|
88
90
|
Provides-Extra: cloudflare
|
{skypilot_nightly-1.0.0.dev20241029.dist-info → skypilot_nightly-1.0.0.dev20241030.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
sky/__init__.py,sha256=
|
1
|
+
sky/__init__.py,sha256=WwnJbF2ubaAJEJkUGPJ7jK5mh3QD1r487evpncErtC8,5882
|
2
2
|
sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
|
3
3
|
sky/authentication.py,sha256=pAdCT60OxxiXI9KXDyP2lQ9u9vMc6aMtq5Xi2h_hbdw,20984
|
4
4
|
sky/check.py,sha256=D3Y3saIFAYVvPxuBHnVgJEO0fUVDxgjwuMBaO-D778k,9472
|
@@ -7,10 +7,10 @@ sky/cloud_stores.py,sha256=RjFgmRhUh1Kk__f6g3KxzLp9s7dA0pFK4W1AukEuUaw,21153
|
|
7
7
|
sky/core.py,sha256=DW9OGE2kS2CmsvQ1grrpRnNFS3woMGWSHu5GE99e-I4,38190
|
8
8
|
sky/dag.py,sha256=WLFWr5hfrwjd31uYlNvI-zWUk7tLaT_gzJn4LzbVtkE,2780
|
9
9
|
sky/exceptions.py,sha256=KBIEJHgrw6OMBL8H65o-Gk6qYQEV1SR9gBwMjnMnxxg,8858
|
10
|
-
sky/execution.py,sha256=
|
10
|
+
sky/execution.py,sha256=tDK6JhF_405cjqxRpbdLbHZyxrKTD5oa0UkKDvPJ_9Q,24751
|
11
11
|
sky/global_user_state.py,sha256=PywEmUutF97XBgRMClR6IS5_KM8JJC0oA1LsPUZebp0,28681
|
12
12
|
sky/optimizer.py,sha256=tXGrFpc6xNtKH34qjBAMd4jTuWcDZTPnGFwEtuCQFmk,59702
|
13
|
-
sky/resources.py,sha256=
|
13
|
+
sky/resources.py,sha256=7kVpLRfy3DFFgmEji0_Xz6FbrvBDUSXC6K0bsRIK3hA,68290
|
14
14
|
sky/sky_logging.py,sha256=oLmTmwkuucIto3LHXLJfMcyRpYSkmZAZa5XzQPA5IHk,4434
|
15
15
|
sky/skypilot_config.py,sha256=E3g65cX3P3dT9b5N0GgFBG6yB0FXwIGpisKoozmJmWU,9094
|
16
16
|
sky/status_lib.py,sha256=J7Jb4_Dz0v2T64ttOdyUgpokvl4S0sBJrMfH7Fvo51A,1457
|
@@ -31,7 +31,7 @@ sky/adaptors/vsphere.py,sha256=zJP9SeObEoLrpgHW2VHvZE48EhgVf8GfAEIwBeaDMfM,2129
|
|
31
31
|
sky/backends/__init__.py,sha256=UDjwbUgpTRApbPJnNfR786GadUuwgRk3vsWoVu5RB_c,536
|
32
32
|
sky/backends/backend.py,sha256=wwfbrxPhjMPs6PSyy3tAHI8WJhl-xhgzWBsAZjmJJ6g,6249
|
33
33
|
sky/backends/backend_utils.py,sha256=LmLsaLiPuuUyGebOXykdvwZpUY-8sB7n4o2AnmwNmdQ,121714
|
34
|
-
sky/backends/cloud_vm_ray_backend.py,sha256=
|
34
|
+
sky/backends/cloud_vm_ray_backend.py,sha256=ZWAzdmKzSf3qalDoKfmLGaO3PywjLtIA5Q3AeeHhvHA,233158
|
35
35
|
sky/backends/docker_utils.py,sha256=Hyw1YY20EyghhEbYx6O2FIMDcGkNzBzV9TM7LFynei8,8358
|
36
36
|
sky/backends/local_docker_backend.py,sha256=0JL5m0YUgOmOL4aWEUe4tmt89dsxjk4_WXkPwgEKEis,16801
|
37
37
|
sky/backends/wheel_utils.py,sha256=3QS4T_Ydvo4DbYhogtyADyNBEf04I6jUCL71M285shQ,7963
|
@@ -95,11 +95,11 @@ sky/data/storage.py,sha256=x8YYY4zVBdit_5oAR_MXV-TM9qDefV_ZV4z0irv6ZaU,163102
|
|
95
95
|
sky/data/storage_utils.py,sha256=cM3kxlffYE7PnJySDu8huyUsMX_JYsf9uer8r5OYsjo,9556
|
96
96
|
sky/jobs/__init__.py,sha256=yucibSB_ZimtJMvOhMxn6ZqwBIYNfcwmc6pSXtCqmNQ,1483
|
97
97
|
sky/jobs/constants.py,sha256=YLgcCg_RHSYr_rfsI_4UIdXk78KKKOK29Oem88t5j8I,1350
|
98
|
-
sky/jobs/controller.py,sha256=
|
98
|
+
sky/jobs/controller.py,sha256=sirpi730_GfKfPZeZ2PvCXnJWger0r6AyLSOx2sLd6A,27368
|
99
99
|
sky/jobs/core.py,sha256=RkBFaKDlovmdzqlOAgQ0xAimZFgo4pXq3qaQkAvGsGk,16908
|
100
|
-
sky/jobs/recovery_strategy.py,sha256=
|
101
|
-
sky/jobs/state.py,sha256=
|
102
|
-
sky/jobs/utils.py,sha256=
|
100
|
+
sky/jobs/recovery_strategy.py,sha256=FpPK6e2PT61cZPDUJqIfo6g53uSRTBh7dOTbfR1DLVE,26672
|
101
|
+
sky/jobs/state.py,sha256=TV1G12vEMQJRgwWXsAjb3lmkJqkZmAOUUOja2QQPrg8,24307
|
102
|
+
sky/jobs/utils.py,sha256=pF4Kyl4v1M_Bmm2jIRlXGTSdII5BJ3f4qwex_oCFgBk,37742
|
103
103
|
sky/jobs/dashboard/dashboard.py,sha256=HFShuaxKir97QTeK2x37h6bsY6ncaFaNEg1USZqJPdc,3050
|
104
104
|
sky/jobs/dashboard/static/favicon.ico,sha256=uYlvgxSM7gjBmXpZ8wydvZUPAbJiiix-rc2Xe5mma9s,15086
|
105
105
|
sky/jobs/dashboard/templates/index.html,sha256=DBKMYEkkJ6sgLYod9ro7drgL8Y_neDsCx_WbwhWDsWM,9837
|
@@ -184,7 +184,7 @@ sky/serve/serve_utils.py,sha256=wqBxChpJylZ_qHWyFmMBJqrG8_7xTIOr9nlOeyHs9P8,3943
|
|
184
184
|
sky/serve/service.py,sha256=fkfJvNJ2BO6rfV0TblZG-QkOXaCyZlpkwbGgrsTzf2w,11872
|
185
185
|
sky/serve/service_spec.py,sha256=1aS6b-ku7W4CjyekXKDxjZsDdt-O8ygos-jFeXu31cA,13766
|
186
186
|
sky/setup_files/MANIFEST.in,sha256=CXz8lIJMgWlH9TvYgzIL3vPFtSDoQq-UMfD9K62rtH4,590
|
187
|
-
sky/setup_files/setup.py,sha256=
|
187
|
+
sky/setup_files/setup.py,sha256=G767GNB-jXqyC8MR-IdiojnnI2E6tP4gMYenKU14ZGA,12156
|
188
188
|
sky/skylet/LICENSE,sha256=BnFrJSvUFpMUoH5mOpWnEvaC5R6Uux8W6WXgrte8iYg,12381
|
189
189
|
sky/skylet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
190
190
|
sky/skylet/attempt_skylet.py,sha256=GZ6ITjjA0m-da3IxXXfoHR6n4pjp3X3TOXUqVvSrV0k,2136
|
@@ -192,7 +192,7 @@ sky/skylet/autostop_lib.py,sha256=JPDHmByuhoNYXSUHl-OnyeJUkOFWn7gDM1FrS7Kr3E8,44
|
|
192
192
|
sky/skylet/configs.py,sha256=UtnpmEL0F9hH6PSjhsps7xgjGZ6qzPOfW1p2yj9tSng,1887
|
193
193
|
sky/skylet/constants.py,sha256=OsuJcQp6UgkQ9Yfml6f_raXXbHS7-_h-v4QNv92y0Gw,14642
|
194
194
|
sky/skylet/events.py,sha256=A09E7LmmwzcGrSG0n8K7d3EZ1ZJr1mmmzoGyhnArYJA,12303
|
195
|
-
sky/skylet/job_lib.py,sha256=
|
195
|
+
sky/skylet/job_lib.py,sha256=jqJ4D3UeG6fNMm8xPtdWclnrVHQb6WiRqb1nrBp8TPg,35200
|
196
196
|
sky/skylet/log_lib.py,sha256=Jyj3h2yMBlheFX53AabXEiPaKyCbu06hLEhay5_ZRN0,18734
|
197
197
|
sky/skylet/log_lib.pyi,sha256=AHMkW2DGK2erFovb3ToZWxRiYaATlzkxKb5J9pkgF2Y,4295
|
198
198
|
sky/skylet/skylet.py,sha256=U9plr5hmhD9-Nyy0LMCymlE8DWtRXTFXQvfbFsS746Y,1153
|
@@ -249,14 +249,14 @@ sky/utils/command_runner.py,sha256=3CDcqRXEmoe3C-t2P58McgcRg6p9m5haUWYj1rOLuqM,3
|
|
249
249
|
sky/utils/command_runner.pyi,sha256=mJOzCgcYZAfHwnY_6Wf1YwlTEJGb9ihzc2f0rE0Kw98,7751
|
250
250
|
sky/utils/common_utils.py,sha256=Qy25LuIoTT0qg391EWyT9i5D6fwk1S4OdFwRpCTZ9Vk,24657
|
251
251
|
sky/utils/controller_utils.py,sha256=wF4_y1PCsLAWoo3XEtECwkNYTN6hO3vn_cxGxgQYcd8,43268
|
252
|
-
sky/utils/dag_utils.py,sha256=
|
252
|
+
sky/utils/dag_utils.py,sha256=pVX3lGDDcYTcGoH_1jEWzl9767Y4mwlIEYIzoyHO6gM,6105
|
253
253
|
sky/utils/db_utils.py,sha256=AOvMmBEN9cF4I7CoXihPCtus4mU2VDGjBQSVMMgzKlA,2786
|
254
254
|
sky/utils/env_options.py,sha256=3oAaUPxowL6vI2XmxXrH56V7Myj9IJWsL-MXFmRFVdI,1294
|
255
255
|
sky/utils/kubernetes_enums.py,sha256=imGqHSa8O07zD_6xH1SDMM7dBU5lF5fzFFlQuQy00QM,1384
|
256
256
|
sky/utils/log_utils.py,sha256=ptv2sbsiJSgk4NvdccrMsUR-MvOKnbu4BQiRSishgk0,12472
|
257
257
|
sky/utils/resources_utils.py,sha256=Xqi7gxPYw2y5wl5okUI5zx5LEij0hJF_V3Zi8q7TXYg,7890
|
258
258
|
sky/utils/rich_utils.py,sha256=hmnI1X5dKvRIQzB7EyNb34FT97qFNve-0QHqM5r0mVk,3066
|
259
|
-
sky/utils/schemas.py,sha256=
|
259
|
+
sky/utils/schemas.py,sha256=mogoStpQ77S936VfChinAW2I1DT4q2c5E7qY_qNiO0w,29094
|
260
260
|
sky/utils/subprocess_utils.py,sha256=3R54Elc2n8DQeO6Y8MCDJ6N6v27HDGpbNMIfCquqXYQ,6552
|
261
261
|
sky/utils/timeline.py,sha256=ao_nm0y52ZQILfL7Y92c3pSEFRyPm_ElORC3DrI5BwQ,3936
|
262
262
|
sky/utils/ux_utils.py,sha256=CqyIFGDuSE8fQasPkna_loZMwtboC9KedR09WEQ7qz0,6502
|
@@ -274,9 +274,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=k0TBoQ4zgf79-sVkixKSGYFHQ7Z
|
|
274
274
|
sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
|
275
275
|
sky/utils/kubernetes/rsync_helper.sh,sha256=hyYDaYSNxYaNvzUQBzC8AidB7nDeojizjkzc_CTxycY,1077
|
276
276
|
sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=RFLJ3k7MR5UN4SKHykQ0lV9SgXumoULpKYIAt1vh-HU,6560
|
277
|
-
skypilot_nightly-1.0.0.
|
278
|
-
skypilot_nightly-1.0.0.
|
279
|
-
skypilot_nightly-1.0.0.
|
280
|
-
skypilot_nightly-1.0.0.
|
281
|
-
skypilot_nightly-1.0.0.
|
282
|
-
skypilot_nightly-1.0.0.
|
277
|
+
skypilot_nightly-1.0.0.dev20241030.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
|
278
|
+
skypilot_nightly-1.0.0.dev20241030.dist-info/METADATA,sha256=bwgfsg4Zzl63yZYrUfZIBNeMitC8bOcgqKucALPDnbk,19708
|
279
|
+
skypilot_nightly-1.0.0.dev20241030.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
280
|
+
skypilot_nightly-1.0.0.dev20241030.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
|
281
|
+
skypilot_nightly-1.0.0.dev20241030.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
|
282
|
+
skypilot_nightly-1.0.0.dev20241030.dist-info/RECORD,,
|
File without changes
|
{skypilot_nightly-1.0.0.dev20241029.dist-info → skypilot_nightly-1.0.0.dev20241030.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|