skypilot-nightly 1.0.0.dev20250115__py3-none-any.whl → 1.0.0.dev20250117__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. sky/__init__.py +2 -2
  2. sky/backends/cloud_vm_ray_backend.py +50 -67
  3. sky/cli.py +11 -34
  4. sky/core.py +8 -5
  5. sky/data/storage.py +66 -14
  6. sky/global_user_state.py +1 -1
  7. sky/jobs/constants.py +8 -7
  8. sky/jobs/controller.py +19 -22
  9. sky/jobs/core.py +0 -2
  10. sky/jobs/recovery_strategy.py +114 -143
  11. sky/jobs/scheduler.py +283 -0
  12. sky/jobs/state.py +257 -21
  13. sky/jobs/utils.py +338 -96
  14. sky/provision/kubernetes/instance.py +1 -1
  15. sky/resources.py +1 -1
  16. sky/serve/core.py +30 -5
  17. sky/serve/replica_managers.py +1 -3
  18. sky/skylet/constants.py +1 -1
  19. sky/skylet/events.py +7 -3
  20. sky/skylet/job_lib.py +10 -30
  21. sky/skylet/log_lib.py +8 -8
  22. sky/skylet/log_lib.pyi +3 -0
  23. sky/skylet/skylet.py +1 -1
  24. sky/templates/jobs-controller.yaml.j2 +7 -3
  25. sky/utils/kubernetes/deploy_remote_cluster.sh +5 -5
  26. sky/utils/resources_utils.py +25 -21
  27. sky/utils/subprocess_utils.py +48 -9
  28. {skypilot_nightly-1.0.0.dev20250115.dist-info → skypilot_nightly-1.0.0.dev20250117.dist-info}/METADATA +1 -1
  29. {skypilot_nightly-1.0.0.dev20250115.dist-info → skypilot_nightly-1.0.0.dev20250117.dist-info}/RECORD +33 -32
  30. {skypilot_nightly-1.0.0.dev20250115.dist-info → skypilot_nightly-1.0.0.dev20250117.dist-info}/LICENSE +0 -0
  31. {skypilot_nightly-1.0.0.dev20250115.dist-info → skypilot_nightly-1.0.0.dev20250117.dist-info}/WHEEL +0 -0
  32. {skypilot_nightly-1.0.0.dev20250115.dist-info → skypilot_nightly-1.0.0.dev20250117.dist-info}/entry_points.txt +0 -0
  33. {skypilot_nightly-1.0.0.dev20250115.dist-info → skypilot_nightly-1.0.0.dev20250117.dist-info}/top_level.txt +0 -0
@@ -998,9 +998,7 @@ class SkyPilotReplicaManager(ReplicaManager):
998
998
  # Re-raise the exception if it is not preempted.
999
999
  raise
1000
1000
  job_status = list(job_statuses.values())[0]
1001
- if job_status in [
1002
- job_lib.JobStatus.FAILED, job_lib.JobStatus.FAILED_SETUP
1003
- ]:
1001
+ if job_status in job_lib.JobStatus.user_code_failure_states():
1004
1002
  info.status_property.user_app_failed = True
1005
1003
  serve_state.add_or_update_replica(self._service_name,
1006
1004
  info.replica_id, info)
sky/skylet/constants.py CHANGED
@@ -86,7 +86,7 @@ TASK_ID_LIST_ENV_VAR = 'SKYPILOT_TASK_IDS'
86
86
  # cluster yaml is updated.
87
87
  #
88
88
  # TODO(zongheng,zhanghao): make the upgrading of skylet automatic?
89
- SKYLET_VERSION = '9'
89
+ SKYLET_VERSION = '10'
90
90
  # The version of the lib files that skylet/jobs use. Whenever there is an API
91
91
  # change for the job_lib or log_lib, we need to bump this version, so that the
92
92
  # user can be notified to update their SkyPilot version on the remote cluster.
sky/skylet/events.py CHANGED
@@ -13,6 +13,8 @@ from sky import clouds
13
13
  from sky import sky_logging
14
14
  from sky.backends import cloud_vm_ray_backend
15
15
  from sky.clouds import cloud_registry
16
+ from sky.jobs import scheduler as managed_job_scheduler
17
+ from sky.jobs import state as managed_job_state
16
18
  from sky.jobs import utils as managed_job_utils
17
19
  from sky.serve import serve_utils
18
20
  from sky.skylet import autostop_lib
@@ -67,12 +69,13 @@ class JobSchedulerEvent(SkyletEvent):
67
69
  job_lib.scheduler.schedule_step(force_update_jobs=True)
68
70
 
69
71
 
70
- class ManagedJobUpdateEvent(SkyletEvent):
71
- """Skylet event for updating managed job status."""
72
+ class ManagedJobEvent(SkyletEvent):
73
+ """Skylet event for updating and scheduling managed jobs."""
72
74
  EVENT_INTERVAL_SECONDS = 300
73
75
 
74
76
  def _run(self):
75
77
  managed_job_utils.update_managed_job_status()
78
+ managed_job_scheduler.maybe_schedule_next_jobs()
76
79
 
77
80
 
78
81
  class ServiceUpdateEvent(SkyletEvent):
@@ -116,7 +119,8 @@ class AutostopEvent(SkyletEvent):
116
119
  logger.debug('autostop_config not set. Skipped.')
117
120
  return
118
121
 
119
- if job_lib.is_cluster_idle():
122
+ if (job_lib.is_cluster_idle() and
123
+ not managed_job_state.get_num_alive_jobs()):
120
124
  idle_minutes = (time.time() -
121
125
  autostop_lib.get_last_active_time()) // 60
122
126
  logger.debug(
sky/skylet/job_lib.py CHANGED
@@ -10,9 +10,8 @@ import pathlib
10
10
  import shlex
11
11
  import signal
12
12
  import sqlite3
13
- import subprocess
14
13
  import time
15
- from typing import Any, Dict, List, Optional
14
+ from typing import Any, Dict, List, Optional, Sequence
16
15
 
17
16
  import colorama
18
17
  import filelock
@@ -23,6 +22,7 @@ from sky.skylet import constants
23
22
  from sky.utils import common_utils
24
23
  from sky.utils import db_utils
25
24
  from sky.utils import log_utils
25
+ from sky.utils import subprocess_utils
26
26
 
27
27
  logger = sky_logging.init_logger(__name__)
28
28
 
@@ -162,13 +162,17 @@ class JobStatus(enum.Enum):
162
162
  def nonterminal_statuses(cls) -> List['JobStatus']:
163
163
  return [cls.INIT, cls.SETTING_UP, cls.PENDING, cls.RUNNING]
164
164
 
165
- def is_terminal(self):
165
+ def is_terminal(self) -> bool:
166
166
  return self not in self.nonterminal_statuses()
167
167
 
168
- def __lt__(self, other):
168
+ @classmethod
169
+ def user_code_failure_states(cls) -> Sequence['JobStatus']:
170
+ return (cls.FAILED, cls.FAILED_SETUP)
171
+
172
+ def __lt__(self, other: 'JobStatus') -> bool:
169
173
  return list(JobStatus).index(self) < list(JobStatus).index(other)
170
174
 
171
- def colored_str(self):
175
+ def colored_str(self) -> str:
172
176
  color = _JOB_STATUS_TO_COLOR[self]
173
177
  return f'{color}{self.value}{colorama.Style.RESET_ALL}'
174
178
 
@@ -205,31 +209,7 @@ class JobScheduler:
205
209
  _CURSOR.execute((f'UPDATE pending_jobs SET submit={int(time.time())} '
206
210
  f'WHERE job_id={job_id!r}'))
207
211
  _CONN.commit()
208
- # Use nohup to ensure the job driver process is a separate process tree,
209
- # instead of being a child of the current process. This is important to
210
- # avoid a chain of driver processes (job driver can call schedule_step()
211
- # to submit new jobs, and the new job can also call schedule_step()
212
- # recursively).
213
- #
214
- # echo $! will output the PID of the last background process started
215
- # in the current shell, so we can retrieve it and record in the DB.
216
- #
217
- # TODO(zhwu): A more elegant solution is to use another daemon process
218
- # to be in charge of starting these driver processes, instead of
219
- # starting them in the current process.
220
- wrapped_cmd = (f'nohup bash -c {shlex.quote(run_cmd)} '
221
- '</dev/null >/dev/null 2>&1 & echo $!')
222
- proc = subprocess.run(wrapped_cmd,
223
- stdout=subprocess.PIPE,
224
- stderr=subprocess.PIPE,
225
- stdin=subprocess.DEVNULL,
226
- start_new_session=True,
227
- check=True,
228
- shell=True,
229
- text=True)
230
- # Get the PID of the detached process
231
- pid = int(proc.stdout.strip())
232
-
212
+ pid = subprocess_utils.launch_new_process_tree(run_cmd)
233
213
  # TODO(zhwu): Backward compatibility, remove this check after 0.10.0.
234
214
  # This is for the case where the job is submitted with SkyPilot older
235
215
  # than #4318, using ray job submit.
sky/skylet/log_lib.py CHANGED
@@ -25,9 +25,9 @@ from sky.utils import log_utils
25
25
  from sky.utils import subprocess_utils
26
26
  from sky.utils import ux_utils
27
27
 
28
- _SKY_LOG_WAITING_GAP_SECONDS = 1
29
- _SKY_LOG_WAITING_MAX_RETRY = 5
30
- _SKY_LOG_TAILING_GAP_SECONDS = 0.2
28
+ SKY_LOG_WAITING_GAP_SECONDS = 1
29
+ SKY_LOG_WAITING_MAX_RETRY = 5
30
+ SKY_LOG_TAILING_GAP_SECONDS = 0.2
31
31
  # Peek the head of the lines to check if we need to start
32
32
  # streaming when tail > 0.
33
33
  PEEK_HEAD_LINES_FOR_START_STREAM = 20
@@ -336,7 +336,7 @@ def _follow_job_logs(file,
336
336
  ]:
337
337
  if wait_last_logs:
338
338
  # Wait all the logs are printed before exit.
339
- time.sleep(1 + _SKY_LOG_TAILING_GAP_SECONDS)
339
+ time.sleep(1 + SKY_LOG_TAILING_GAP_SECONDS)
340
340
  wait_last_logs = False
341
341
  continue
342
342
  status_str = status.value if status is not None else 'None'
@@ -345,7 +345,7 @@ def _follow_job_logs(file,
345
345
  f'Job finished (status: {status_str}).'))
346
346
  return
347
347
 
348
- time.sleep(_SKY_LOG_TAILING_GAP_SECONDS)
348
+ time.sleep(SKY_LOG_TAILING_GAP_SECONDS)
349
349
  status = job_lib.get_status_no_lock(job_id)
350
350
 
351
351
 
@@ -426,15 +426,15 @@ def tail_logs(job_id: Optional[int],
426
426
  retry_cnt += 1
427
427
  if os.path.exists(log_path) and status != job_lib.JobStatus.INIT:
428
428
  break
429
- if retry_cnt >= _SKY_LOG_WAITING_MAX_RETRY:
429
+ if retry_cnt >= SKY_LOG_WAITING_MAX_RETRY:
430
430
  print(
431
431
  f'{colorama.Fore.RED}ERROR: Logs for '
432
432
  f'{job_str} (status: {status.value}) does not exist '
433
433
  f'after retrying {retry_cnt} times.{colorama.Style.RESET_ALL}')
434
434
  return
435
- print(f'INFO: Waiting {_SKY_LOG_WAITING_GAP_SECONDS}s for the logs '
435
+ print(f'INFO: Waiting {SKY_LOG_WAITING_GAP_SECONDS}s for the logs '
436
436
  'to be written...')
437
- time.sleep(_SKY_LOG_WAITING_GAP_SECONDS)
437
+ time.sleep(SKY_LOG_WAITING_GAP_SECONDS)
438
438
  status = job_lib.update_job_status([job_id], silent=True)[0]
439
439
 
440
440
  start_stream_at = LOG_FILE_START_STREAMING_AT
sky/skylet/log_lib.pyi CHANGED
@@ -13,6 +13,9 @@ from sky.skylet import constants as constants
13
13
  from sky.skylet import job_lib as job_lib
14
14
  from sky.utils import log_utils as log_utils
15
15
 
16
+ SKY_LOG_WAITING_GAP_SECONDS: int = ...
17
+ SKY_LOG_WAITING_MAX_RETRY: int = ...
18
+ SKY_LOG_TAILING_GAP_SECONDS: float = ...
16
19
  LOG_FILE_START_STREAMING_AT: str = ...
17
20
 
18
21
 
sky/skylet/skylet.py CHANGED
@@ -20,7 +20,7 @@ EVENTS = [
20
20
  # The managed job update event should be after the job update event.
21
21
  # Otherwise, the abnormal managed job status update will be delayed
22
22
  # until the next job update event.
23
- events.ManagedJobUpdateEvent(),
23
+ events.ManagedJobEvent(),
24
24
  # This is for monitoring controller job status. If it becomes
25
25
  # unhealthy, this event will correctly update the controller
26
26
  # status to CONTROLLER_FAILED.
@@ -63,9 +63,13 @@ setup: |
63
63
 
64
64
  run: |
65
65
  {{ sky_activate_python_env }}
66
- # Start the controller for the current managed job.
67
- python -u -m sky.jobs.controller {{remote_user_yaml_path}} \
68
- --job-id $SKYPILOT_INTERNAL_JOB_ID {% if retry_until_up %}--retry-until-up{% endif %}
66
+ # Submit the job to the scheduler.
67
+ # Note: The job is already in the `spot` table, marked as PENDING.
68
+ # CloudVmRayBackend._exec_code_on_head() calls
69
+ # managed_job_codegen.set_pending() before we get here.
70
+ python -u -m sky.jobs.scheduler {{remote_user_yaml_path}} \
71
+ --job-id $SKYPILOT_INTERNAL_JOB_ID
72
+
69
73
 
70
74
  envs:
71
75
  {%- for env_name, env_value in controller_envs.items() %}
@@ -93,11 +93,11 @@ cleanup_agent_node() {
93
93
 
94
94
  check_gpu() {
95
95
  local NODE_IP=$1
96
- run_remote "$NODE_IP" "
97
- if command -v nvidia-smi &> /dev/null; then
98
- nvidia-smi --list-gpus | grep 'GPU 0'
99
- fi
100
- "
96
+ if run_remote "$NODE_IP" "command -v nvidia-smi &> /dev/null && nvidia-smi --query-gpu=gpu_name --format=csv,noheader &> /dev/null"; then
97
+ return 0 # GPU detected
98
+ else
99
+ return 1 # No GPU detected
100
+ fi
101
101
  }
102
102
 
103
103
  # Pre-flight checks
@@ -137,31 +137,35 @@ def simplify_ports(ports: List[str]) -> List[str]:
137
137
  return port_set_to_ranges(port_ranges_to_set(ports))
138
138
 
139
139
 
140
+ def format_resource(resource: 'resources_lib.Resources',
141
+ simplify: bool = False) -> str:
142
+ if simplify:
143
+ cloud = resource.cloud
144
+ if resource.accelerators is None:
145
+ vcpu, _ = cloud.get_vcpus_mem_from_instance_type(
146
+ resource.instance_type)
147
+ hardware = f'vCPU={int(vcpu)}'
148
+ else:
149
+ hardware = f'{resource.accelerators}'
150
+ spot = '[Spot]' if resource.use_spot else ''
151
+ return f'{cloud}({spot}{hardware})'
152
+ else:
153
+ # accelerator_args is way too long.
154
+ # Convert from:
155
+ # GCP(n1-highmem-8, {'tpu-v2-8': 1}, accelerator_args={'runtime_version': '2.12.0'} # pylint: disable=line-too-long
156
+ # to:
157
+ # GCP(n1-highmem-8, {'tpu-v2-8': 1}...)
158
+ pattern = ', accelerator_args={.*}'
159
+ launched_resource_str = re.sub(pattern, '...', str(resource))
160
+ return launched_resource_str
161
+
162
+
140
163
  def get_readable_resources_repr(handle: 'backends.CloudVmRayResourceHandle',
141
164
  simplify: bool = False) -> str:
142
165
  if (handle.launched_nodes is not None and
143
166
  handle.launched_resources is not None):
144
- if simplify:
145
- cloud = handle.launched_resources.cloud
146
- if handle.launched_resources.accelerators is None:
147
- vcpu, _ = cloud.get_vcpus_mem_from_instance_type(
148
- handle.launched_resources.instance_type)
149
- hardware = f'vCPU={int(vcpu)}'
150
- else:
151
- hardware = f'{handle.launched_resources.accelerators}'
152
- spot = '[Spot]' if handle.launched_resources.use_spot else ''
153
- return f'{handle.launched_nodes}x {cloud}({spot}{hardware})'
154
- else:
155
- launched_resource_str = str(handle.launched_resources)
156
- # accelerator_args is way too long.
157
- # Convert from:
158
- # GCP(n1-highmem-8, {'tpu-v2-8': 1}, accelerator_args={'runtime_version': '2.12.0'} # pylint: disable=line-too-long
159
- # to:
160
- # GCP(n1-highmem-8, {'tpu-v2-8': 1}...)
161
- pattern = ', accelerator_args={.*}'
162
- launched_resource_str = re.sub(pattern, '...',
163
- launched_resource_str)
164
- return f'{handle.launched_nodes}x {launched_resource_str}'
167
+ return (f'{handle.launched_nodes}x '
168
+ f'{format_resource(handle.launched_resources, simplify)}')
165
169
  return _DEFAULT_MESSAGE_HANDLE_INITIALIZING
166
170
 
167
171
 
@@ -3,6 +3,7 @@ from multiprocessing import pool
3
3
  import os
4
4
  import random
5
5
  import resource
6
+ import shlex
6
7
  import subprocess
7
8
  import time
8
9
  from typing import Any, Callable, Dict, List, Optional, Tuple, Union
@@ -101,8 +102,6 @@ def run_in_parallel(func: Callable,
101
102
  num_threads: Optional[int] = None) -> List[Any]:
102
103
  """Run a function in parallel on a list of arguments.
103
104
 
104
- The function 'func' should raise a CommandError if the command fails.
105
-
106
105
  Args:
107
106
  func: The function to run in parallel
108
107
  args: Iterable of arguments to pass to func
@@ -111,19 +110,23 @@ def run_in_parallel(func: Callable,
111
110
 
112
111
  Returns:
113
112
  A list of the return values of the function func, in the same order as the
114
- arguments.
113
+ arguments.
114
+
115
+ Raises:
116
+ Exception: The first exception encountered.
115
117
  """
118
+ # Short-circuit for short lists
116
119
  if len(args) == 0:
117
120
  return []
118
- # Short-circuit for single element
119
121
  if len(args) == 1:
120
122
  return [func(args[0])]
121
- # Reference: https://stackoverflow.com/questions/25790279/python-multiprocessing-early-termination # pylint: disable=line-too-long
122
- processes = num_threads if num_threads is not None else get_parallel_threads(
123
- )
123
+
124
+ processes = (num_threads
125
+ if num_threads is not None else get_parallel_threads())
126
+
124
127
  with pool.ThreadPool(processes=processes) as p:
125
- # Run the function in parallel on the arguments, keeping the order.
126
- return list(p.imap(func, args))
128
+ ordered_iterators = p.imap(func, args)
129
+ return list(ordered_iterators)
127
130
 
128
131
 
129
132
  def handle_returncode(returncode: int,
@@ -298,3 +301,39 @@ def kill_process_daemon(process_pid: int) -> None:
298
301
  # Disable input
299
302
  stdin=subprocess.DEVNULL,
300
303
  )
304
+
305
+
306
+ def launch_new_process_tree(cmd: str, log_output: str = '/dev/null') -> int:
307
+ """Launch a new process that will not be a child of the current process.
308
+
309
+ This will launch bash in a new session, which will launch the given cmd.
310
+ This will ensure that cmd is in its own process tree, and once bash exits,
311
+ will not be an ancestor of the current process. This is useful for job
312
+ launching.
313
+
314
+ Returns the pid of the launched cmd.
315
+ """
316
+ # Use nohup to ensure the job driver process is a separate process tree,
317
+ # instead of being a child of the current process. This is important to
318
+ # avoid a chain of driver processes (job driver can call schedule_step() to
319
+ # submit new jobs, and the new job can also call schedule_step()
320
+ # recursively).
321
+ #
322
+ # echo $! will output the PID of the last background process started in the
323
+ # current shell, so we can retrieve it and record in the DB.
324
+ #
325
+ # TODO(zhwu): A more elegant solution is to use another daemon process to be
326
+ # in charge of starting these driver processes, instead of starting them in
327
+ # the current process.
328
+ wrapped_cmd = (f'nohup bash -c {shlex.quote(cmd)} '
329
+ f'</dev/null >{log_output} 2>&1 & echo $!')
330
+ proc = subprocess.run(wrapped_cmd,
331
+ stdout=subprocess.PIPE,
332
+ stderr=subprocess.PIPE,
333
+ stdin=subprocess.DEVNULL,
334
+ start_new_session=True,
335
+ check=True,
336
+ shell=True,
337
+ text=True)
338
+ # Get the PID of the detached process
339
+ return int(proc.stdout.strip())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250115
3
+ Version: 1.0.0.dev20250117
4
4
  Summary: SkyPilot: An intercloud broker for the clouds
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -1,16 +1,16 @@
1
- sky/__init__.py,sha256=UAjMZW9jvx-0gdHn5FPa2lTkzlErK7Hu0pqF32A7B1c,5944
1
+ sky/__init__.py,sha256=kA2ieB1SawvfYteqYNXCfJepPEo1gELmFmJNfKXgdkM,5944
2
2
  sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
3
3
  sky/authentication.py,sha256=LXUDABKP1FJCS256xTTDJa40WXwHKF5x49S-4hZbD1M,21501
4
4
  sky/check.py,sha256=s8deMVL-k9y8gd519K7NWZc3DqWsEySwiAr0uH3Vvcc,9459
5
- sky/cli.py,sha256=ra3u-Erv8TwalWFU1Fw4_ix0oUWfVAd9eQsruQRx_Lc,214915
5
+ sky/cli.py,sha256=suOjHrt7mQTK47Z9ZQjogyUwnxfsKZ3_eP86AI29Dko,213891
6
6
  sky/cloud_stores.py,sha256=PcLT57_8SZy7o6paAluElfBynaLkbaOq3l-8dNg1AVM,23672
7
- sky/core.py,sha256=CPwNZQlC5WKLzTb2Tjo2Uogg0EvOt-yLCRlegqK_92A,38598
7
+ sky/core.py,sha256=fE1rn4Ku94S0XmWTO5-6t6eT6aaJImNczRqEnTe8v7Q,38742
8
8
  sky/dag.py,sha256=f3sJlkH4bE6Uuz3ozNtsMhcBpRx7KmC9Sa4seDKt4hU,3104
9
9
  sky/exceptions.py,sha256=rUi_au7QBNn3_wvwa8Y_MSHN3QDRpVLry8Mfa56LyGk,9197
10
10
  sky/execution.py,sha256=dpbk1kGRkGHT0FCJKGvjqeV3qIGEN2K20NDZbVrcAvI,28483
11
- sky/global_user_state.py,sha256=m2LJsXkh8eAvvz0ADnSP6idfYWZTA_Xi3uxwR3DrJxo,30241
11
+ sky/global_user_state.py,sha256=cTwltMCDIIBaapuGgARxFwpDJDCiKKyVW-PP_qtWuCA,30241
12
12
  sky/optimizer.py,sha256=d5BPAEZVrS3a2oBclSwo8MWkHQKQ3u4tcyawOANN0_0,59836
13
- sky/resources.py,sha256=zgUHgqCZGxvAABTe3JYukl4HrzQZi67D7ULFzAMk9YY,70325
13
+ sky/resources.py,sha256=D3jteQxKOUydoNm7VDl90p02dwP3RpbO3gqNcl4dpOI,70327
14
14
  sky/sky_logging.py,sha256=7Zk9mL1TDxFkGsy3INMBKYlqsbognVGSMzAsHZdZlhw,5891
15
15
  sky/skypilot_config.py,sha256=FN93hSG-heQCHBnemlIK2TwrJngKbpx4vMXNUzPIzV8,9087
16
16
  sky/status_lib.py,sha256=J7Jb4_Dz0v2T64ttOdyUgpokvl4S0sBJrMfH7Fvo51A,1457
@@ -32,7 +32,7 @@ sky/adaptors/vsphere.py,sha256=zJP9SeObEoLrpgHW2VHvZE48EhgVf8GfAEIwBeaDMfM,2129
32
32
  sky/backends/__init__.py,sha256=UDjwbUgpTRApbPJnNfR786GadUuwgRk3vsWoVu5RB_c,536
33
33
  sky/backends/backend.py,sha256=iBs5gnMaaUoH2OIQ3xhAjWdrJWqj8T61Za9TGsBFpvQ,7515
34
34
  sky/backends/backend_utils.py,sha256=Eeew8YV0VYSYxozqzadNMZrjhEMjlE3yuzTRP7YSl50,137348
35
- sky/backends/cloud_vm_ray_backend.py,sha256=ANDYIisCZ-IKWHIdQ2-XoJzxaASaOZxVxdTBI4f2Yo0,247430
35
+ sky/backends/cloud_vm_ray_backend.py,sha256=J7_vO4VqednTvSjiQhRapL5zGZctdpoLUYCoblNOCcc,247106
36
36
  sky/backends/docker_utils.py,sha256=Hyw1YY20EyghhEbYx6O2FIMDcGkNzBzV9TM7LFynei8,8358
37
37
  sky/backends/local_docker_backend.py,sha256=nSYCjms3HOPjPNOrcCqsUKm1WV3AAovRFjEQ7hcEXW4,17021
38
38
  sky/backends/wheel_utils.py,sha256=5BUzBqfYz7p1ME6_0PXGmcsAkLVb8NrFt317p7a4X8s,8278
@@ -94,15 +94,16 @@ sky/data/__init__.py,sha256=Nhaf1NURisXpZuwWANa2IuCyppIuc720FRwqSE2oEwY,184
94
94
  sky/data/data_transfer.py,sha256=wixC4_3_JaeJFdGKOp-O5ulcsMugDSgrCR0SnPpugGc,8946
95
95
  sky/data/data_utils.py,sha256=HjcgMDuWRR_fNQ9gjuROi9GgPVvTGApiJwxGtdb2_UU,28860
96
96
  sky/data/mounting_utils.py,sha256=tJHBPEDP1Wg_r3oSGBwFhMDLnPCMPSFRz26O0QkDd0Y,14908
97
- sky/data/storage.py,sha256=07ccD5YaQ9j6R_zPkvNk7qXnW3awDkCn9V-Sx-KXGvo,201715
97
+ sky/data/storage.py,sha256=CWVKnHhdzXw1biPbRqYizkyVexL_OCELuJCqtd4hit4,204094
98
98
  sky/data/storage_utils.py,sha256=cM3kxlffYE7PnJySDu8huyUsMX_JYsf9uer8r5OYsjo,9556
99
99
  sky/jobs/__init__.py,sha256=ObZcz3lL1ip8JcmR6gbfZ4RMMfXJJdsnuU2zLQUb8jY,1546
100
- sky/jobs/constants.py,sha256=YLgcCg_RHSYr_rfsI_4UIdXk78KKKOK29Oem88t5j8I,1350
101
- sky/jobs/controller.py,sha256=DDt92Sa0TV3VULnEyM5QopUowciH6PE9u0yTDumFatM,28538
102
- sky/jobs/core.py,sha256=AVbboohNCUDqfK_7DDkc-wJOg87nE7L6Vw0wbPTelIA,20022
103
- sky/jobs/recovery_strategy.py,sha256=eP9CLy5qiNTyMJTWWzAxdQ4YolUZWL1g3cLMH7tw8Es,27312
104
- sky/jobs/state.py,sha256=1NeW0SVtfVd02MnS9OzvV-OV9Plch8QLH-ZZnttaLCg,27598
105
- sky/jobs/utils.py,sha256=G-3f0qxJEep4Rl52UxnXLcVmjt2uLYn0qUja1pClwmw,39031
100
+ sky/jobs/constants.py,sha256=6RphkJ6pmafQ7XYW5qwId1Zvqb99HJelA9kgrgfNR7o,1421
101
+ sky/jobs/controller.py,sha256=Qv7vOj4OXkbnZF0F9wKrlJsYhkSNJjJ1Mgrn2FyQyaM,28250
102
+ sky/jobs/core.py,sha256=2_Q9thiBPnd3i2nDqyUtQY-dsGZ1kRgAdnLcXHoycYo,19938
103
+ sky/jobs/recovery_strategy.py,sha256=m-EA-MWXPFrgx2CYFPr6MmgeUoDTEBmY2xruD2PRSGY,26365
104
+ sky/jobs/scheduler.py,sha256=WAvNb8-vBk8q1zFordFdpH7gxqWDjPHDGZZay6aodOk,12028
105
+ sky/jobs/state.py,sha256=DK-tQnN200SpCXzDllRRibfQOjDCCFo7VRGSdi-FKA4,37856
106
+ sky/jobs/utils.py,sha256=waKmLbUNRXeuYKBn_U7sekSFGAEgoPp9QemUULK4Y9k,49491
106
107
  sky/jobs/dashboard/dashboard.py,sha256=KMSarpVcfnc-ELPFvy1M9_I1k4kSeXubTk3ibQC67Tg,3219
107
108
  sky/jobs/dashboard/static/favicon.ico,sha256=uYlvgxSM7gjBmXpZ8wydvZUPAbJiiix-rc2Xe5mma9s,15086
108
109
  sky/jobs/dashboard/templates/index.html,sha256=su1tqgcsXNl1lGl9hfIR6ig1f531OO57x1Tc2mNDK7U,11139
@@ -145,7 +146,7 @@ sky/provision/gcp/instance_utils.py,sha256=veRBr6Oziv0KaUdC4acuWeaOremNV0gMYCCHa
145
146
  sky/provision/gcp/mig_utils.py,sha256=oFpcFZoapHMILSE4iIm8V5bxP1RhbMHRF7cciqq8qAk,7883
146
147
  sky/provision/kubernetes/__init__.py,sha256=y6yVfii81WYG3ROxv4hiIj-ydinS5-xGxLvXnARVQoI,719
147
148
  sky/provision/kubernetes/config.py,sha256=bXwOGdSAnXCkDreew0KsSUqSv3ZrptNeevqat76LLts,29012
148
- sky/provision/kubernetes/instance.py,sha256=OpX2S2O4ubI7J-1yuJmhbYGxQUcYDzZRGihsjA9LlYs,50307
149
+ sky/provision/kubernetes/instance.py,sha256=AQikdRgNklpeMgiEd4w2Hh7kGssVABsy0aCh9xsKi5Y,50313
149
150
  sky/provision/kubernetes/network.py,sha256=EpNjRQ131CXepqbdkoRKFu4szVrm0oKEpv1l8EgOkjU,12364
150
151
  sky/provision/kubernetes/network_utils.py,sha256=52BZY_5ynCH6IXlivKObYyAHDgQCJyAJIjmM7J4MpFo,11393
151
152
  sky/provision/kubernetes/utils.py,sha256=BklPlHXKNTNKamdAygnQ_sOIROq1bN3xbIPxwNRqMV0,104774
@@ -190,10 +191,10 @@ sky/serve/__init__.py,sha256=Bqw8nB9u1QF3ryjbV797SPZq0DWAcjT94E_5B8J24ag,1808
190
191
  sky/serve/autoscalers.py,sha256=N7yRGT9Ay5_yJUOkqaBGC7jG3eIdzA5d66i8kskGxZc,30351
191
192
  sky/serve/constants.py,sha256=7MflfgTHO9gDSux93U4BmNeEMWXxZB4q7I54KUwgp-s,4651
192
193
  sky/serve/controller.py,sha256=R5iIEGEEFtbm_6MvSGelYZP-vSmW0cSFuy64OexUc4g,11719
193
- sky/serve/core.py,sha256=j2pyYi_DPHndVe-lQ_WdLaI0_NBgH3tHosi8vV6fCBg,32303
194
+ sky/serve/core.py,sha256=UAbbnxmOZ8GBT7vaeFvtFC7_qXu05TFsNIFcLrdT3Oo,33341
194
195
  sky/serve/load_balancer.py,sha256=nNvDPJPRIrBc_qsBYJz1zzKa_fXDgfi0VDUf4SJEuW8,12990
195
196
  sky/serve/load_balancing_policies.py,sha256=XVj76qBgqh7h6wfx53RKQFzBefDWTE4TCdCEtFLLtI4,5398
196
- sky/serve/replica_managers.py,sha256=mNlIOdyd1Mo_PTGazHOQHA-Ql778TdDHZQ7V1yTMSiY,57764
197
+ sky/serve/replica_managers.py,sha256=SFvK7ewilc3NVRcqXg63WtU1WmhJKPtJd27JfKR2aow,57716
197
198
  sky/serve/serve_state.py,sha256=MAx63zlGOXaIgXedP9fUFlRxDKiez1shmyMetrJK6yQ,19756
198
199
  sky/serve/serve_utils.py,sha256=WgPcqEw3WyMOdgRTFg8DSsWyIG1xnRbRkI1-f09tNKg,39741
199
200
  sky/serve/service.py,sha256=7bvK9R9D48PZSYcOKSievXQ2mHUMk1d3AAIxtra7WOI,12083
@@ -206,12 +207,12 @@ sky/skylet/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
206
207
  sky/skylet/attempt_skylet.py,sha256=GZ6ITjjA0m-da3IxXXfoHR6n4pjp3X3TOXUqVvSrV0k,2136
207
208
  sky/skylet/autostop_lib.py,sha256=JPDHmByuhoNYXSUHl-OnyeJUkOFWn7gDM1FrS7Kr3E8,4478
208
209
  sky/skylet/configs.py,sha256=UtnpmEL0F9hH6PSjhsps7xgjGZ6qzPOfW1p2yj9tSng,1887
209
- sky/skylet/constants.py,sha256=1h5nhXsAvryo9THpfQ0wQKPSDjXcY9GeN6oX378yAyM,16021
210
- sky/skylet/events.py,sha256=A09E7LmmwzcGrSG0n8K7d3EZ1ZJr1mmmzoGyhnArYJA,12303
211
- sky/skylet/job_lib.py,sha256=9YO4N_0cSn4Pp8nia1iTGESWUd1eO06H4vvjr-s0UCE,43840
212
- sky/skylet/log_lib.py,sha256=fcQzEe4OK8exsNVBhbdYe4uIq2cdSHszsKZTtX8a3-Q,20453
213
- sky/skylet/log_lib.pyi,sha256=VpA_VoL970Noj-YrBkKqLxFi34JVMY7KLrOQ3o4AqEI,4336
214
- sky/skylet/skylet.py,sha256=U9plr5hmhD9-Nyy0LMCymlE8DWtRXTFXQvfbFsS746Y,1153
210
+ sky/skylet/constants.py,sha256=qW5tilb-D4B5IVlwphCFhPHxB3q63ICjgTbx-ftfAQQ,16022
211
+ sky/skylet/events.py,sha256=8xK2J_KOPUugZUFQunnrTCMtGJaI8Fodtv6HJjBLsAI,12532
212
+ sky/skylet/job_lib.py,sha256=Rk-C069cusJIRXsks8xqCb016JSt7GlpU7LrpX0qFJk,42785
213
+ sky/skylet/log_lib.py,sha256=oFEBd85vDYFrIyyZKekH30yc4rRYILC0F0o-COQ64oE,20445
214
+ sky/skylet/log_lib.pyi,sha256=rRk4eUX0RHGs1QL9CXsJq6RE7FqqxZlfuPJOLXTvg7I,4453
215
+ sky/skylet/skylet.py,sha256=Tpv4yYR3jwxZsYeFPexB1gS1bCL5_AAfPzGKLsknPhA,1147
215
216
  sky/skylet/subprocess_daemon.py,sha256=gcL-_Hea7-SrBUyZfAbo40RBFbaeuBmPCW0dm4YYkPo,3537
216
217
  sky/skylet/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
217
218
  sky/skylet/providers/command_runner.py,sha256=DdBKP0QX325_N3zAVYwnmXmfbfXNqkzWQZpF9DSR7Go,16259
@@ -237,7 +238,7 @@ sky/templates/do-ray.yml.j2,sha256=sRKpn0tC-uPYtSZ20OB4fMzE7RbPQUr8kOCIbuJ4b4Q,4
237
238
  sky/templates/fluidstack-ray.yml.j2,sha256=t8TCULgiErCZdtFmBZVsA8ZdcqR7ccwsmQhuDFTBEAU,3541
238
239
  sky/templates/gcp-ray.yml.j2,sha256=y95B-Nk6hFxm6vEIaxI1wFzAIcy_GcKC3XMYo9m-ThI,9662
239
240
  sky/templates/ibm-ray.yml.j2,sha256=RMBUqPId8i4CnVwcyfK3DbRapF1jFMuGQlY0E0PFbMU,6669
240
- sky/templates/jobs-controller.yaml.j2,sha256=SDC4VzQ-difQ1pSh6YensI14GDVJjeKMBMjl7gibq7A,2597
241
+ sky/templates/jobs-controller.yaml.j2,sha256=xShAe0ei-psRvfdJZ5ebQufEDIbaKV8KWEnQD-TYF5k,2705
241
242
  sky/templates/kubernetes-ingress.yml.j2,sha256=73iDklVDWBMbItg0IexCa6_ClXPJOxw7PWz3leku4nE,1340
242
243
  sky/templates/kubernetes-loadbalancer.yml.j2,sha256=IxrNYM366N01bbkJEbZ_UPYxUP8wyVEbRNFHRsBuLsw,626
243
244
  sky/templates/kubernetes-port-forward-proxy-command.sh,sha256=iw7mypHszg6Ggq9MbyiYMFOkSlXaQZulaxqC5IWYGCc,3381
@@ -268,10 +269,10 @@ sky/utils/db_utils.py,sha256=K2-OHPg0FeHCarevMdWe0IWzm6wWumViEeYeJuGoFUE,3747
268
269
  sky/utils/env_options.py,sha256=E5iwRFBUY2Iq6e0y0c1Mv5OSQ4MRNdk0-p38xUyVerc,1366
269
270
  sky/utils/kubernetes_enums.py,sha256=imGqHSa8O07zD_6xH1SDMM7dBU5lF5fzFFlQuQy00QM,1384
270
271
  sky/utils/log_utils.py,sha256=xEbUZfDiIiZkyWoLHXwIcqVMCBDEENsLCiogEXMDLt0,14139
271
- sky/utils/resources_utils.py,sha256=Xqi7gxPYw2y5wl5okUI5zx5LEij0hJF_V3Zi8q7TXYg,7890
272
+ sky/utils/resources_utils.py,sha256=06Kx6AfbBdwBYGmIYFEY_qm6OBc2a5esZMPvIX7gCvc,7787
272
273
  sky/utils/rich_utils.py,sha256=hmnI1X5dKvRIQzB7EyNb34FT97qFNve-0QHqM5r0mVk,3066
273
274
  sky/utils/schemas.py,sha256=KcU6wSmLQ-2HhfE6m4RHN9D3mqMAc8X1j5vOb-bUki0,30064
274
- sky/utils/subprocess_utils.py,sha256=ClFU9kb6rBp2Ou7P0c4eX_Lda5-pZZxtZT8jE3H74r8,10530
275
+ sky/utils/subprocess_utils.py,sha256=YhtxqHoaZLw2M9TikTH56dTboZN3Qu2RsGeWo4uwJVA,12054
275
276
  sky/utils/timeline.py,sha256=ebHxKJK2HX0utGArrUgSezTPkcwav3VETa_AQS34t-E,3925
276
277
  sky/utils/ux_utils.py,sha256=CqyIFGDuSE8fQasPkna_loZMwtboC9KedR09WEQ7qz0,6502
277
278
  sky/utils/validator.py,sha256=cAFERCoC7jH0DFKepcU4x9SYmdrYL1iVmW9tXA18hvo,701
@@ -280,7 +281,7 @@ sky/utils/cli_utils/status_utils.py,sha256=2HrH6IBJCJ__AbuZ0ooIEgarBKIVIA5M3myE5
280
281
  sky/utils/kubernetes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
281
282
  sky/utils/kubernetes/create_cluster.sh,sha256=VLXfazav9XCMQmeKVqhuOQzt2vM6G1jgnvvb0SHUFno,7773
282
283
  sky/utils/kubernetes/delete_cluster.sh,sha256=BSccHF43GyepDNf-FZcenzHzpXXATkVD92vgn1lWPgk,927
283
- sky/utils/kubernetes/deploy_remote_cluster.sh,sha256=LzrhZbZ8W399U_0IktTi3Elb0w91oq267e4Wk5oUfb4,8471
284
+ sky/utils/kubernetes/deploy_remote_cluster.sh,sha256=dj_q2LHFgq03bXJWJhtMFFWOpcWnWAYKfFQyMv7Gr5A,8551
284
285
  sky/utils/kubernetes/generate_kind_config.py,sha256=_TNLnifA_r7-CRq083IP1xjelYqiLjzQX9ohuqYpDH8,3187
285
286
  sky/utils/kubernetes/generate_kubeconfig.sh,sha256=MBvXJio0PeujZSCXiRKE_pa6HCTiU9qBzR1WrXccVSY,10477
286
287
  sky/utils/kubernetes/gpu_labeler.py,sha256=4px7FyfsukacPEvKwTLUNb3WwacMIUrHWjP93qTi3kE,6998
@@ -288,9 +289,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml,sha256=k0TBoQ4zgf79-sVkixKSGYFHQ7Z
288
289
  sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488oMQvhRZWwsj9vBbPUg,3812
289
290
  sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
290
291
  sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=Kq1MDygF2IxFmu9FXpCxqucXLmeUrvs6OtRij6XTQbo,6554
291
- skypilot_nightly-1.0.0.dev20250115.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
292
- skypilot_nightly-1.0.0.dev20250115.dist-info/METADATA,sha256=-lAA4FLTOtuusE4mPzY1fSOIimvAVooos0h4Og89Hzs,20884
293
- skypilot_nightly-1.0.0.dev20250115.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
294
- skypilot_nightly-1.0.0.dev20250115.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
295
- skypilot_nightly-1.0.0.dev20250115.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
296
- skypilot_nightly-1.0.0.dev20250115.dist-info/RECORD,,
292
+ skypilot_nightly-1.0.0.dev20250117.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
293
+ skypilot_nightly-1.0.0.dev20250117.dist-info/METADATA,sha256=NeM0Gfty_C94SeoCFNc7G5N0Q2Gb4tNCxgwZv_e_jnQ,20884
294
+ skypilot_nightly-1.0.0.dev20250117.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
295
+ skypilot_nightly-1.0.0.dev20250117.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
296
+ skypilot_nightly-1.0.0.dev20250117.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
297
+ skypilot_nightly-1.0.0.dev20250117.dist-info/RECORD,,