skypilot-nightly 1.0.0.dev20250510__py3-none-any.whl → 1.0.0.dev20250513__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +3 -0
  3. sky/backends/cloud_vm_ray_backend.py +7 -0
  4. sky/cli.py +109 -109
  5. sky/client/cli.py +109 -109
  6. sky/clouds/gcp.py +35 -8
  7. sky/dashboard/out/404.html +1 -1
  8. sky/dashboard/out/_next/static/{C0fkLhvxyqkymoV7IeInQ → 2dkponv64SfFShA8Rnw0D}/_buildManifest.js +1 -1
  9. sky/dashboard/out/_next/static/chunks/845-0ca6f2c1ba667c3b.js +1 -0
  10. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  11. sky/dashboard/out/clusters/[cluster].html +1 -1
  12. sky/dashboard/out/clusters.html +1 -1
  13. sky/dashboard/out/index.html +1 -1
  14. sky/dashboard/out/jobs/[job].html +1 -1
  15. sky/dashboard/out/jobs.html +1 -1
  16. sky/global_user_state.py +2 -0
  17. sky/provision/docker_utils.py +4 -1
  18. sky/provision/gcp/config.py +197 -15
  19. sky/provision/gcp/constants.py +64 -0
  20. sky/provision/nebius/instance.py +3 -1
  21. sky/provision/nebius/utils.py +4 -2
  22. sky/server/requests/executor.py +114 -22
  23. sky/server/requests/requests.py +15 -0
  24. sky/server/server.py +12 -7
  25. sky/server/uvicorn.py +12 -2
  26. sky/sky_logging.py +40 -2
  27. sky/skylet/constants.py +3 -0
  28. sky/skylet/log_lib.py +51 -11
  29. sky/templates/gcp-ray.yml.j2 +11 -0
  30. sky/templates/nebius-ray.yml.j2 +4 -0
  31. sky/templates/websocket_proxy.py +29 -9
  32. sky/utils/command_runner.py +3 -0
  33. sky/utils/context.py +264 -0
  34. sky/utils/context_utils.py +172 -0
  35. sky/utils/rich_utils.py +81 -37
  36. sky/utils/schemas.py +9 -1
  37. sky/utils/subprocess_utils.py +8 -2
  38. {skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250513.dist-info}/METADATA +1 -1
  39. {skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250513.dist-info}/RECORD +44 -42
  40. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  41. /sky/dashboard/out/_next/static/{C0fkLhvxyqkymoV7IeInQ → 2dkponv64SfFShA8Rnw0D}/_ssgManifest.js +0 -0
  42. {skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250513.dist-info}/WHEEL +0 -0
  43. {skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250513.dist-info}/entry_points.txt +0 -0
  44. {skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250513.dist-info}/licenses/LICENSE +0 -0
  45. {skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250513.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '18a1d8499158f53818133261776ae408ac447de3'
8
+ _SKYPILOT_COMMIT_SHA = 'c23907b7f1baf65740791dc1e17ff1411e7d9a97'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20250510'
38
+ __version__ = '1.0.0.dev20250513'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
@@ -40,6 +40,7 @@ from sky.utils import cluster_utils
40
40
  from sky.utils import command_runner
41
41
  from sky.utils import common
42
42
  from sky.utils import common_utils
43
+ from sky.utils import context_utils
43
44
  from sky.utils import controller_utils
44
45
  from sky.utils import env_options
45
46
  from sky.utils import registry
@@ -2204,6 +2205,7 @@ def refresh_cluster_record(
2204
2205
 
2205
2206
 
2206
2207
  @timeline.event
2208
+ @context_utils.cancellation_guard
2207
2209
  def refresh_cluster_status_handle(
2208
2210
  cluster_name: str,
2209
2211
  *,
@@ -2253,6 +2255,7 @@ def check_cluster_available(
2253
2255
  ...
2254
2256
 
2255
2257
 
2258
+ @context_utils.cancellation_guard
2256
2259
  def check_cluster_available(
2257
2260
  cluster_name: str,
2258
2261
  *,
@@ -61,6 +61,7 @@ from sky.utils import cluster_utils
61
61
  from sky.utils import command_runner
62
62
  from sky.utils import common
63
63
  from sky.utils import common_utils
64
+ from sky.utils import context_utils
64
65
  from sky.utils import controller_utils
65
66
  from sky.utils import env_options
66
67
  from sky.utils import log_utils
@@ -274,6 +275,7 @@ class RayCodeGen:
274
275
  ray_address = 'auto'
275
276
  self._code = [
276
277
  textwrap.dedent(f"""\
278
+ import functools
277
279
  import getpass
278
280
  import hashlib
279
281
  import io
@@ -301,6 +303,8 @@ class RayCodeGen:
301
303
  from sky.skylet import autostop_lib
302
304
  from sky.skylet import constants
303
305
  from sky.skylet import job_lib
306
+ from sky.utils import context
307
+ from sky.utils import context_utils
304
308
  from sky.utils import log_utils
305
309
  from sky.utils import subprocess_utils
306
310
 
@@ -2415,6 +2419,7 @@ class CloudVmRayResourceHandle(backends.backend.ResourceHandle):
2415
2419
  internal_external_ips[1:], key=lambda x: x[1])
2416
2420
  self.stable_internal_external_ips = stable_internal_external_ips
2417
2421
 
2422
+ @context_utils.cancellation_guard
2418
2423
  @annotations.lru_cache(scope='global')
2419
2424
  @timeline.event
2420
2425
  def get_command_runners(self,
@@ -3842,6 +3847,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
3842
3847
  subprocess_utils.run_in_parallel(_rsync_down, parallel_args)
3843
3848
  return dict(zip(job_ids, local_log_dirs))
3844
3849
 
3850
+ @context_utils.cancellation_guard
3845
3851
  def tail_logs(self,
3846
3852
  handle: CloudVmRayResourceHandle,
3847
3853
  job_id: Optional[int],
@@ -4559,6 +4565,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
4559
4565
  # TODO(zhwu): Refactor this to a CommandRunner class, so different backends
4560
4566
  # can support its own command runner.
4561
4567
  @timeline.event
4568
+ @context_utils.cancellation_guard
4562
4569
  def run_on_head(
4563
4570
  self,
4564
4571
  handle: CloudVmRayResourceHandle,
sky/cli.py CHANGED
@@ -91,6 +91,8 @@ from sky.utils.cli_utils import status_utils
91
91
  if typing.TYPE_CHECKING:
92
92
  import types
93
93
 
94
+ import prettytable
95
+
94
96
  pd = adaptors_common.LazyImport('pandas')
95
97
  logger = sky_logging.init_logger(__name__)
96
98
 
@@ -3371,12 +3373,8 @@ def show_gpus(
3371
3373
  * ``QTY_PER_NODE`` (Kubernetes only): GPU quantities that can be requested
3372
3374
  on a single node.
3373
3375
 
3374
- * ``TOTAL_GPUS`` (Kubernetes only): Total number of GPUs available in the
3375
- Kubernetes cluster.
3376
-
3377
- * ``TOTAL_FREE_GPUS`` (Kubernetes only): Number of currently free GPUs
3378
- in the Kubernetes cluster. This is fetched in real-time and may change
3379
- when other users are using the cluster.
3376
+ * ``UTILIZATION`` (Kubernetes only): Total number of GPUs free / available
3377
+ in the Kubernetes cluster.
3380
3378
  """
3381
3379
  # validation for the --region flag
3382
3380
  if region is not None and cloud is None:
@@ -3415,15 +3413,16 @@ def show_gpus(
3415
3413
  # TODO(zhwu,romilb): We should move most of these kubernetes related
3416
3414
  # queries into the backend, especially behind the server.
3417
3415
  def _get_kubernetes_realtime_gpu_tables(
3418
- context: Optional[str] = None,
3419
- name_filter: Optional[str] = None,
3420
- quantity_filter: Optional[int] = None):
3416
+ context: Optional[str] = None,
3417
+ name_filter: Optional[str] = None,
3418
+ quantity_filter: Optional[int] = None
3419
+ ) -> Tuple[List[Tuple[str, 'prettytable.PrettyTable']],
3420
+ Optional['prettytable.PrettyTable'], List[Tuple[
3421
+ str, 'models.KubernetesNodesInfo']]]:
3421
3422
  if quantity_filter:
3422
3423
  qty_header = 'QTY_FILTER'
3423
- free_header = 'FILTERED_FREE_GPUS'
3424
3424
  else:
3425
3425
  qty_header = 'REQUESTABLE_QTY_PER_NODE'
3426
- free_header = 'TOTAL_FREE_GPUS'
3427
3426
 
3428
3427
  realtime_gpu_availability_lists = sdk.stream_and_get(
3429
3428
  sdk.realtime_kubernetes_gpu_availability(
@@ -3449,41 +3448,19 @@ def show_gpus(
3449
3448
  realtime_gpu_infos = []
3450
3449
  total_gpu_info: Dict[str, List[int]] = collections.defaultdict(
3451
3450
  lambda: [0, 0])
3451
+ all_nodes_info = []
3452
3452
 
3453
- # TODO(kyuds): remove backwards compatibility code (else branch)
3454
- # when API version is bumped
3455
3453
  if realtime_gpu_availability_lists:
3456
- # can't check for isinstance tuple as the tuple is converted to list
3457
- if len(realtime_gpu_availability_lists[0]) == 2:
3458
- for (ctx, availability_list) in realtime_gpu_availability_lists:
3459
- realtime_gpu_table = log_utils.create_table(
3460
- ['GPU', qty_header, 'TOTAL_GPUS', free_header])
3461
- for realtime_gpu_availability in sorted(availability_list):
3462
- gpu_availability = models.RealtimeGpuAvailability(
3463
- *realtime_gpu_availability)
3464
- available_qty = (gpu_availability.available
3465
- if gpu_availability.available != -1
3466
- else no_permissions_str)
3467
- realtime_gpu_table.add_row([
3468
- gpu_availability.gpu,
3469
- _list_to_str(gpu_availability.counts),
3470
- gpu_availability.capacity,
3471
- available_qty,
3472
- ])
3473
- gpu = gpu_availability.gpu
3474
- capacity = gpu_availability.capacity
3475
- # we want total, so skip permission denied.
3476
- available = max(gpu_availability.available, 0)
3477
- if capacity > 0:
3478
- total_gpu_info[gpu][0] += capacity
3479
- total_gpu_info[gpu][1] += available
3480
- realtime_gpu_infos.append((ctx, realtime_gpu_table))
3481
- else:
3482
- # can remove this with api server version bump.
3483
- # 2025.05.03
3484
- availability_list = realtime_gpu_availability_lists
3454
+ if len(realtime_gpu_availability_lists[0]) != 2:
3455
+ # TODO(kyuds): for backwards compatibility, as we add new
3456
+ # context to the API server response in #5362. Remove this after
3457
+ # 0.10.0.
3458
+ realtime_gpu_availability_lists = [
3459
+ (context, realtime_gpu_availability_lists)
3460
+ ]
3461
+ for (ctx, availability_list) in realtime_gpu_availability_lists:
3485
3462
  realtime_gpu_table = log_utils.create_table(
3486
- ['GPU', qty_header, 'TOTAL_GPUS', free_header])
3463
+ ['GPU', qty_header, 'UTILIZATION'])
3487
3464
  for realtime_gpu_availability in sorted(availability_list):
3488
3465
  gpu_availability = models.RealtimeGpuAvailability(
3489
3466
  *realtime_gpu_availability)
@@ -3493,49 +3470,100 @@ def show_gpus(
3493
3470
  realtime_gpu_table.add_row([
3494
3471
  gpu_availability.gpu,
3495
3472
  _list_to_str(gpu_availability.counts),
3496
- gpu_availability.capacity,
3497
- available_qty,
3473
+ f'{available_qty} of {gpu_availability.capacity} free',
3498
3474
  ])
3499
- realtime_gpu_infos.append((context, realtime_gpu_table))
3475
+ gpu = gpu_availability.gpu
3476
+ capacity = gpu_availability.capacity
3477
+ # we want total, so skip permission denied.
3478
+ available = max(gpu_availability.available, 0)
3479
+ if capacity > 0:
3480
+ total_gpu_info[gpu][0] += capacity
3481
+ total_gpu_info[gpu][1] += available
3482
+ realtime_gpu_infos.append((ctx, realtime_gpu_table))
3483
+ # Collect node info for this context
3484
+ nodes_info = sdk.stream_and_get(
3485
+ sdk.kubernetes_node_info(context=ctx))
3486
+ all_nodes_info.append((ctx, nodes_info))
3500
3487
 
3501
3488
  # display an aggregated table for all contexts
3502
3489
  # if there are more than one contexts with GPUs
3503
3490
  if len(realtime_gpu_infos) > 1:
3504
3491
  total_realtime_gpu_table = log_utils.create_table(
3505
- ['GPU', 'TOTAL_GPUS', free_header])
3492
+ ['GPU', 'UTILIZATION'])
3506
3493
  for gpu, stats in total_gpu_info.items():
3507
- total_realtime_gpu_table.add_row([gpu, stats[0], stats[1]])
3494
+ total_realtime_gpu_table.add_row(
3495
+ [gpu, f'{stats[1]} of {stats[0]} free'])
3508
3496
  else:
3509
3497
  total_realtime_gpu_table = None
3510
3498
 
3511
- return realtime_gpu_infos, total_realtime_gpu_table
3499
+ return realtime_gpu_infos, total_realtime_gpu_table, all_nodes_info
3512
3500
 
3513
- def _format_kubernetes_node_info(context: Optional[str]):
3501
+ def _format_kubernetes_node_info_combined(
3502
+ contexts_info: List[Tuple[str,
3503
+ 'models.KubernetesNodesInfo']]) -> str:
3514
3504
  node_table = log_utils.create_table(
3515
- ['NODE_NAME', 'GPU_NAME', 'TOTAL_GPUS', 'FREE_GPUS'])
3505
+ ['CONTEXT', 'NODE', 'GPU', 'UTILIZATION'])
3516
3506
 
3517
- nodes_info = sdk.stream_and_get(
3518
- sdk.kubernetes_node_info(context=context))
3519
3507
  no_permissions_str = '<no permissions>'
3520
- for node_name, node_info in nodes_info.node_info_dict.items():
3521
- available = node_info.free[
3522
- 'accelerators_available'] if node_info.free[
3523
- 'accelerators_available'] != -1 else no_permissions_str
3524
- total = node_info.total['accelerator_count']
3525
- if total > 0:
3508
+ hints = []
3509
+
3510
+ for context, nodes_info in contexts_info:
3511
+ context_name = context if context else 'default'
3512
+ if nodes_info.hint:
3513
+ hints.append(f'{context_name}: {nodes_info.hint}')
3514
+
3515
+ for node_name, node_info in nodes_info.node_info_dict.items():
3516
+ available = node_info.free[
3517
+ 'accelerators_available'] if node_info.free[
3518
+ 'accelerators_available'] != -1 else no_permissions_str
3519
+ acc_type = node_info.accelerator_type
3520
+ if acc_type is None:
3521
+ acc_type = '-'
3526
3522
  node_table.add_row([
3527
- node_name, node_info.accelerator_type,
3528
- node_info.total['accelerator_count'], available
3523
+ context_name, node_name, acc_type,
3524
+ f'{available} of {node_info.total["accelerator_count"]} '
3525
+ 'free'
3529
3526
  ])
3530
- k8s_per_node_acc_message = (
3531
- 'Kubernetes per node accelerator availability ')
3532
- if nodes_info.hint:
3533
- k8s_per_node_acc_message += nodes_info.hint
3527
+
3528
+ k8s_per_node_acc_message = ('Kubernetes per-node GPU availability')
3529
+ if hints:
3530
+ k8s_per_node_acc_message += ' (' + '; '.join(hints) + ')'
3531
+
3534
3532
  return (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
3535
3533
  f'{k8s_per_node_acc_message}'
3536
3534
  f'{colorama.Style.RESET_ALL}\n'
3537
3535
  f'{node_table.get_string()}')
3538
3536
 
3537
+ def _format_kubernetes_realtime_gpu(
3538
+ total_table: 'prettytable.PrettyTable',
3539
+ k8s_realtime_infos: List[Tuple[str, 'prettytable.PrettyTable']],
3540
+ all_nodes_info: List[Tuple[str, 'models.KubernetesNodesInfo']],
3541
+ show_node_info: bool) -> Generator[str, None, None]:
3542
+ yield (f'{colorama.Fore.GREEN}{colorama.Style.BRIGHT}'
3543
+ 'Kubernetes GPUs'
3544
+ f'{colorama.Style.RESET_ALL}')
3545
+ # print total table
3546
+ if total_table is not None:
3547
+ yield '\n'
3548
+ yield from total_table.get_string()
3549
+
3550
+ # print individual infos.
3551
+ for (ctx, k8s_realtime_table) in k8s_realtime_infos:
3552
+ yield '\n'
3553
+ # Print context header separately
3554
+ if ctx:
3555
+ context_str = f'Context: {ctx}'
3556
+ else:
3557
+ context_str = 'Default Context'
3558
+ yield (
3559
+ f'{colorama.Fore.CYAN}{context_str}{colorama.Style.RESET_ALL}\n'
3560
+ )
3561
+ yield from k8s_realtime_table.get_string()
3562
+
3563
+ if show_node_info:
3564
+ yield '\n'
3565
+ yield _format_kubernetes_node_info_combined(all_nodes_info)
3566
+
3539
3567
  def _output() -> Generator[str, None, None]:
3540
3568
  gpu_table = log_utils.create_table(
3541
3569
  ['COMMON_GPU', 'AVAILABLE_QUANTITIES'])
@@ -3568,7 +3596,7 @@ def show_gpus(
3568
3596
  # If --cloud kubernetes is not specified, we want to catch
3569
3597
  # the case where no GPUs are available on the cluster and
3570
3598
  # print the warning at the end.
3571
- k8s_realtime_infos, total_table = _get_kubernetes_realtime_gpu_tables(context) # pylint: disable=line-too-long
3599
+ k8s_realtime_infos, total_table, all_nodes_info = _get_kubernetes_realtime_gpu_tables(context) # pylint: disable=line-too-long
3572
3600
  except ValueError as e:
3573
3601
  if not cloud_is_kubernetes:
3574
3602
  # Make it a note if cloud is not kubernetes
@@ -3577,27 +3605,12 @@ def show_gpus(
3577
3605
  else:
3578
3606
  print_section_titles = True
3579
3607
 
3580
- # print total table
3581
- if total_table is not None:
3582
- yield (f'{colorama.Fore.GREEN}{colorama.Style.BRIGHT}'
3583
- 'Total Kubernetes GPUs'
3584
- f'{colorama.Style.RESET_ALL}\n')
3585
- yield from total_table.get_string()
3586
- yield '\n\n'
3587
-
3588
- # print individual infos.
3589
- for (idx,
3590
- (ctx,
3591
- k8s_realtime_table)) in enumerate(k8s_realtime_infos):
3592
- context_str = f'(Context: {ctx})' if ctx else ''
3593
- yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
3594
- f'Kubernetes GPUs {context_str}'
3595
- f'{colorama.Style.RESET_ALL}\n')
3596
- yield from k8s_realtime_table.get_string()
3597
- yield '\n\n'
3598
- yield _format_kubernetes_node_info(ctx)
3599
- if idx != len(k8s_realtime_infos) - 1:
3600
- yield '\n\n'
3608
+ yield from _format_kubernetes_realtime_gpu(
3609
+ total_table,
3610
+ k8s_realtime_infos,
3611
+ all_nodes_info,
3612
+ show_node_info=True)
3613
+
3601
3614
  if kubernetes_autoscaling:
3602
3615
  k8s_messages += (
3603
3616
  '\n' + kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE)
@@ -3688,31 +3701,18 @@ def show_gpus(
3688
3701
  print_section_titles = True
3689
3702
  # TODO(romilb): Show filtered per node GPU availability here as well
3690
3703
  try:
3691
- k8s_realtime_infos, total_table = _get_kubernetes_realtime_gpu_tables( # pylint: disable=line-too-long
3692
- context=region,
3693
- name_filter=name,
3694
- quantity_filter=quantity)
3695
-
3696
- # print total table
3697
- if total_table is not None:
3698
- yield (f'{colorama.Fore.GREEN}{colorama.Style.BRIGHT}'
3699
- 'Total Kubernetes GPUs'
3700
- f'{colorama.Style.RESET_ALL}\n')
3701
- yield from total_table.get_string()
3702
- yield '\n\n'
3703
-
3704
- # print individual tables
3705
- for (ctx, k8s_realtime_table) in k8s_realtime_infos:
3706
- context_str = f'(Context: {ctx})' if ctx else ''
3707
- yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
3708
- f'Kubernetes GPUs {context_str}'
3709
- f'{colorama.Style.RESET_ALL}\n')
3710
- yield from k8s_realtime_table.get_string()
3711
- yield '\n\n'
3704
+ (k8s_realtime_infos, total_table,
3705
+ all_nodes_info) = _get_kubernetes_realtime_gpu_tables(
3706
+ context=region, name_filter=name, quantity_filter=quantity)
3707
+
3708
+ yield from _format_kubernetes_realtime_gpu(total_table,
3709
+ k8s_realtime_infos,
3710
+ all_nodes_info,
3711
+ show_node_info=False)
3712
3712
  except ValueError as e:
3713
3713
  # In the case of a specific accelerator, show the error message
3714
3714
  # immediately (e.g., "Resources H100 not found ...")
3715
- yield str(e)
3715
+ yield common_utils.format_exception(e, use_bracket=True)
3716
3716
  if kubernetes_autoscaling:
3717
3717
  k8s_messages += ('\n' +
3718
3718
  kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE)