PyPI - skypilot-nightly - Versions diffs - 1.0.0.dev20250510__py3-none-any.whl → 1.0.0.dev20250513__py3-none-any.whl - Mend

skypilot-nightly 1.0.0.dev20250510__py3-none-any.whl → 1.0.0.dev20250513__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

sky/__init__.py +2 -2
sky/backends/backend_utils.py +3 -0
sky/backends/cloud_vm_ray_backend.py +7 -0
sky/cli.py +109 -109
sky/client/cli.py +109 -109
sky/clouds/gcp.py +35 -8
sky/dashboard/out/404.html +1 -1
sky/dashboard/out/_next/static/{C0fkLhvxyqkymoV7IeInQ → 2dkponv64SfFShA8Rnw0D}/_buildManifest.js +1 -1
sky/dashboard/out/_next/static/chunks/845-0ca6f2c1ba667c3b.js +1 -0
sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
sky/dashboard/out/clusters/[cluster].html +1 -1
sky/dashboard/out/clusters.html +1 -1
sky/dashboard/out/index.html +1 -1
sky/dashboard/out/jobs/[job].html +1 -1
sky/dashboard/out/jobs.html +1 -1
sky/global_user_state.py +2 -0
sky/provision/docker_utils.py +4 -1
sky/provision/gcp/config.py +197 -15
sky/provision/gcp/constants.py +64 -0
sky/provision/nebius/instance.py +3 -1
sky/provision/nebius/utils.py +4 -2
sky/server/requests/executor.py +114 -22
sky/server/requests/requests.py +15 -0
sky/server/server.py +12 -7
sky/server/uvicorn.py +12 -2
sky/sky_logging.py +40 -2
sky/skylet/constants.py +3 -0
sky/skylet/log_lib.py +51 -11
sky/templates/gcp-ray.yml.j2 +11 -0
sky/templates/nebius-ray.yml.j2 +4 -0
sky/templates/websocket_proxy.py +29 -9
sky/utils/command_runner.py +3 -0
sky/utils/context.py +264 -0
sky/utils/context_utils.py +172 -0
sky/utils/rich_utils.py +81 -37
sky/utils/schemas.py +9 -1
sky/utils/subprocess_utils.py +8 -2
{skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250513.dist-info}/METADATA +1 -1
{skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250513.dist-info}/RECORD +44 -42
sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
sky/dashboard/out/_next/static/{C0fkLhvxyqkymoV7IeInQ → 2dkponv64SfFShA8Rnw0D}/_ssgManifest.js +0 -0
{skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250513.dist-info}/WHEEL +0 -0
{skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250513.dist-info}/entry_points.txt +0 -0
{skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250513.dist-info}/licenses/LICENSE +0 -0
{skypilot_nightly-1.0.0.dev20250510.dist-info → skypilot_nightly-1.0.0.dev20250513.dist-info}/top_level.txt +0 -0

sky/__init__.py CHANGED Viewed

@@ -5,7 +5,7 @@ from typing import Optional
 import urllib.request
 # Replaced with the current commit when building the wheels.
-_SKYPILOT_COMMIT_SHA = '18a1d8499158f53818133261776ae408ac447de3'
+_SKYPILOT_COMMIT_SHA = 'c23907b7f1baf65740791dc1e17ff1411e7d9a97'
 def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
 __commit__ = _get_git_commit()
-__version__ = '1.0.0.dev20250510'
+__version__ = '1.0.0.dev20250513'
 __root_dir__ = os.path.dirname(os.path.abspath(__file__))

sky/backends/backend_utils.py CHANGED Viewed

@@ -40,6 +40,7 @@ from sky.utils import cluster_utils
 from sky.utils import command_runner
 from sky.utils import common
 from sky.utils import common_utils
+from sky.utils import context_utils
 from sky.utils import controller_utils
 from sky.utils import env_options
 from sky.utils import registry
@@ -2204,6 +2205,7 @@ def refresh_cluster_record(
 @timeline.event
+@context_utils.cancellation_guard
 def refresh_cluster_status_handle(
     cluster_name: str,
     *,
@@ -2253,6 +2255,7 @@ def check_cluster_available(
     ...
+@context_utils.cancellation_guard
 def check_cluster_available(
     cluster_name: str,
     *,

sky/backends/cloud_vm_ray_backend.py CHANGED Viewed

@@ -61,6 +61,7 @@ from sky.utils import cluster_utils
 from sky.utils import command_runner
 from sky.utils import common
 from sky.utils import common_utils
+from sky.utils import context_utils
 from sky.utils import controller_utils
 from sky.utils import env_options
 from sky.utils import log_utils
@@ -274,6 +275,7 @@ class RayCodeGen:
         ray_address = 'auto'
         self._code = [
             textwrap.dedent(f"""\
+            import functools
             import getpass
             import hashlib
             import io
@@ -301,6 +303,8 @@ class RayCodeGen:
             from sky.skylet import autostop_lib
             from sky.skylet import constants
             from sky.skylet import job_lib
+            from sky.utils import context
+            from sky.utils import context_utils
             from sky.utils import log_utils
             from sky.utils import subprocess_utils
@@ -2415,6 +2419,7 @@ class CloudVmRayResourceHandle(backends.backend.ResourceHandle):
             internal_external_ips[1:], key=lambda x: x[1])
         self.stable_internal_external_ips = stable_internal_external_ips
+    @context_utils.cancellation_guard
     @annotations.lru_cache(scope='global')
     @timeline.event
     def get_command_runners(self,
@@ -3842,6 +3847,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
         subprocess_utils.run_in_parallel(_rsync_down, parallel_args)
         return dict(zip(job_ids, local_log_dirs))
+    @context_utils.cancellation_guard
     def tail_logs(self,
                   handle: CloudVmRayResourceHandle,
                   job_id: Optional[int],
@@ -4559,6 +4565,7 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
     # TODO(zhwu): Refactor this to a CommandRunner class, so different backends
     # can support its own command runner.
     @timeline.event
+    @context_utils.cancellation_guard
     def run_on_head(
         self,
         handle: CloudVmRayResourceHandle,

sky/cli.py CHANGED Viewed

@@ -91,6 +91,8 @@ from sky.utils.cli_utils import status_utils
 if typing.TYPE_CHECKING:
     import types
+    import prettytable
 pd = adaptors_common.LazyImport('pandas')
 logger = sky_logging.init_logger(__name__)
@@ -3371,12 +3373,8 @@ def show_gpus(
     * ``QTY_PER_NODE`` (Kubernetes only): GPU quantities that can be requested
       on a single node.
-    * ``TOTAL_GPUS`` (Kubernetes only): Total number of GPUs available in the
-      Kubernetes cluster.
-    * ``TOTAL_FREE_GPUS`` (Kubernetes only): Number of currently free GPUs
-      in the Kubernetes cluster. This is fetched in real-time and may change
-      when other users are using the cluster.
+    * ``UTILIZATION`` (Kubernetes only): Total number of GPUs free / available
+      in the Kubernetes cluster.
     """
     # validation for the --region flag
     if region is not None and cloud is None:
@@ -3415,15 +3413,16 @@ def show_gpus(
     # TODO(zhwu,romilb): We should move most of these kubernetes related
     # queries into the backend, especially behind the server.
     def _get_kubernetes_realtime_gpu_tables(
-            context: Optional[str] = None,
-            name_filter: Optional[str] = None,
-            quantity_filter: Optional[int] = None):
+        context: Optional[str] = None,
+        name_filter: Optional[str] = None,
+        quantity_filter: Optional[int] = None
+    ) -> Tuple[List[Tuple[str, 'prettytable.PrettyTable']],
+               Optional['prettytable.PrettyTable'], List[Tuple[
+                   str, 'models.KubernetesNodesInfo']]]:
         if quantity_filter:
             qty_header = 'QTY_FILTER'
-            free_header = 'FILTERED_FREE_GPUS'
         else:
             qty_header = 'REQUESTABLE_QTY_PER_NODE'
-            free_header = 'TOTAL_FREE_GPUS'
         realtime_gpu_availability_lists = sdk.stream_and_get(
             sdk.realtime_kubernetes_gpu_availability(
@@ -3449,41 +3448,19 @@ def show_gpus(
         realtime_gpu_infos = []
         total_gpu_info: Dict[str, List[int]] = collections.defaultdict(
             lambda: [0, 0])
+        all_nodes_info = []
-        # TODO(kyuds): remove backwards compatibility code (else branch)
-        # when API version is bumped
         if realtime_gpu_availability_lists:
-            # can't check for isinstance tuple as the tuple is converted to list
-            if len(realtime_gpu_availability_lists[0]) == 2:
-                for (ctx, availability_list) in realtime_gpu_availability_lists:
-                    realtime_gpu_table = log_utils.create_table(
-                        ['GPU', qty_header, 'TOTAL_GPUS', free_header])
-                    for realtime_gpu_availability in sorted(availability_list):
-                        gpu_availability = models.RealtimeGpuAvailability(
-                            *realtime_gpu_availability)
-                        available_qty = (gpu_availability.available
-                                         if gpu_availability.available != -1
-                                         else no_permissions_str)
-                        realtime_gpu_table.add_row([
-                            gpu_availability.gpu,
-                            _list_to_str(gpu_availability.counts),
-                            gpu_availability.capacity,
-                            available_qty,
-                        ])
-                        gpu = gpu_availability.gpu
-                        capacity = gpu_availability.capacity
-                        # we want total, so skip permission denied.
-                        available = max(gpu_availability.available, 0)
-                        if capacity > 0:
-                            total_gpu_info[gpu][0] += capacity
-                            total_gpu_info[gpu][1] += available
-                    realtime_gpu_infos.append((ctx, realtime_gpu_table))
-            else:
-                # can remove this with api server version bump.
-                # 2025.05.03
-                availability_list = realtime_gpu_availability_lists
+            if len(realtime_gpu_availability_lists[0]) != 2:
+                # TODO(kyuds): for backwards compatibility, as we add new
+                # context to the API server response in #5362. Remove this after
+                # 0.10.0.
+                realtime_gpu_availability_lists = [
+                    (context, realtime_gpu_availability_lists)
+                ]
+            for (ctx, availability_list) in realtime_gpu_availability_lists:
                 realtime_gpu_table = log_utils.create_table(
-                    ['GPU', qty_header, 'TOTAL_GPUS', free_header])
+                    ['GPU', qty_header, 'UTILIZATION'])
                 for realtime_gpu_availability in sorted(availability_list):
                     gpu_availability = models.RealtimeGpuAvailability(
                         *realtime_gpu_availability)
@@ -3493,49 +3470,100 @@ def show_gpus(
                     realtime_gpu_table.add_row([
                         gpu_availability.gpu,
                         _list_to_str(gpu_availability.counts),
-                        gpu_availability.capacity,
-                        available_qty,
+                        f'{available_qty} of {gpu_availability.capacity} free',
                     ])
-                realtime_gpu_infos.append((context, realtime_gpu_table))
+                    gpu = gpu_availability.gpu
+                    capacity = gpu_availability.capacity
+                    # we want total, so skip permission denied.
+                    available = max(gpu_availability.available, 0)
+                    if capacity > 0:
+                        total_gpu_info[gpu][0] += capacity
+                        total_gpu_info[gpu][1] += available
+                realtime_gpu_infos.append((ctx, realtime_gpu_table))
+                # Collect node info for this context
+                nodes_info = sdk.stream_and_get(
+                    sdk.kubernetes_node_info(context=ctx))
+                all_nodes_info.append((ctx, nodes_info))
         # display an aggregated table for all contexts
         # if there are more than one contexts with GPUs
         if len(realtime_gpu_infos) > 1:
             total_realtime_gpu_table = log_utils.create_table(
-                ['GPU', 'TOTAL_GPUS', free_header])
+                ['GPU', 'UTILIZATION'])
             for gpu, stats in total_gpu_info.items():
-                total_realtime_gpu_table.add_row([gpu, stats[0], stats[1]])
+                total_realtime_gpu_table.add_row(
+                    [gpu, f'{stats[1]} of {stats[0]} free'])
         else:
             total_realtime_gpu_table = None
-        return realtime_gpu_infos, total_realtime_gpu_table
+        return realtime_gpu_infos, total_realtime_gpu_table, all_nodes_info
-    def _format_kubernetes_node_info(context: Optional[str]):
+    def _format_kubernetes_node_info_combined(
+            contexts_info: List[Tuple[str,
+                                      'models.KubernetesNodesInfo']]) -> str:
         node_table = log_utils.create_table(
-            ['NODE_NAME', 'GPU_NAME', 'TOTAL_GPUS', 'FREE_GPUS'])
+            ['CONTEXT', 'NODE', 'GPU', 'UTILIZATION'])
-        nodes_info = sdk.stream_and_get(
-            sdk.kubernetes_node_info(context=context))
         no_permissions_str = '<no permissions>'
-        for node_name, node_info in nodes_info.node_info_dict.items():
-            available = node_info.free[
-                'accelerators_available'] if node_info.free[
-                    'accelerators_available'] != -1 else no_permissions_str
-            total = node_info.total['accelerator_count']
-            if total > 0:
+        hints = []
+        for context, nodes_info in contexts_info:
+            context_name = context if context else 'default'
+            if nodes_info.hint:
+                hints.append(f'{context_name}: {nodes_info.hint}')
+            for node_name, node_info in nodes_info.node_info_dict.items():
+                available = node_info.free[
+                    'accelerators_available'] if node_info.free[
+                        'accelerators_available'] != -1 else no_permissions_str
+                acc_type = node_info.accelerator_type
+                if acc_type is None:
+                    acc_type = '-'
                 node_table.add_row([
-                    node_name, node_info.accelerator_type,
-                    node_info.total['accelerator_count'], available
+                    context_name, node_name, acc_type,
+                    f'{available} of {node_info.total["accelerator_count"]} '
+                    'free'
                 ])
-        k8s_per_node_acc_message = (
-            'Kubernetes per node accelerator availability ')
-        if nodes_info.hint:
-            k8s_per_node_acc_message += nodes_info.hint
+        k8s_per_node_acc_message = ('Kubernetes per-node GPU availability')
+        if hints:
+            k8s_per_node_acc_message += ' (' + '; '.join(hints) + ')'
         return (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
                 f'{k8s_per_node_acc_message}'
                 f'{colorama.Style.RESET_ALL}\n'
                 f'{node_table.get_string()}')
+    def _format_kubernetes_realtime_gpu(
+            total_table: 'prettytable.PrettyTable',
+            k8s_realtime_infos: List[Tuple[str, 'prettytable.PrettyTable']],
+            all_nodes_info: List[Tuple[str, 'models.KubernetesNodesInfo']],
+            show_node_info: bool) -> Generator[str, None, None]:
+        yield (f'{colorama.Fore.GREEN}{colorama.Style.BRIGHT}'
+               'Kubernetes GPUs'
+               f'{colorama.Style.RESET_ALL}')
+        # print total table
+        if total_table is not None:
+            yield '\n'
+            yield from total_table.get_string()
+        # print individual infos.
+        for (ctx, k8s_realtime_table) in k8s_realtime_infos:
+            yield '\n'
+            # Print context header separately
+            if ctx:
+                context_str = f'Context: {ctx}'
+            else:
+                context_str = 'Default Context'
+            yield (
+                f'{colorama.Fore.CYAN}{context_str}{colorama.Style.RESET_ALL}\n'
+            )
+            yield from k8s_realtime_table.get_string()
+        if show_node_info:
+            yield '\n'
+            yield _format_kubernetes_node_info_combined(all_nodes_info)
     def _output() -> Generator[str, None, None]:
         gpu_table = log_utils.create_table(
             ['COMMON_GPU', 'AVAILABLE_QUANTITIES'])
@@ -3568,7 +3596,7 @@ def show_gpus(
                     # If --cloud kubernetes is not specified, we want to catch
                     # the case where no GPUs are available on the cluster and
                     # print the warning at the end.
-                    k8s_realtime_infos, total_table = _get_kubernetes_realtime_gpu_tables(context)  # pylint: disable=line-too-long
+                    k8s_realtime_infos, total_table, all_nodes_info = _get_kubernetes_realtime_gpu_tables(context)  # pylint: disable=line-too-long
                 except ValueError as e:
                     if not cloud_is_kubernetes:
                         # Make it a note if cloud is not kubernetes
@@ -3577,27 +3605,12 @@ def show_gpus(
                 else:
                     print_section_titles = True
-                    # print total table
-                    if total_table is not None:
-                        yield (f'{colorama.Fore.GREEN}{colorama.Style.BRIGHT}'
-                               'Total Kubernetes GPUs'
-                               f'{colorama.Style.RESET_ALL}\n')
-                        yield from total_table.get_string()
-                        yield '\n\n'
-                    # print individual infos.
-                    for (idx,
-                         (ctx,
-                          k8s_realtime_table)) in enumerate(k8s_realtime_infos):
-                        context_str = f'(Context: {ctx})' if ctx else ''
-                        yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
-                               f'Kubernetes GPUs {context_str}'
-                               f'{colorama.Style.RESET_ALL}\n')
-                        yield from k8s_realtime_table.get_string()
-                        yield '\n\n'
-                        yield _format_kubernetes_node_info(ctx)
-                        if idx != len(k8s_realtime_infos) - 1:
-                            yield '\n\n'
+                    yield from _format_kubernetes_realtime_gpu(
+                        total_table,
+                        k8s_realtime_infos,
+                        all_nodes_info,
+                        show_node_info=True)
                 if kubernetes_autoscaling:
                     k8s_messages += (
                         '\n' + kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE)
@@ -3688,31 +3701,18 @@ def show_gpus(
             print_section_titles = True
             # TODO(romilb): Show filtered per node GPU availability here as well
             try:
-                k8s_realtime_infos, total_table = _get_kubernetes_realtime_gpu_tables(  # pylint: disable=line-too-long
-                    context=region,
-                    name_filter=name,
-                    quantity_filter=quantity)
-                # print total table
-                if total_table is not None:
-                    yield (f'{colorama.Fore.GREEN}{colorama.Style.BRIGHT}'
-                           'Total Kubernetes GPUs'
-                           f'{colorama.Style.RESET_ALL}\n')
-                    yield from total_table.get_string()
-                    yield '\n\n'
-                # print individual tables
-                for (ctx, k8s_realtime_table) in k8s_realtime_infos:
-                    context_str = f'(Context: {ctx})' if ctx else ''
-                    yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
-                           f'Kubernetes GPUs {context_str}'
-                           f'{colorama.Style.RESET_ALL}\n')
-                    yield from k8s_realtime_table.get_string()
-                    yield '\n\n'
+                (k8s_realtime_infos, total_table,
+                 all_nodes_info) = _get_kubernetes_realtime_gpu_tables(
+                     context=region, name_filter=name, quantity_filter=quantity)
+                yield from _format_kubernetes_realtime_gpu(total_table,
+                                                           k8s_realtime_infos,
+                                                           all_nodes_info,
+                                                           show_node_info=False)
             except ValueError as e:
                 # In the case of a specific accelerator, show the error message
                 # immediately (e.g., "Resources H100 not found ...")
-                yield str(e)
+                yield common_utils.format_exception(e, use_bracket=True)
             if kubernetes_autoscaling:
                 k8s_messages += ('\n' +
                                  kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE)

skypilot-nightly 1.0.0.dev20250510__py3-none-any.whl → 1.0.0.dev20250513__py3-none-any.whl

skypilot-nightly 1.0.0.dev20250510__py3-none-any.whl → 1.0.0.dev20250513__py3-none-any.whl