skypilot-nightly 1.0.0.dev20250218__py3-none-any.whl → 1.0.0.dev20250220__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
  import urllib.request
 
  # Replaced with the current commit when building the wheels.
- _SKYPILOT_COMMIT_SHA = '912b8293b3ebeba84941c108dbede1e6dcbc9b6f'
+ _SKYPILOT_COMMIT_SHA = '6b2b31d8358f3ff8394a7a33ec49e9985ada230f'
 
 
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
 
 
  __commit__ = _get_git_commit()
- __version__ = '1.0.0.dev20250218'
+ __version__ = '1.0.0.dev20250220'
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
 
 
sky/cli.py CHANGED
@@ -1419,16 +1419,16 @@ def _handle_jobs_queue_request(
  try:
  # Check the controller status again, as the RuntimeError is likely
  # due to the controller being autostopped when querying the jobs.
- controller_type = controller_utils.Controllers.JOBS_CONTROLLER
- # Query status of the controller cluster. We add a wildcard because
- # the controller cluster name can have a suffix like
- # '-remote-<hash>' when using remote API server.
+ # Since we are client-side, we may not know the exact name of the
+ # controller, so use the prefix with a wildcard.
+ # Query status of the controller cluster.
  records = sdk.get(
- sdk.status(
- cluster_names=[controller_type.value.cluster_name + '*']))
+ sdk.status(cluster_names=[common.JOB_CONTROLLER_PREFIX + '*'],
+ all_users=True))
  if (not records or
  records[0]['status'] == status_lib.ClusterStatus.STOPPED):
- msg = controller_type.value.default_hint_if_non_existent
+ controller = controller_utils.Controllers.JOBS_CONTROLLER.value
+ msg = controller.default_hint_if_non_existent
  except Exception: # pylint: disable=broad-except
  # This is to an best effort to find the latest controller status to
  # print more helpful message, so we can ignore any exception to
@@ -1494,16 +1494,18 @@ def _handle_services_request(
  # Check the controller status again, as the RuntimeError is likely
  # due to the controller being autostopped when querying the
  # services.
- controller_type = controller_utils.Controllers.SKY_SERVE_CONTROLLER
- # Query status of the controller cluster. We add a wildcard because
- # the controller cluster name can have a suffix like
- # '-remote-<hash>' when using remote API server.
+ # Since we are client-side, we may not know the exact name of the
+ # controller, so use the prefix with a wildcard.
+ # Query status of the controller cluster.
  records = sdk.get(
  sdk.status(
- cluster_names=[controller_type.value.cluster_name + '*']))
+ cluster_names=[common.SKY_SERVE_CONTROLLER_PREFIX + '*'],
+ all_users=True))
  if (not records or
  records[0]['status'] == status_lib.ClusterStatus.STOPPED):
- msg = controller_type.value.default_hint_if_non_existent
+ controller = (
+ controller_utils.Controllers.SKY_SERVE_CONTROLLER.value)
+ msg = controller.default_hint_if_non_existent
  except Exception: # pylint: disable=broad-except
  # This is to an best effort to find the latest controller status to
  # print more helpful message, so we can ignore any exception to
@@ -2804,11 +2806,6 @@ def _hint_or_raise_for_down_jobs_controller(controller_name: str,
  to be torn down (e.g., because it has jobs running or
  it is in init state)
  """
- if not common.is_current_user_controller(controller_name):
- with ux_utils.print_exception_no_traceback():
- raise exceptions.NotSupportedError(
- f'Tearing down other user\'s managed job controller '
- f'{controller_name!r} is not allowed.')
  controller = controller_utils.Controllers.from_name(controller_name)
  assert controller is not None, controller_name
 
@@ -2868,12 +2865,6 @@ def _hint_or_raise_for_down_sky_serve_controller(controller_name: str,
  to be torn down (e.g., because it has services running or
  it is in init state)
  """
- # TODO(zhwu): Move this check to the sdk or even API server side.
- if not common.is_current_user_controller(controller_name):
- with ux_utils.print_exception_no_traceback():
- raise exceptions.NotSupportedError(
- f'Tearing down other user\'s sky serve controller '
- f'{controller_name!r} is not allowed.')
  controller = controller_utils.Controllers.from_name(controller_name)
  assert controller is not None, controller_name
  with rich_utils.client_status('[bold cyan]Checking for live services[/]'):
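Note on the client-side change above: the CLI no longer reconstructs a per-user controller name; it matches any cluster whose name starts with the controller prefix and queries across all users. A minimal sketch of that prefix matching, with a made-up server hash (the real suffix is the API server's user hash, which the client does not know):

    import fnmatch

    JOB_CONTROLLER_PREFIX = 'sky-jobs-controller-'
    # Hypothetical name created by the API server.
    server_side_name = JOB_CONTROLLER_PREFIX + '2ea485ea'

    # Glob-style equivalent of the wildcard passed to
    # sdk.status(cluster_names=[JOB_CONTROLLER_PREFIX + '*'], all_users=True).
    assert fnmatch.fnmatch(server_side_name, JOB_CONTROLLER_PREFIX + '*')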
sky/client/cli.py CHANGED
@@ -1419,16 +1419,16 @@ def _handle_jobs_queue_request(
  try:
  # Check the controller status again, as the RuntimeError is likely
  # due to the controller being autostopped when querying the jobs.
- controller_type = controller_utils.Controllers.JOBS_CONTROLLER
- # Query status of the controller cluster. We add a wildcard because
- # the controller cluster name can have a suffix like
- # '-remote-<hash>' when using remote API server.
+ # Since we are client-side, we may not know the exact name of the
+ # controller, so use the prefix with a wildcard.
+ # Query status of the controller cluster.
  records = sdk.get(
- sdk.status(
- cluster_names=[controller_type.value.cluster_name + '*']))
+ sdk.status(cluster_names=[common.JOB_CONTROLLER_PREFIX + '*'],
+ all_users=True))
  if (not records or
  records[0]['status'] == status_lib.ClusterStatus.STOPPED):
- msg = controller_type.value.default_hint_if_non_existent
+ controller = controller_utils.Controllers.JOBS_CONTROLLER.value
+ msg = controller.default_hint_if_non_existent
  except Exception: # pylint: disable=broad-except
  # This is to an best effort to find the latest controller status to
  # print more helpful message, so we can ignore any exception to
@@ -1494,16 +1494,18 @@ def _handle_services_request(
  # Check the controller status again, as the RuntimeError is likely
  # due to the controller being autostopped when querying the
  # services.
- controller_type = controller_utils.Controllers.SKY_SERVE_CONTROLLER
- # Query status of the controller cluster. We add a wildcard because
- # the controller cluster name can have a suffix like
- # '-remote-<hash>' when using remote API server.
+ # Since we are client-side, we may not know the exact name of the
+ # controller, so use the prefix with a wildcard.
+ # Query status of the controller cluster.
  records = sdk.get(
  sdk.status(
- cluster_names=[controller_type.value.cluster_name + '*']))
+ cluster_names=[common.SKY_SERVE_CONTROLLER_PREFIX + '*'],
+ all_users=True))
  if (not records or
  records[0]['status'] == status_lib.ClusterStatus.STOPPED):
- msg = controller_type.value.default_hint_if_non_existent
+ controller = (
+ controller_utils.Controllers.SKY_SERVE_CONTROLLER.value)
+ msg = controller.default_hint_if_non_existent
  except Exception: # pylint: disable=broad-except
  # This is to an best effort to find the latest controller status to
  # print more helpful message, so we can ignore any exception to
@@ -2804,11 +2806,6 @@ def _hint_or_raise_for_down_jobs_controller(controller_name: str,
  to be torn down (e.g., because it has jobs running or
  it is in init state)
  """
- if not common.is_current_user_controller(controller_name):
- with ux_utils.print_exception_no_traceback():
- raise exceptions.NotSupportedError(
- f'Tearing down other user\'s managed job controller '
- f'{controller_name!r} is not allowed.')
  controller = controller_utils.Controllers.from_name(controller_name)
  assert controller is not None, controller_name
 
@@ -2868,12 +2865,6 @@ def _hint_or_raise_for_down_sky_serve_controller(controller_name: str,
  to be torn down (e.g., because it has services running or
  it is in init state)
  """
- # TODO(zhwu): Move this check to the sdk or even API server side.
- if not common.is_current_user_controller(controller_name):
- with ux_utils.print_exception_no_traceback():
- raise exceptions.NotSupportedError(
- f'Tearing down other user\'s sky serve controller '
- f'{controller_name!r} is not allowed.')
  controller = controller_utils.Controllers.from_name(controller_name)
  assert controller is not None, controller_name
  with rich_utils.client_status('[bold cyan]Checking for live services[/]'):
@@ -60,8 +60,8 @@ HIDDEN_TPU_DF = pd.read_csv(
  ,tpu-v3-2048,1,,,tpu-v3-2048,2048.0,614.4,us-east1,us-east1-d
  """)))
 
- # TPU V6e price for us-central2 is missing in the SKUs.
- TPU_V6E_MISSING_REGIONS = ['us-central2']
+ # TPU V6e price for the following regions is missing in the SKUs.
+ TPU_V6E_MISSING_REGIONS = ['us-central2', 'southamerica-west1']
 
  # TPU V5 is not visible in specific zones. We hardcode the missing zones here.
  # NOTE(dev): Keep the zones and the df in sync.
sky/jobs/server/core.py CHANGED
@@ -21,10 +21,11 @@ from sky.backends import backend_utils
  from sky.clouds.service_catalog import common as service_catalog_common
  from sky.jobs import constants as managed_job_constants
  from sky.jobs import utils as managed_job_utils
- from sky.provision import common
+ from sky.provision import common as provision_common
  from sky.skylet import constants as skylet_constants
  from sky.usage import usage_lib
  from sky.utils import admin_policy_utils
+ from sky.utils import common
  from sky.utils import common_utils
  from sky.utils import controller_utils
  from sky.utils import dag_utils
@@ -149,14 +150,18 @@ def launch(
  f'{colorama.Fore.YELLOW}'
  f'Launching managed job {dag.name!r} from jobs controller...'
  f'{colorama.Style.RESET_ALL}')
- return execution.launch(task=controller_task,
- cluster_name=controller_name,
- stream_logs=stream_logs,
- idle_minutes_to_autostop=skylet_constants.
- CONTROLLER_IDLE_MINUTES_TO_AUTOSTOP,
- retry_until_up=True,
- fast=True,
- _disable_controller_check=True)
+
+ # Launch with the api server's user hash, so that sky status does not
+ # show the owner of the controller as whatever user launched it first.
+ with common.with_server_user_hash():
+ return execution.launch(task=controller_task,
+ cluster_name=controller_name,
+ stream_logs=stream_logs,
+ idle_minutes_to_autostop=skylet_constants.
+ CONTROLLER_IDLE_MINUTES_TO_AUTOSTOP,
+ retry_until_up=True,
+ fast=True,
+ _disable_controller_check=True)
 
 
  def queue_from_kubernetes_pod(
@@ -194,16 +199,16 @@ def queue_from_kubernetes_pod(
  provider_config = {'context': context}
  instances = {
  pod_name: [
- common.InstanceInfo(instance_id=pod_name,
- internal_ip='',
- external_ip='',
- tags={})
+ provision_common.InstanceInfo(instance_id=pod_name,
+ internal_ip='',
+ external_ip='',
+ tags={})
  ]
  } # Internal IP is not required for Kubernetes
- cluster_info = common.ClusterInfo(provider_name='kubernetes',
- head_instance_id=pod_name,
- provider_config=provider_config,
- instances=instances)
+ cluster_info = provision_common.ClusterInfo(provider_name='kubernetes',
+ head_instance_id=pod_name,
+ provider_config=provider_config,
+ instances=instances)
  managed_jobs_runner = provision_lib.get_command_runners(
  'kubernetes', cluster_info)[0]
 
@@ -1,4 +1,9 @@
- """Persistent dashboard sessions."""
+ """Persistent dashboard sessions.
+
+ Note: before #4717, this was useful because we needed to tunnel to multiple
+ controllers - one per user. Now, there is only one controller for the whole API
+ server, so this is not very useful. TODO(cooperc): Remove or fix this.
+ """
  import pathlib
  from typing import Tuple
 
sky/jobs/server/server.py CHANGED
@@ -21,11 +21,6 @@ logger = sky_logging.init_logger(__name__)
  router = fastapi.APIRouter()
 
 
- def _get_controller_name(request_body: payloads.RequestBody) -> str:
- user_hash = request_body.user_hash
- return common.get_controller_name(common.ControllerType.JOBS, user_hash)
-
-
  @router.post('/launch')
  async def launch(request: fastapi.Request,
  jobs_launch_body: payloads.JobsLaunchBody) -> None:
@@ -35,7 +30,7 @@ async def launch(request: fastapi.Request,
  request_body=jobs_launch_body,
  func=core.launch,
  schedule_type=api_requests.ScheduleType.LONG,
- request_cluster_name=_get_controller_name(jobs_launch_body),
+ request_cluster_name=common.JOB_CONTROLLER_NAME,
  )
 
 
@@ -49,7 +44,7 @@ async def queue(request: fastapi.Request,
  func=core.queue,
  schedule_type=(api_requests.ScheduleType.LONG if jobs_queue_body.refresh
  else api_requests.ScheduleType.SHORT),
- request_cluster_name=_get_controller_name(jobs_queue_body),
+ request_cluster_name=common.JOB_CONTROLLER_NAME,
  )
 
 
@@ -62,7 +57,7 @@ async def cancel(request: fastapi.Request,
  request_body=jobs_cancel_body,
  func=core.cancel,
  schedule_type=api_requests.ScheduleType.SHORT,
- request_cluster_name=_get_controller_name(jobs_cancel_body),
+ request_cluster_name=common.JOB_CONTROLLER_NAME,
  )
 
 
@@ -78,7 +73,7 @@ async def logs(
  func=core.tail_logs,
  schedule_type=api_requests.ScheduleType.SHORT
  if jobs_logs_body.refresh else api_requests.ScheduleType.LONG,
- request_cluster_name=_get_controller_name(jobs_logs_body),
+ request_cluster_name=common.JOB_CONTROLLER_NAME,
  )
  request_task = api_requests.get_request(request.state.request_id)
 
@@ -107,13 +102,16 @@ async def download_logs(
  func=core.download_logs,
  schedule_type=api_requests.ScheduleType.LONG
  if jobs_download_logs_body.refresh else api_requests.ScheduleType.SHORT,
- request_cluster_name=_get_controller_name(jobs_download_logs_body),
+ request_cluster_name=common.JOB_CONTROLLER_NAME,
  )
 
 
  @router.get('/dashboard')
  async def dashboard(request: fastapi.Request,
  user_hash: str) -> fastapi.Response:
+ # Note: before #4717, each user had their own controller, and thus their own
+ # dashboard. Now, all users share the same controller, so this isn't really
+ # necessary. TODO(cooperc): clean up.
  # Find the port for the dashboard of the user
  os.environ[constants.USER_ID_ENV_VAR] = user_hash
  server_common.reload_for_new_request(client_entrypoint=None,
sky/serve/server/core.py CHANGED
@@ -249,13 +249,16 @@ def up(
  # with the current job id, we know the service is up and running
  # for the first time; otherwise it is a name conflict.
  idle_minutes_to_autostop = constants.CONTROLLER_IDLE_MINUTES_TO_AUTOSTOP
- controller_job_id, controller_handle = execution.launch(
- task=controller_task,
- cluster_name=controller_name,
- idle_minutes_to_autostop=idle_minutes_to_autostop,
- retry_until_up=True,
- _disable_controller_check=True,
- )
+ # Since the controller may be shared among multiple users, launch the
+ # controller with the API server's user hash.
+ with common.with_server_user_hash():
+ controller_job_id, controller_handle = execution.launch(
+ task=controller_task,
+ cluster_name=controller_name,
+ idle_minutes_to_autostop=idle_minutes_to_autostop,
+ retry_until_up=True,
+ _disable_controller_check=True,
+ )
 
  style = colorama.Style
  fore = colorama.Fore
@@ -14,11 +14,6 @@ logger = sky_logging.init_logger(__name__)
  router = fastapi.APIRouter()
 
 
- def _get_controller_name(request_body: payloads.RequestBody) -> str:
- user_hash = request_body.user_hash
- return common.get_controller_name(common.ControllerType.SERVE, user_hash)
-
-
  @router.post('/up')
  async def up(
  request: fastapi.Request,
@@ -30,7 +25,7 @@ async def up(
  request_body=up_body,
  func=core.up,
  schedule_type=api_requests.ScheduleType.LONG,
- request_cluster_name=_get_controller_name(up_body),
+ request_cluster_name=common.SKY_SERVE_CONTROLLER_NAME,
  )
 
 
@@ -45,7 +40,7 @@ async def update(
  request_body=update_body,
  func=core.update,
  schedule_type=api_requests.ScheduleType.SHORT,
- request_cluster_name=_get_controller_name(update_body),
+ request_cluster_name=common.SKY_SERVE_CONTROLLER_NAME,
  )
 
 
@@ -60,7 +55,7 @@ async def down(
  request_body=down_body,
  func=core.down,
  schedule_type=api_requests.ScheduleType.SHORT,
- request_cluster_name=_get_controller_name(down_body),
+ request_cluster_name=common.SKY_SERVE_CONTROLLER_NAME,
  )
 
 
@@ -75,7 +70,7 @@ async def terminate_replica(
  request_body=terminate_replica_body,
  func=core.terminate_replica,
  schedule_type=api_requests.ScheduleType.SHORT,
- request_cluster_name=_get_controller_name(terminate_replica_body),
+ request_cluster_name=common.SKY_SERVE_CONTROLLER_NAME,
  )
 
 
@@ -90,7 +85,7 @@ async def status(
  request_body=status_body,
  func=core.status,
  schedule_type=api_requests.ScheduleType.SHORT,
- request_cluster_name=_get_controller_name(status_body),
+ request_cluster_name=common.SKY_SERVE_CONTROLLER_NAME,
  )
 
 
@@ -105,7 +100,7 @@ async def tail_logs(
  request_body=log_body,
  func=core.tail_logs,
  schedule_type=api_requests.ScheduleType.SHORT,
- request_cluster_name=_get_controller_name(log_body),
+ request_cluster_name=common.SKY_SERVE_CONTROLLER_NAME,
  )
 
  request_task = api_requests.get_request(request.state.request_id)
sky/server/common.py CHANGED
@@ -3,7 +3,6 @@
  import dataclasses
  import enum
  import functools
- import importlib
  import json
  import os
  import pathlib
@@ -16,7 +15,6 @@ import uuid
 
  import colorama
  import filelock
- import psutil
  import pydantic
  import requests
 
@@ -28,14 +26,12 @@ from sky.server import constants as server_constants
  from sky.skylet import constants
  from sky.usage import usage_lib
  from sky.utils import annotations
- from sky.utils import common
  from sky.utils import common_utils
  from sky.utils import rich_utils
  from sky.utils import ux_utils
 
  if typing.TYPE_CHECKING:
  from sky import dag as dag_lib
- from sky.server.requests import payloads
 
  DEFAULT_SERVER_URL = 'http://127.0.0.1:46580'
  AVAILBLE_LOCAL_API_SERVER_HOSTS = ['0.0.0.0', 'localhost', '127.0.0.1']
@@ -149,13 +145,14 @@ def get_api_server_status(endpoint: Optional[str] = None) -> ApiServerInfo:
  return ApiServerInfo(status=ApiServerStatus.UNHEALTHY, api_version=None)
 
 
- def start_uvicorn_in_background(deploy: bool = False, host: str = '127.0.0.1'):
+ def start_api_server_in_background(deploy: bool = False,
+ host: str = '127.0.0.1'):
  if not is_api_server_local():
  raise RuntimeError(
  f'Cannot start API server: {get_server_url()} is not a local URL')
 
  # Check available memory before starting the server.
- avail_mem_size_gb: float = psutil.virtual_memory().available / (1024**3)
+ avail_mem_size_gb: float = common_utils.get_mem_size_gb()
  if avail_mem_size_gb <= server_constants.MIN_AVAIL_MEM_GB:
  logger.warning(
  f'{colorama.Fore.YELLOW}Your SkyPilot API server machine only has '
@@ -166,8 +163,6 @@ def start_uvicorn_in_background(deploy: bool = False, host: str = '127.0.0.1'):
  log_path = os.path.expanduser(constants.API_SERVER_LOGS)
  os.makedirs(os.path.dirname(log_path), exist_ok=True)
 
- # The command to run uvicorn. Adjust the app:app to your application's
- # location.
  api_server_cmd = API_SERVER_CMD
  if deploy:
  api_server_cmd += ' --deploy'
@@ -175,7 +170,7 @@ def start_uvicorn_in_background(deploy: bool = False, host: str = '127.0.0.1'):
  api_server_cmd += f' --host {host}'
  cmd = f'{sys.executable} {api_server_cmd} > {log_path} 2>&1'
 
- # Start the uvicorn process in the background and don't wait for it.
+ # Start the API server process in the background and don't wait for it.
  # If this is called from a CLI invocation, we need start_new_session=True so
  # that SIGINT on the CLI will not also kill the API server.
  subprocess.Popen(cmd, shell=True, start_new_session=True)
@@ -235,7 +230,7 @@ def _start_api_server(deploy: bool = False, host: str = '127.0.0.1'):
  f'SkyPilot API server at {server_url}. '
  'Starting a local server.'
  f'{colorama.Style.RESET_ALL}')
- start_uvicorn_in_background(deploy=deploy, host=host)
+ start_api_server_in_background(deploy=deploy, host=host)
  logger.info(ux_utils.finishing_message('SkyPilot API server started.'))
 
 
@@ -407,23 +402,6 @@ def request_body_to_params(body: pydantic.BaseModel) -> Dict[str, Any]:
  def reload_for_new_request(client_entrypoint: Optional[str],
  client_command: Optional[str]):
  """Reload modules, global variables, and usage message for a new request."""
- # When a user request is sent to api server, it changes the user hash in the
- # env vars, but since controller_utils is imported before the env vars are
- # set, it doesn't get updated. So we need to reload it here.
- # pylint: disable=import-outside-toplevel
- from sky.utils import controller_utils
- common.SKY_SERVE_CONTROLLER_NAME = common.get_controller_name(
- common.ControllerType.SERVE)
- common.JOB_CONTROLLER_NAME = common.get_controller_name(
- common.ControllerType.JOBS)
- # TODO(zhwu): We should avoid reloading the controller_utils module.
- # Instead, we should reload required cache or global variables.
- # TODO(zhwu): Reloading the controller_utils module may cause the global
- # variables in other modules referring the `controller_utils.Controllers`
- # dangling, as they will be pointing to the old object. We should not use
- # it in global variables.
- importlib.reload(controller_utils)
-
  # Reset the client entrypoint and command for the usage message.
  common_utils.set_client_entrypoint_and_command(
  client_entrypoint=client_entrypoint,
@@ -32,7 +32,6 @@ import traceback
  import typing
  from typing import Any, Callable, Generator, List, Optional, TextIO, Tuple
 
- import psutil
  import setproctitle
 
  from sky import global_user_state
@@ -70,18 +69,36 @@ logger = sky_logging.init_logger(__name__)
  # platforms, including macOS.
  multiprocessing.set_start_method('spawn', force=True)
 
- # Constants based on profiling the peak memory usage of
- # various sky commands. See `tests/load_test/` for details.
- # Max memory consumption for each request.
- _PER_BLOCKING_REQUEST_MEM_GB = 0.25
- _PER_NON_BLOCKING_REQUEST_MEM_GB = 0.15
- # To control the number of blocking workers.
- _CPU_MULTIPLIER_FOR_BLOCKING_WORKERS = 2
- _MAX_BLOCKING_WORKERS_LOCAL = 4
- # Percentage of memory for blocking requests
+ # Constants based on profiling the peak memory usage while serving various
+ # sky commands. These estimation are highly related to usage patterns
+ # (clouds enabled, type of requests, etc. see `tests/load_tests` for details.),
+ # the profiling covers major clouds and common usage patterns. For user has
+ # deviated usage pattern, they can override the default estimation by
+ # environment variables.
+ # NOTE(dev): update these constants for each release according to the load
+ # test results.
+ # TODO(aylei): maintaining these constants is error-prone, we may need to
+ # automatically tune parallelism at runtime according to system usage stats
+ # in the future.
+ _LONG_WORKER_MEM_GB = 0.4
+ _SHORT_WORKER_MEM_GB = 0.25
+ # To control the number of long workers.
+ _CPU_MULTIPLIER_FOR_LONG_WORKERS = 2
+ # Limit the number of long workers of local API server, since local server is
+ # typically:
+ # 1. launched automatically in an environment with high resource contention
+ # (e.g. Laptop)
+ # 2. used by a single user
+ _MAX_LONG_WORKERS_LOCAL = 4
+ # Percentage of memory for long requests
  # from the memory reserved for SkyPilot.
- # This is to reserve some memory for non-blocking requests.
+ # This is to reserve some memory for short requests.
  _MAX_MEM_PERCENT_FOR_BLOCKING = 0.6
+ # Minimal number of long workers to ensure responsiveness.
+ _MIN_LONG_WORKERS = 1
+ # Minimal number of short workers, there is a daemon task running on short
+ # workers so at least 2 workers are needed to ensure responsiveness.
+ _MIN_SHORT_WORKERS = 2
 
 
  class QueueBackend(enum.Enum):
@@ -301,34 +318,32 @@ def schedule_request(request_id: str,
  _get_queue(schedule_type).put(input_tuple)
 
 
+ def executor_initializer(proc_group: str):
+ setproctitle.setproctitle(f'SkyPilot:executor:{proc_group}:'
+ f'{multiprocessing.current_process().pid}')
+
+
  def request_worker(worker: RequestWorker, max_parallel_size: int) -> None:
  """Worker for the requests.
 
  Args:
  max_parallel_size: Maximum number of parallel jobs this worker can run.
  """
- logger.info(f'Starting {worker} with pid '
- f'{multiprocessing.current_process().pid}')
- setproctitle.setproctitle(
- f'SkyPilot:worker:{worker.schedule_type.value}-{worker.id}')
+ proc_group = f'{worker.schedule_type.value}-{worker.id}'
+ setproctitle.setproctitle(f'SkyPilot:worker:{proc_group}')
  queue = _get_queue(worker.schedule_type)
- # Use concurrent.futures.ProcessPoolExecutor instead of multiprocessing.Pool
- # because the former is more efficient with the support of lazy creation of
- # worker processes.
- # We use executor instead of individual multiprocessing.Process to avoid
- # the overhead of forking a new process for each request, which can be about
- # 1s delay.
- with concurrent.futures.ProcessPoolExecutor(
- max_workers=max_parallel_size) as executor:
- while True:
+
+ def process_request(executor: concurrent.futures.ProcessPoolExecutor):
+ try:
  request_element = queue.get()
  if request_element is None:
  time.sleep(0.1)
- continue
+ return
  request_id, ignore_return_value = request_element
  request = api_requests.get_request(request_id)
+ assert request is not None, f'Request with ID {request_id} is None'
  if request.status == api_requests.RequestStatus.CANCELLED:
- continue
+ return
  logger.info(f'[{worker}] Submitting request: {request_id}')
  # Start additional process to run the request, so that it can be
  # cancelled when requested by a user.
@@ -347,60 +362,49 @@ def request_worker(worker: RequestWorker, max_parallel_size: int) -> None:
  logger.info(f'[{worker}] Finished request: {request_id}')
  else:
  logger.info(f'[{worker}] Submitted request: {request_id}')
+ except KeyboardInterrupt:
+ # Interrupt the worker process will stop request execution, but
+ # the SIGTERM request should be respected anyway since it might
+ # be explicitly sent by user.
+ # TODO(aylei): crash the API server or recreate the worker process
+ # to avoid broken state.
+ logger.error(f'[{worker}] Worker process interrupted')
+ raise
+ except (Exception, SystemExit) as e: # pylint: disable=broad-except
+ # Catch any other exceptions to avoid crashing the worker process.
+ logger.error(
+ f'[{worker}] Error processing request {request_id}: '
+ f'{common_utils.format_exception(e, use_bracket=True)}')
 
-
- def _get_cpu_count() -> int:
- """Get the number of CPUs.
-
- If the API server is deployed as a pod in k8s cluster, we assume the
- number of CPUs is provided by the downward API.
- """
- cpu_count = os.getenv('SKYPILOT_POD_CPU_CORE_LIMIT')
- if cpu_count is not None:
- try:
- return int(float(cpu_count))
- except ValueError as e:
- with ux_utils.print_exception_no_traceback():
- raise ValueError(
- f'Failed to parse the number of CPUs from {cpu_count}'
- ) from e
- return psutil.cpu_count()
-
-
- def _get_mem_size_gb() -> float:
- """Get the memory size in GB.
-
- If the API server is deployed as a pod in k8s cluster, we assume the
- memory size is provided by the downward API.
- """
- mem_size = os.getenv('SKYPILOT_POD_MEMORY_GB_LIMIT')
- if mem_size is not None:
- try:
- return float(mem_size)
- except ValueError as e:
- with ux_utils.print_exception_no_traceback():
- raise ValueError(
- f'Failed to parse the memory size from {mem_size}') from e
- return psutil.virtual_memory().total / (1024**3)
+ # Use concurrent.futures.ProcessPoolExecutor instead of multiprocessing.Pool
+ # because the former is more efficient with the support of lazy creation of
+ # worker processes.
+ # We use executor instead of individual multiprocessing.Process to avoid
+ # the overhead of forking a new process for each request, which can be about
+ # 1s delay.
+ with concurrent.futures.ProcessPoolExecutor(
+ max_workers=max_parallel_size,
+ initializer=executor_initializer,
+ initargs=(proc_group,)) as executor:
+ while True:
+ process_request(executor)
 
 
  def start(deploy: bool) -> List[multiprocessing.Process]:
  """Start the request workers."""
  # Determine the job capacity of the workers based on the system resources.
- cpu_count = _get_cpu_count()
- mem_size_gb = _get_mem_size_gb()
+ cpu_count = common_utils.get_cpu_count()
+ mem_size_gb = common_utils.get_mem_size_gb()
  mem_size_gb = max(0, mem_size_gb - server_constants.MIN_AVAIL_MEM_GB)
- parallel_for_blocking = _max_parallel_size_for_blocking(
- cpu_count, mem_size_gb)
- if not deploy:
- parallel_for_blocking = min(parallel_for_blocking,
- _MAX_BLOCKING_WORKERS_LOCAL)
- max_parallel_for_non_blocking = _max_parallel_size_for_non_blocking(
- mem_size_gb, parallel_for_blocking)
+ max_parallel_for_long = _max_long_worker_parallism(cpu_count,
+ mem_size_gb,
+ local=not deploy)
+ max_parallel_for_short = _max_short_worker_parallism(
+ mem_size_gb, max_parallel_for_long)
  logger.info(
- f'SkyPilot API server will start {parallel_for_blocking} workers for '
- f'blocking requests and will allow at max '
- f'{max_parallel_for_non_blocking} non-blocking requests in parallel.')
+ f'SkyPilot API server will start {max_parallel_for_long} workers for '
+ f'long requests and will allow at max '
+ f'{max_parallel_for_short} short requests in parallel.')
 
  # Setup the queues.
  if queue_backend == QueueBackend.MULTIPROCESSING:
@@ -424,7 +428,7 @@ def start(deploy: bool) -> List[multiprocessing.Process]:
  logger.info('Request queues created')
 
  worker_procs = []
- for worker_id in range(parallel_for_blocking):
+ for worker_id in range(max_parallel_for_long):
  worker = RequestWorker(id=worker_id,
  schedule_type=api_requests.ScheduleType.LONG)
  worker_proc = multiprocessing.Process(target=request_worker,
@@ -432,31 +436,34 @@ def start(deploy: bool) -> List[multiprocessing.Process]:
  worker_proc.start()
  worker_procs.append(worker_proc)
 
- # Start a non-blocking worker.
+ # Start a worker for short requests.
  worker = RequestWorker(id=1, schedule_type=api_requests.ScheduleType.SHORT)
  worker_proc = multiprocessing.Process(target=request_worker,
- args=(worker,
- max_parallel_for_non_blocking))
+ args=(worker, max_parallel_for_short))
  worker_proc.start()
  worker_procs.append(worker_proc)
  return worker_procs
 
 
  @annotations.lru_cache(scope='global', maxsize=1)
- def _max_parallel_size_for_blocking(cpu_count: int, mem_size_gb: float) -> int:
- """Max parallelism for blocking requests."""
- cpu_based_max_parallel = cpu_count * _CPU_MULTIPLIER_FOR_BLOCKING_WORKERS
+ def _max_long_worker_parallism(cpu_count: int,
+ mem_size_gb: float,
+ local=False) -> int:
+ """Max parallelism for long workers."""
+ cpu_based_max_parallel = cpu_count * _CPU_MULTIPLIER_FOR_LONG_WORKERS
  mem_based_max_parallel = int(mem_size_gb * _MAX_MEM_PERCENT_FOR_BLOCKING /
- _PER_BLOCKING_REQUEST_MEM_GB)
- n = max(1, min(cpu_based_max_parallel, mem_based_max_parallel))
+ _LONG_WORKER_MEM_GB)
+ n = max(_MIN_LONG_WORKERS,
+ min(cpu_based_max_parallel, mem_based_max_parallel))
+ if local:
+ return min(n, _MAX_LONG_WORKERS_LOCAL)
  return n
 
 
  @annotations.lru_cache(scope='global', maxsize=1)
- def _max_parallel_size_for_non_blocking(mem_size_gb: float,
- parallel_size_for_blocking: int) -> int:
- """Max parallelism for non-blocking requests."""
- available_mem = mem_size_gb - (parallel_size_for_blocking *
- _PER_BLOCKING_REQUEST_MEM_GB)
- n = max(1, int(available_mem / _PER_NON_BLOCKING_REQUEST_MEM_GB))
+ def _max_short_worker_parallism(mem_size_gb: float,
+ long_worker_parallism: int) -> int:
+ """Max parallelism for short workers."""
+ available_mem = mem_size_gb - (long_worker_parallism * _LONG_WORKER_MEM_GB)
+ n = max(_MIN_SHORT_WORKERS, int(available_mem / _SHORT_WORKER_MEM_GB))
  return n
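Taken together, the constants and helpers above size the two worker pools from the machine's CPU and memory. A worked example under assumed hardware (8 CPUs, 16 GB RAM); the 2 GB reserve stands in for server_constants.MIN_AVAIL_MEM_GB, whose value is not shown in this diff:

    # Constants copied from the hunk above; size_workers mirrors start(),
    # _max_long_worker_parallism and _max_short_worker_parallism.
    _LONG_WORKER_MEM_GB = 0.4
    _SHORT_WORKER_MEM_GB = 0.25
    _CPU_MULTIPLIER_FOR_LONG_WORKERS = 2
    _MAX_LONG_WORKERS_LOCAL = 4
    _MAX_MEM_PERCENT_FOR_BLOCKING = 0.6
    _MIN_LONG_WORKERS = 1
    _MIN_SHORT_WORKERS = 2

    def size_workers(cpu_count: int, total_mem_gb: float, deploy: bool,
                     reserve_gb: float = 2.0):
        mem_gb = max(0, total_mem_gb - reserve_gb)
        long_n = max(_MIN_LONG_WORKERS,
                     min(cpu_count * _CPU_MULTIPLIER_FOR_LONG_WORKERS,
                         int(mem_gb * _MAX_MEM_PERCENT_FOR_BLOCKING /
                             _LONG_WORKER_MEM_GB)))
        if not deploy:
            # Local API server: cap the number of long workers.
            long_n = min(long_n, _MAX_LONG_WORKERS_LOCAL)
        short_n = max(_MIN_SHORT_WORKERS,
                      int((mem_gb - long_n * _LONG_WORKER_MEM_GB) /
                          _SHORT_WORKER_MEM_GB))
        return long_n, short_n

    print(size_workers(8, 16, deploy=True))   # (16, 30): deployed server
    print(size_workers(8, 16, deploy=False))  # (4, 49): local server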
sky/server/server.py CHANGED
@@ -57,7 +57,9 @@ P = ParamSpec('P')
 
  def _add_timestamp_prefix_for_server_logs() -> None:
  server_logger = sky_logging.init_logger('sky.server')
- # Disable propagation to avoid the root logger of SkyPilot being affected.
+ # Clear existing handlers first to prevent duplicates
+ server_logger.handlers.clear()
+ # Disable propagation to avoid the root logger of SkyPilot being affected
  server_logger.propagate = False
  # Add date prefix to the log message printed by loggers under
  # server.
@@ -460,6 +462,7 @@ async def launch(launch_body: payloads.LaunchBody,
  request: fastapi.Request) -> None:
  """Launches a cluster or task."""
  request_id = request.state.request_id
+ logger.info(f'Launching request: {request_id}')
  executor.schedule_request(
  request_id,
  request_name='launch',
@@ -627,6 +630,9 @@ async def logs(
  request_name='logs',
  request_body=cluster_job_body,
  func=core.tail_logs,
+ # TODO(aylei): We have tail logs scheduled as SHORT request, because it
+ # should be responsive. However, it can be long running if the user's
+ # job keeps running, and we should avoid it taking the SHORT worker.
  schedule_type=requests_lib.ScheduleType.SHORT,
  request_cluster_name=cluster_job_body.cluster_name,
  )
@@ -794,10 +800,9 @@ async def api_get(request_id: str) -> requests_lib.RequestPayload:
  detail=dataclasses.asdict(
  request_task.encode()))
  return request_task.encode()
- # Sleep 0 to yield, so other coroutines can run. This busy waiting
- # loop is performance critical for short-running requests, so we do
- # not want to yield too long.
- await asyncio.sleep(0)
+ # yield control to allow other coroutines to run, sleep shortly
+ # to avoid storming the DB and CPU in the meantime
+ await asyncio.sleep(0.1)
 
 
  @app.get('/api/stream')
@@ -68,7 +68,7 @@ async def log_streamer(request_id: Optional[str],
  # Sleep 0 to yield, so other coroutines can run. This busy waiting
  # loop is performance critical for short-running requests, so we do
  # not want to yield too long.
- await asyncio.sleep(0)
+ await asyncio.sleep(0.1)
  request_task = requests_lib.get_request(request_id)
  if not follow:
  break
@@ -88,6 +88,9 @@ async def log_streamer(request_id: Optional[str],
  yield line_str
 
  while True:
+ # Sleep 0 to yield control to allow other coroutines to run,
+ # while keeps the loop tight to make log stream responsive.
+ await asyncio.sleep(0)
  line: Optional[bytes] = await f.readline()
  if not line:
  if request_id is not None:
@@ -100,24 +103,18 @@ async def log_streamer(request_id: Optional[str],
  break
  if not follow:
  break
-
- # Sleep 0 to yield, so other coroutines can run. This busy
- # waiting loop is performance critical for short-running
- # requests, so we do not want to yield too long.
- await asyncio.sleep(0)
+ # Sleep shortly to avoid storming the DB and CPU, this has
+ # little impact on the responsivness here since we are waiting
+ # for a new line to come in.
+ await asyncio.sleep(0.1)
  continue
  line_str = line.decode('utf-8')
  if plain_logs:
  is_payload, line_str = message_utils.decode_payload(
  line_str, raise_for_mismatch=False)
  if is_payload:
- # Sleep 0 to yield, so other coroutines can run. This busy
- # waiting loop is performance critical for short-running
- # requests, so we do not want to yield too long.
- await asyncio.sleep(0)
  continue
  yield line_str
- await asyncio.sleep(0) # Allow other tasks to run
 
 
  def stream_response(
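The sleep changes in the two files above trade a little latency for much less busy-waiting: asyncio.sleep(0) merely yields to the event loop and resumes immediately, so a wait loop built on it hammers the requests DB, while asyncio.sleep(0.1) parks the coroutine between polls. A rough illustration (timings approximate, for intuition only):

    import asyncio
    import time

    async def poll(interval: float, iterations: int) -> float:
        start = time.monotonic()
        for _ in range(iterations):
            await asyncio.sleep(interval)  # 0 -> immediate resume, 0.1 -> real pause
        return time.monotonic() - start

    # poll(0, 1000) returns almost instantly, i.e. thousands of DB checks per
    # second in a real wait loop; poll(0.1, 10) takes about a second and keeps
    # the loop quiet. The tight await asyncio.sleep(0) is kept only where a new
    # log line is usually ready, so responsiveness matters more than CPU.
    print(asyncio.run(poll(0, 1000)), asyncio.run(poll(0.1, 10)))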
sky/utils/common.py CHANGED
@@ -1,53 +1,41 @@
  """Common enumerators and classes."""
 
+ import contextlib
  import enum
- from typing import Optional
+ import os
+ from typing import Generator
 
+ from sky.skylet import constants
  from sky.utils import common_utils
 
  SKY_SERVE_CONTROLLER_PREFIX: str = 'sky-serve-controller-'
  JOB_CONTROLLER_PREFIX: str = 'sky-jobs-controller-'
- SERVER_ID_CONNECTOR: str = '-remote-'
- # We use the user hash (machine-specific) hash of the server to determine if a
- # SkyPilot API server is started by the same user. It will be the same across
- # the whole lifecycle of the server, including:
+ # We use the user hash (machine-specific) for the controller name. It will be
+ # the same across the whole lifecycle of the server, including:
  # 1. all requests, because this global variable is set once during server
  # starts.
  # 2. SkyPilot API server restarts, as long as the `~/.sky` folder is persisted
  # and the env var set during starting the server is the same.
+ # This behavior is the same for the local API server (where SERVER_ID is the
+ # same as the normal user hash). This ensures backwards-compatibility with jobs
+ # controllers from before #4660.
  SERVER_ID = common_utils.get_user_hash()
+ SKY_SERVE_CONTROLLER_NAME: str = f'{SKY_SERVE_CONTROLLER_PREFIX}{SERVER_ID}'
+ JOB_CONTROLLER_NAME: str = f'{JOB_CONTROLLER_PREFIX}{SERVER_ID}'
 
 
- class ControllerType(enum.Enum):
- SERVE = 'SERVE'
- JOBS = 'JOBS'
-
-
- def get_controller_name(controller_type: ControllerType,
- user_hash: Optional[str] = None) -> str:
- prefix = JOB_CONTROLLER_PREFIX
- if controller_type == ControllerType.SERVE:
- prefix = SKY_SERVE_CONTROLLER_PREFIX
- if user_hash is None:
- user_hash = common_utils.get_user_hash()
- # Comparing the two IDs can determine if the caller is trying to get the
- # controller created by their local API server or a remote API server.
- if user_hash == SERVER_ID:
- # Not adding server ID for locally created controller because
- # of backward compatibility.
- return f'{prefix}{user_hash}'
- return f'{prefix}{user_hash}{SERVER_ID_CONNECTOR}{SERVER_ID}'
-
-
- # Controller names differ per user and per SkyPilot API server.
- # If local: <prefix>-<user_id>
- # If remote: <prefix>-<user_id>-remote-<api_server_user_id>
- # DO NOT use these variables on the client side because client side doesn't know
- # the remote server's user id, so client side will get local-version controller
- # name.
- # TODO(SKY-1106): remove dynamic constants like this.
- SKY_SERVE_CONTROLLER_NAME: str = get_controller_name(ControllerType.SERVE)
- JOB_CONTROLLER_NAME: str = get_controller_name(ControllerType.JOBS)
+ @contextlib.contextmanager
+ def with_server_user_hash() -> Generator[None, None, None]:
+ """Temporarily set the user hash to common.SERVER_ID."""
+ old_env_user_hash = os.getenv(constants.USER_ID_ENV_VAR)
+ os.environ[constants.USER_ID_ENV_VAR] = SERVER_ID
+ try:
+ yield
+ finally:
+ if old_env_user_hash is not None:
+ os.environ[constants.USER_ID_ENV_VAR] = old_env_user_hash
+ else:
+ os.environ.pop(constants.USER_ID_ENV_VAR)
 
 
  class StatusRefreshMode(enum.Enum):
@@ -64,11 +52,3 @@ class StatusRefreshMode(enum.Enum):
  class OptimizeTarget(enum.Enum):
  COST = 0
  TIME = 1
-
-
- def is_current_user_controller(controller_name: str) -> bool:
- """If the controller name belongs to the current user."""
- if SERVER_ID_CONNECTOR in controller_name:
- controller_name = controller_name.split(SERVER_ID_CONNECTOR)[0]
- controller_user_id = controller_name.split('-')[-1]
- return controller_user_id == common_utils.get_user_hash()
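with_server_user_hash() above is what lets the jobs and serve code paths (see the sky/jobs/server/core.py and sky/serve/server/core.py hunks earlier) create the shared controller under the API server's identity and then restore the requesting user's hash. A small usage sketch with stand-in values; the real variable name comes from sky.skylet.constants.USER_ID_ENV_VAR, which is not shown in this diff:

    import contextlib
    import os

    USER_ID_ENV_VAR = 'SKYPILOT_USER_ID'  # assumed name, for illustration only
    SERVER_ID = 'serverhash'              # stand-in for the server's user hash

    @contextlib.contextmanager
    def with_server_user_hash():
        old = os.getenv(USER_ID_ENV_VAR)
        os.environ[USER_ID_ENV_VAR] = SERVER_ID
        try:
            yield
        finally:
            if old is not None:
                os.environ[USER_ID_ENV_VAR] = old
            else:
                os.environ.pop(USER_ID_ENV_VAR)

    os.environ[USER_ID_ENV_VAR] = 'alice-hash'
    with with_server_user_hash():
        # execution.launch(...) runs here with the server's hash, so the
        # controller cluster is owned by the API server, not by 'alice-hash'.
        assert os.environ[USER_ID_ENV_VAR] == SERVER_ID
    assert os.environ[USER_ID_ENV_VAR] == 'alice-hash'  # caller's hash restored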
sky/utils/common_utils.py CHANGED
@@ -18,6 +18,7 @@ import uuid
 
  import jinja2
  import jsonschema
+ import psutil
  import yaml
 
  from sky import exceptions
@@ -755,3 +756,40 @@ def is_port_available(port: int, reuse_addr: bool = True) -> bool:
  return True
  except OSError:
  return False
+
+
+ # TODO(aylei): should be aware of cgroups
+ def get_cpu_count() -> int:
+ """Get the number of CPUs.
+
+ If the API server is deployed as a pod in k8s cluster, we assume the
+ number of CPUs is provided by the downward API.
+ """
+ cpu_count = os.getenv('SKYPILOT_POD_CPU_CORE_LIMIT')
+ if cpu_count is not None:
+ try:
+ return int(float(cpu_count))
+ except ValueError as e:
+ with ux_utils.print_exception_no_traceback():
+ raise ValueError(
+ f'Failed to parse the number of CPUs from {cpu_count}'
+ ) from e
+ return psutil.cpu_count()
+
+
+ # TODO(aylei): should be aware of cgroups
+ def get_mem_size_gb() -> float:
+ """Get the memory size in GB.
+
+ If the API server is deployed as a pod in k8s cluster, we assume the
+ memory size is provided by the downward API.
+ """
+ mem_size = os.getenv('SKYPILOT_POD_MEMORY_GB_LIMIT')
+ if mem_size is not None:
+ try:
+ return float(mem_size)
+ except ValueError as e:
+ with ux_utils.print_exception_no_traceback():
+ raise ValueError(
+ f'Failed to parse the memory size from {mem_size}') from e
+ return psutil.virtual_memory().total / (1024**3)
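The two helpers above were moved here from sky/server/requests/executor.py so that sky/server/common.py can reuse them. A brief usage sketch (values illustrative; in a Kubernetes deployment the two env vars would be populated via the downward API, otherwise both helpers fall back to psutil on the host):

    import os
    from sky.utils import common_utils

    os.environ['SKYPILOT_POD_CPU_CORE_LIMIT'] = '4'
    os.environ['SKYPILOT_POD_MEMORY_GB_LIMIT'] = '8'
    assert common_utils.get_cpu_count() == 4
    assert common_utils.get_mem_size_gb() == 8.0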
@@ -91,10 +91,6 @@ class Controllers(enum.Enum):
  JOBS_CONTROLLER = _ControllerSpec(
  controller_type='jobs',
  name='managed jobs controller',
- # Default cluster name is the current user's controller cluster unless
- # caller initiate with a different controller name.
- # TODO(zhwu): by having the controller name loaded in common, it
- # will not respect the latest updated user hash.
  cluster_name=common.JOB_CONTROLLER_NAME,
  in_progress_hint=(
  '* {job_info}To see all managed jobs: '
@@ -164,13 +160,18 @@ class Controllers(enum.Enum):
  if name is None:
  return None
  controller = None
+ # The controller name is always the same. However, on the client-side,
+ # we may not know the exact name, because we are missing the server-side
+ # common.SERVER_ID. So, we will assume anything that matches the prefix
+ # is a controller.
  if name.startswith(common.SKY_SERVE_CONTROLLER_PREFIX):
  controller = cls.SKY_SERVE_CONTROLLER
  elif name.startswith(common.JOB_CONTROLLER_PREFIX):
  controller = cls.JOBS_CONTROLLER
  if controller is not None and name != controller.value.cluster_name:
- # Input name is not the current user's controller name,
- # so need to set the controller's cluster name to the input name.
+ # The client-side cluster_name is not accurate. Assume that `name`
+ # is the actual cluster name, so need to set the controller's
+ # cluster name to the input name.
  controller.value.cluster_name = name
  return controller
 
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: skypilot-nightly
- Version: 1.0.0.dev20250218
+ Version: 1.0.0.dev20250220
  Summary: SkyPilot: An intercloud broker for the clouds
  Author: SkyPilot Team
  License: Apache 2.0
@@ -1,8 +1,8 @@
- sky/__init__.py,sha256=Pna6srpHox46eHU0RFPPTH60skVAjqhun9lDfN7QwHM,6391
+ sky/__init__.py,sha256=2WOLIr_y7h-Dzd_2cUqq56HiHaF6TBVULtoUaAeb-5c,6391
  sky/admin_policy.py,sha256=hPo02f_A32gCqhUueF0QYy1fMSSKqRwYEg_9FxScN_s,3248
  sky/authentication.py,sha256=hCEqi77nprQEg3ktfRL51xiiw16zwZOmFEDB_Z7fWVU,22384
  sky/check.py,sha256=NDKx_Zm7YRxPjMv82wz3ESLnGIPljaACyqVdVNM0PzY,11258
- sky/cli.py,sha256=Z_w8p_qGWdg2-7yWlimrqmaB1Yqy1hXWQkGy08jEee4,218814
+ sky/cli.py,sha256=iwYBgEt3tgsYmOIp-ivPmL2FHoalvhH4Ng--C31ubws,218201
  sky/cloud_stores.py,sha256=-95XIqi_ouo7hvoN5mQNP6bGm07MyF6Yk-YP4Txb5wg,24034
  sky/core.py,sha256=gw_TrQOxz28sLAJJq6ajPnlRlrKQ2G1DtqLuntMejFU,45508
  sky/dag.py,sha256=Yl7Ry26Vql5cv4YMz8g9kOUgtoCihJnw7c8NgZYakMY,3242
@@ -42,7 +42,7 @@ sky/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sky/benchmark/benchmark_state.py,sha256=X8CXmuU9KgsDRhKedhFgjeRMUFWtQsjFs1qECvPG2yg,8723
  sky/benchmark/benchmark_utils.py,sha256=o4RymqSceq5mLEZL0upQM6NVEzJJQzj9s9tTm49uUTc,26365
  sky/client/__init__.py,sha256=pz6xvVSd9X-gwqbsDL0E9QOojYqM0KAD0j-NCyCIF1k,38
- sky/client/cli.py,sha256=Z_w8p_qGWdg2-7yWlimrqmaB1Yqy1hXWQkGy08jEee4,218814
+ sky/client/cli.py,sha256=iwYBgEt3tgsYmOIp-ivPmL2FHoalvhH4Ng--C31ubws,218201
  sky/client/common.py,sha256=axDic7WOG1e78SdFm5XIwdhX7YNvf3g4k7INrsW3X4s,14611
  sky/client/sdk.py,sha256=q5R0_AquHAiLSLXpha8fIecQ9cgqqFba436xVzJ48oI,66943
  sky/clouds/__init__.py,sha256=taKUCz6gWoKZhqHLYJXX-d0Ux6ZSQZEwxcNFdniupL0,1365
@@ -87,7 +87,7 @@ sky/clouds/service_catalog/data_fetchers/fetch_aws.py,sha256=Zj4bqWPiDcT_ZFyHxQw
  sky/clouds/service_catalog/data_fetchers/fetch_azure.py,sha256=7YVnoGDGGZI2TK02bj_LOoD4E5J5CFl6eqz2XlR4Vy8,12790
  sky/clouds/service_catalog/data_fetchers/fetch_cudo.py,sha256=52P48lvWN0s1ArjeLPeLemPRpxjSRcHincRle0nqdm4,3440
  sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py,sha256=yKuAFbjBRNz_e2RNNDT_aHHAuKQ86Ac7GKgIie5O6Pg,7273
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py,sha256=4bU0j-mWZCymzUq7uyJfoIDaXXeJg49gUlM9oybBFI0,30903
+ sky/clouds/service_catalog/data_fetchers/fetch_gcp.py,sha256=JnugFifzHPQITlbDKoKexE8NqgagOEfQWTxon7P6vJ0,30935
  sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py,sha256=MUzogyLruLQmIt-To6TsfnGPgv_nnlp49XYbeshsd7I,5003
  sky/clouds/service_catalog/data_fetchers/fetch_vast.py,sha256=zR9icM3ty5C8tGw13pQbsBtQQMgG4kl1j_jSGqqrgOA,4741
  sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py,sha256=Opp2r3KSzXPtwk3lKNbO8IX9QzjoRSwy1kW3jPjtS1c,21453
@@ -116,9 +116,9 @@ sky/jobs/dashboard/dashboard.py,sha256=kUKSXMAWAvPwJ_W_JK3wyz65Uope90_rNvhl8rZ1I
  sky/jobs/dashboard/static/favicon.ico,sha256=uYlvgxSM7gjBmXpZ8wydvZUPAbJiiix-rc2Xe5mma9s,15086
  sky/jobs/dashboard/templates/index.html,sha256=tz95q8O2pF7IvfY6yv0rnPyhj4DX8WX4RIVVxqFKV1Y,28519
  sky/jobs/server/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
- sky/jobs/server/core.py,sha256=KUMmObt0rWhuCR50lQYmF6bFSKAjHbb8sw53WBnJzv0,22251
- sky/jobs/server/dashboard_utils.py,sha256=BKafOhnwU_e6LtKLqqmf_CyUtkbFWRwSbdjMwhSBQrM,2086
- sky/jobs/server/server.py,sha256=6W9FUPT-QFfX50Qwu6MBdJ2ScSW994w5jsyM-bHW8lE,7459
+ sky/jobs/server/core.py,sha256=zMLSSdNFQkP-RsfzCZ9jIcHNCL0lSvRd7PH3Sie0yPA,22615
+ sky/jobs/server/dashboard_utils.py,sha256=2Mbx40W1pQqPEPHsSDbHeaF0j5cgyKy-_A9Owdwp_AQ,2315
+ sky/jobs/server/server.py,sha256=s3wULAh4u4drdIz2VA8l0HiXxHWdUzsBDYCstzU0Vxs,7411
  sky/provision/__init__.py,sha256=jiTOawg_wpy0s3Z-SEoOf7r280arLHUZzj-KPh-w7ek,6424
  sky/provision/common.py,sha256=E8AlSUFcn0FYQq1erNmoVfMAdsF9tP2yxfyk-9PLvQU,10286
  sky/provision/constants.py,sha256=oc_XDUkcoLQ_lwDy5yMeMSWviKS0j0s1c0pjlvpNeWY,800
@@ -218,16 +218,16 @@ sky/serve/service_spec.py,sha256=Q0qnFRjNnfGIpksubH5VqPKIlvpWs5had_Ma_PSHyo8,169
  sky/serve/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sky/serve/client/sdk.py,sha256=fVYQfvNuJxa8aZiS7LJoXFeGcjRidko0Tph5b6m0yMQ,11539
  sky/serve/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sky/serve/server/core.py,sha256=cide83JrRMl45WvA0KdPtj36_g75nSiblsFtPbJ4Qyc,36660
- sky/serve/server/server.py,sha256=IVEjseLX4h1EZGSpJofzEJl6lkGaBKlEY4IBlngQWD8,3479
+ sky/serve/server/core.py,sha256=pRvFadEIH_WTUkTtSmuFoPBP4JFq8Obt68ifi9DWuog,36865
+ sky/serve/server/server.py,sha256=gQGVU9nHYdGbaLhGjIUNIYn4xwKjRASRJkiiTL5AI1Y,3283
  sky/server/__init__.py,sha256=MPPBqFzXz6Jv5QSk6td_IcvnfXfNErDZVcizu4MLRow,27
- sky/server/common.py,sha256=8J1RZ2IGJtySw-gbLE_JEb9Hm24os5qwadmQDhQMqf4,18447
+ sky/server/common.py,sha256=uBshF4a-U8NGgm8XOHTW2YNSq0CsByfdIFgiybU5PEg,17321
  sky/server/constants.py,sha256=SqhWJMassFyvWAJn2UJHvuA_0_C6f5vngMzZ2KYLsKw,770
- sky/server/server.py,sha256=TZplXKA0KMs4UHLV3K5NSyhUPD0l2cmsiYgAZohn_Gs,41902
- sky/server/stream_utils.py,sha256=6jo1Dq8EtD0AHmJ3e3zCUNAiSYQlUKbPil4h8pA-2ac,5813
+ sky/server/server.py,sha256=0gcIn3jr_4DkHpBJYdNq--uPo9Im8bn2ftxgd8mBMcU,42225
+ sky/server/stream_utils.py,sha256=-3IX1YCgxAFfcvQIV0TCvOn1wbRLWovAx3ckCrsExWU,5651
  sky/server/html/log.html,sha256=TSGZktua9Ysl_ysg3w60rjxAxhH61AJnsYDHdtqrjmI,6929
  sky/server/requests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sky/server/requests/executor.py,sha256=4PVgEK11YqWGG4ihhVPK2MPVFlCDkE9U9D07q_TbdBA,18759
+ sky/server/requests/executor.py,sha256=NxVB0aFA05GddXDdt89wEwEYyJcIIrsQxE2wowklhUI,19597
  sky/server/requests/payloads.py,sha256=PeEkqQoTO3ellelkFX5yzPKbPkDV-NfVXkxHndYlrjE,15769
  sky/server/requests/requests.py,sha256=aMdjiK5kjSYP36pxdXFU6qgKOXcOmtViHbFm3V8Dvf8,19590
  sky/server/requests/queues/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -301,11 +301,11 @@ sky/utils/annotations.py,sha256=-rfacB30Sl0xkFriejGvxma3oKctGfXXLZkQPHG33eo,1626
  sky/utils/cluster_utils.py,sha256=s6DFRXktv6_gF_DnwDEXJ7CniifHp8CAPeGciRCbXgI,14432
  sky/utils/command_runner.py,sha256=-7vxLvwZnTvYMQ_nScmuQWY6ZvQYv69yvvIp2uOaOqU,39063
  sky/utils/command_runner.pyi,sha256=mJOzCgcYZAfHwnY_6Wf1YwlTEJGb9ihzc2f0rE0Kw98,7751
- sky/utils/common.py,sha256=zBUmQjlSD7aF6tDG8mzbf-oU6JG3oYM2EAQ9sgSWSrA,2833
- sky/utils/common_utils.py,sha256=wPECJDpeloyixalXNrdmVKXFyU1UKUtBES6D0mRd2mE,26180
+ sky/utils/common.py,sha256=P4oVXFATUYgkruHX92cN12SJBtfb8DiOOYZtbN1kvP0,1927
+ sky/utils/common_utils.py,sha256=-O0GthIockeJy8LlA4heVYYtaUdQwNA-5mFMqHajRf8,27457
  sky/utils/config_utils.py,sha256=VQ2E3DQ2XysD-kul-diSrxn_pXWsDMfKAev91OiJQ1Q,9041
  sky/utils/control_master_utils.py,sha256=iD4M0onjYOdZ2RuxjwMBl4KhafHXJzuHjvqlBUnu-VE,1450
- sky/utils/controller_utils.py,sha256=1tnRFw9ANVyACGswIsl67uSK0fYDHLOoO6BQpxmFDgA,45674
+ sky/utils/controller_utils.py,sha256=4Nck10XV6gNJKjBl7y_CIxIGqP3bbISuZSVTHbBumgs,45725
  sky/utils/dag_utils.py,sha256=sAus0aL1wtuuFZSDnpO4LY-6WK4u5iJY952oWQzHo3Y,7532
  sky/utils/db_utils.py,sha256=K2-OHPg0FeHCarevMdWe0IWzm6wWumViEeYeJuGoFUE,3747
  sky/utils/env_options.py,sha256=aaD6GoYK0LaZIqjOEZ-R7eccQuiRriW3EuLWtOI5En8,1578
@@ -336,9 +336,9 @@ sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml,sha256=VLKT2KKimZu1GDg_4AIlIt488
  sky/utils/kubernetes/kubernetes_deploy_utils.py,sha256=iAjfyPclOs8qlALACcfxLpRAO9CZ-h16leFqXZ6tNaY,10096
  sky/utils/kubernetes/rsync_helper.sh,sha256=h4YwrPFf9727CACnMJvF3EyK_0OeOYKKt4su_daKekw,1256
  sky/utils/kubernetes/ssh_jump_lifecycle_manager.py,sha256=Kq1MDygF2IxFmu9FXpCxqucXLmeUrvs6OtRij6XTQbo,6554
- skypilot_nightly-1.0.0.dev20250218.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
- skypilot_nightly-1.0.0.dev20250218.dist-info/METADATA,sha256=LTMWhkCmIQwt9zptcjlq9Se2Cs9MCe7IIMcqHEE7lN0,18916
- skypilot_nightly-1.0.0.dev20250218.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- skypilot_nightly-1.0.0.dev20250218.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
- skypilot_nightly-1.0.0.dev20250218.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
- skypilot_nightly-1.0.0.dev20250218.dist-info/RECORD,,
+ skypilot_nightly-1.0.0.dev20250220.dist-info/LICENSE,sha256=emRJAvE7ngL6x0RhQvlns5wJzGI3NEQ_WMjNmd9TZc4,12170
+ skypilot_nightly-1.0.0.dev20250220.dist-info/METADATA,sha256=uYtMxJQSUuL9hPmfqny_uQvuqWy65W5mHUHv7HvJb-o,18916
+ skypilot_nightly-1.0.0.dev20250220.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ skypilot_nightly-1.0.0.dev20250220.dist-info/entry_points.txt,sha256=StA6HYpuHj-Y61L2Ze-hK2IcLWgLZcML5gJu8cs6nU4,36
+ skypilot_nightly-1.0.0.dev20250220.dist-info/top_level.txt,sha256=qA8QuiNNb6Y1OF-pCUtPEr6sLEwy2xJX06Bd_CrtrHY,4
+ skypilot_nightly-1.0.0.dev20250220.dist-info/RECORD,,