skypilot-nightly 1.0.0.dev20250215__py3-none-any.whl → 1.0.0.dev20250217__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. sky/__init__.py +48 -22
  2. sky/adaptors/aws.py +2 -1
  3. sky/adaptors/azure.py +4 -4
  4. sky/adaptors/cloudflare.py +4 -4
  5. sky/adaptors/kubernetes.py +8 -8
  6. sky/authentication.py +42 -45
  7. sky/backends/backend.py +2 -2
  8. sky/backends/backend_utils.py +108 -221
  9. sky/backends/cloud_vm_ray_backend.py +283 -282
  10. sky/benchmark/benchmark_utils.py +6 -2
  11. sky/check.py +40 -28
  12. sky/cli.py +1213 -1116
  13. sky/client/__init__.py +1 -0
  14. sky/client/cli.py +5644 -0
  15. sky/client/common.py +345 -0
  16. sky/client/sdk.py +1757 -0
  17. sky/cloud_stores.py +12 -6
  18. sky/clouds/__init__.py +0 -2
  19. sky/clouds/aws.py +20 -13
  20. sky/clouds/azure.py +5 -3
  21. sky/clouds/cloud.py +1 -1
  22. sky/clouds/cudo.py +2 -1
  23. sky/clouds/do.py +2 -1
  24. sky/clouds/fluidstack.py +3 -2
  25. sky/clouds/gcp.py +10 -8
  26. sky/clouds/ibm.py +8 -7
  27. sky/clouds/kubernetes.py +7 -6
  28. sky/clouds/lambda_cloud.py +8 -7
  29. sky/clouds/oci.py +4 -3
  30. sky/clouds/paperspace.py +2 -1
  31. sky/clouds/runpod.py +2 -1
  32. sky/clouds/scp.py +8 -7
  33. sky/clouds/service_catalog/__init__.py +3 -3
  34. sky/clouds/service_catalog/aws_catalog.py +7 -1
  35. sky/clouds/service_catalog/common.py +4 -2
  36. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +2 -2
  37. sky/clouds/utils/oci_utils.py +1 -1
  38. sky/clouds/vast.py +2 -1
  39. sky/clouds/vsphere.py +2 -1
  40. sky/core.py +263 -99
  41. sky/dag.py +4 -0
  42. sky/data/mounting_utils.py +2 -1
  43. sky/data/storage.py +97 -35
  44. sky/data/storage_utils.py +69 -9
  45. sky/exceptions.py +138 -5
  46. sky/execution.py +47 -50
  47. sky/global_user_state.py +105 -22
  48. sky/jobs/__init__.py +12 -14
  49. sky/jobs/client/__init__.py +0 -0
  50. sky/jobs/client/sdk.py +296 -0
  51. sky/jobs/constants.py +30 -1
  52. sky/jobs/controller.py +12 -6
  53. sky/jobs/dashboard/dashboard.py +2 -6
  54. sky/jobs/recovery_strategy.py +22 -29
  55. sky/jobs/server/__init__.py +1 -0
  56. sky/jobs/{core.py → server/core.py} +101 -34
  57. sky/jobs/server/dashboard_utils.py +64 -0
  58. sky/jobs/server/server.py +182 -0
  59. sky/jobs/utils.py +32 -23
  60. sky/models.py +27 -0
  61. sky/optimizer.py +9 -11
  62. sky/provision/__init__.py +6 -3
  63. sky/provision/aws/config.py +2 -2
  64. sky/provision/aws/instance.py +1 -1
  65. sky/provision/azure/instance.py +1 -1
  66. sky/provision/cudo/instance.py +1 -1
  67. sky/provision/do/instance.py +1 -1
  68. sky/provision/do/utils.py +0 -5
  69. sky/provision/fluidstack/fluidstack_utils.py +4 -3
  70. sky/provision/fluidstack/instance.py +4 -2
  71. sky/provision/gcp/instance.py +1 -1
  72. sky/provision/instance_setup.py +2 -2
  73. sky/provision/kubernetes/constants.py +8 -0
  74. sky/provision/kubernetes/instance.py +1 -1
  75. sky/provision/kubernetes/utils.py +67 -76
  76. sky/provision/lambda_cloud/instance.py +3 -15
  77. sky/provision/logging.py +1 -1
  78. sky/provision/oci/instance.py +7 -4
  79. sky/provision/paperspace/instance.py +1 -1
  80. sky/provision/provisioner.py +3 -2
  81. sky/provision/runpod/instance.py +1 -1
  82. sky/provision/vast/instance.py +1 -1
  83. sky/provision/vast/utils.py +2 -1
  84. sky/provision/vsphere/instance.py +2 -11
  85. sky/resources.py +55 -40
  86. sky/serve/__init__.py +6 -10
  87. sky/serve/client/__init__.py +0 -0
  88. sky/serve/client/sdk.py +366 -0
  89. sky/serve/constants.py +3 -0
  90. sky/serve/replica_managers.py +10 -10
  91. sky/serve/serve_utils.py +56 -36
  92. sky/serve/server/__init__.py +0 -0
  93. sky/serve/{core.py → server/core.py} +37 -17
  94. sky/serve/server/server.py +117 -0
  95. sky/serve/service.py +8 -1
  96. sky/server/__init__.py +1 -0
  97. sky/server/common.py +441 -0
  98. sky/server/constants.py +21 -0
  99. sky/server/html/log.html +174 -0
  100. sky/server/requests/__init__.py +0 -0
  101. sky/server/requests/executor.py +462 -0
  102. sky/server/requests/payloads.py +481 -0
  103. sky/server/requests/queues/__init__.py +0 -0
  104. sky/server/requests/queues/mp_queue.py +76 -0
  105. sky/server/requests/requests.py +567 -0
  106. sky/server/requests/serializers/__init__.py +0 -0
  107. sky/server/requests/serializers/decoders.py +192 -0
  108. sky/server/requests/serializers/encoders.py +166 -0
  109. sky/server/server.py +1095 -0
  110. sky/server/stream_utils.py +144 -0
  111. sky/setup_files/MANIFEST.in +1 -0
  112. sky/setup_files/dependencies.py +12 -4
  113. sky/setup_files/setup.py +1 -1
  114. sky/sky_logging.py +9 -13
  115. sky/skylet/autostop_lib.py +2 -2
  116. sky/skylet/constants.py +46 -12
  117. sky/skylet/events.py +5 -6
  118. sky/skylet/job_lib.py +78 -66
  119. sky/skylet/log_lib.py +17 -11
  120. sky/skypilot_config.py +79 -94
  121. sky/task.py +119 -73
  122. sky/templates/aws-ray.yml.j2 +4 -4
  123. sky/templates/azure-ray.yml.j2 +3 -2
  124. sky/templates/cudo-ray.yml.j2 +3 -2
  125. sky/templates/fluidstack-ray.yml.j2 +3 -2
  126. sky/templates/gcp-ray.yml.j2 +3 -2
  127. sky/templates/ibm-ray.yml.j2 +3 -2
  128. sky/templates/jobs-controller.yaml.j2 +1 -12
  129. sky/templates/kubernetes-ray.yml.j2 +3 -2
  130. sky/templates/lambda-ray.yml.j2 +3 -2
  131. sky/templates/oci-ray.yml.j2 +3 -2
  132. sky/templates/paperspace-ray.yml.j2 +3 -2
  133. sky/templates/runpod-ray.yml.j2 +3 -2
  134. sky/templates/scp-ray.yml.j2 +3 -2
  135. sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
  136. sky/templates/vsphere-ray.yml.j2 +4 -2
  137. sky/templates/websocket_proxy.py +64 -0
  138. sky/usage/constants.py +8 -0
  139. sky/usage/usage_lib.py +45 -11
  140. sky/utils/accelerator_registry.py +33 -53
  141. sky/utils/admin_policy_utils.py +2 -1
  142. sky/utils/annotations.py +51 -0
  143. sky/utils/cli_utils/status_utils.py +33 -3
  144. sky/utils/cluster_utils.py +356 -0
  145. sky/utils/command_runner.py +69 -14
  146. sky/utils/common.py +74 -0
  147. sky/utils/common_utils.py +133 -93
  148. sky/utils/config_utils.py +204 -0
  149. sky/utils/control_master_utils.py +2 -3
  150. sky/utils/controller_utils.py +133 -147
  151. sky/utils/dag_utils.py +72 -24
  152. sky/utils/kubernetes/deploy_remote_cluster.sh +2 -2
  153. sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
  154. sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
  155. sky/utils/log_utils.py +83 -23
  156. sky/utils/message_utils.py +81 -0
  157. sky/utils/registry.py +127 -0
  158. sky/utils/resources_utils.py +2 -2
  159. sky/utils/rich_utils.py +213 -34
  160. sky/utils/schemas.py +19 -2
  161. sky/{status_lib.py → utils/status_lib.py} +12 -7
  162. sky/utils/subprocess_utils.py +51 -35
  163. sky/utils/timeline.py +7 -2
  164. sky/utils/ux_utils.py +95 -25
  165. {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/METADATA +8 -3
  166. {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/RECORD +170 -132
  167. sky/clouds/cloud_registry.py +0 -76
  168. sky/utils/cluster_yaml_utils.py +0 -24
  169. {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/LICENSE +0 -0
  170. {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/WHEEL +0 -0
  171. {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/entry_points.txt +0 -0
  172. {skypilot_nightly-1.0.0.dev20250215.dist-info → skypilot_nightly-1.0.0.dev20250217.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,462 @@
1
+ """Executor for the requests.
2
+
3
+ We start limited number of workers for long-running requests, and
4
+ significantly more workers for short-running requests. This is to optimize the
5
+ resource usage and the latency of the requests.
6
+
7
+ * Long-running requests are those requests that can take a long time to finish
8
+ and more resources are needed, such as cluster launching, starting, job
9
+ submission, managed job submission, etc.
10
+
11
+ * Short-running requests are those requests that can be done quickly, and
12
+ require a quick response, such as status check, job status check, etc.
13
+
14
+ With more short-running workers, we can serve more short-running requests in
15
+ parallel, and reduce the latency.
16
+
17
+ The number of the workers is determined by the system resources.
18
+
19
+ See the [README.md](../README.md) for detailed architecture of the executor.
20
+ """
21
+ import concurrent.futures
22
+ import contextlib
23
+ import dataclasses
24
+ import enum
25
+ import multiprocessing
26
+ import os
27
+ import queue as queue_lib
28
+ import signal
29
+ import sys
30
+ import time
31
+ import traceback
32
+ import typing
33
+ from typing import Any, Callable, Generator, List, Optional, TextIO, Tuple
34
+
35
+ import psutil
36
+ import setproctitle
37
+
38
+ from sky import global_user_state
39
+ from sky import models
40
+ from sky import sky_logging
41
+ from sky import skypilot_config
42
+ from sky.server import common as server_common
43
+ from sky.server import constants as server_constants
44
+ from sky.server.requests import payloads
45
+ from sky.server.requests import requests as api_requests
46
+ from sky.server.requests.queues import mp_queue
47
+ from sky.skylet import constants
48
+ from sky.utils import annotations
49
+ from sky.utils import common_utils
50
+ from sky.utils import timeline
51
+ from sky.utils import ux_utils
52
+
53
+ if typing.TYPE_CHECKING:
54
+ import types
55
+
56
+ # pylint: disable=ungrouped-imports
57
+ if sys.version_info >= (3, 10):
58
+ from typing import ParamSpec
59
+ else:
60
+ from typing_extensions import ParamSpec
61
+
62
P = ParamSpec('P')

logger = sky_logging.init_logger(__name__)

# On macOS, the default start method for multiprocessing is 'fork', which
# can cause issues with certain types of resources, including those used in
# the QueueManager in mp_queue.py.
# The 'spawn' start method is generally more compatible across different
# platforms, including macOS.
multiprocessing.set_start_method('spawn', force=True)

# Constants based on profiling the peak memory usage of
# various sky commands. See `tests/load_test/` for details.
# Max memory consumption for each request.
_PER_BLOCKING_REQUEST_MEM_GB = 0.25
_PER_NON_BLOCKING_REQUEST_MEM_GB = 0.15
# To control the number of blocking workers.
_CPU_MULTIPLIER_FOR_BLOCKING_WORKERS = 2
# Cap on blocking workers when the API server is not deployed (local mode).
_MAX_BLOCKING_WORKERS_LOCAL = 4
# Percentage of memory for blocking requests
# from the memory reserved for SkyPilot.
# This is to reserve some memory for non-blocking requests.
_MAX_MEM_PERCENT_FOR_BLOCKING = 0.6
85
+
86
+
87
class QueueBackend(enum.Enum):
    """Backend used to store and share the request queues."""
    # Queues served by a multiprocessing manager process (see mp_queue.py),
    # so they can be shared across worker processes.
    MULTIPROCESSING = 'multiprocessing'
    # TODO(zhwu): we can add redis backend in the future.
90
+
91
+
92
@dataclasses.dataclass
class RequestWorker:
    """Identity of a worker process that consumes one request queue."""
    id: int
    # The type of queue this worker works on.
    schedule_type: api_requests.ScheduleType

    def __str__(self) -> str:
        return 'Worker(id={}, schedule_type={})'.format(
            self.id, self.schedule_type.value)
100
+
101
+
102
class RequestQueue:
    """A FIFO of scheduled requests, either redis or multiprocessing backed.

    Each element is a ``(request_id, ignore_return_value)`` tuple.
    """

    def __init__(self,
                 schedule_type: api_requests.ScheduleType,
                 backend: Optional[QueueBackend] = None) -> None:
        self.name = schedule_type.value
        self.backend = backend
        # Only the multiprocessing backend is implemented so far.
        assert backend in (None, QueueBackend.MULTIPROCESSING), backend
        self.queue = mp_queue.get_queue(self.name)

    def put(self, request: Tuple[str, bool]) -> None:
        """Enqueue a request.

        Args:
            request: A tuple of request_id and ignore_return_value.
        """
        self.queue.put(request)  # type: ignore

    def get(self) -> Optional[Tuple[str, bool]]:
        """Dequeue one request without blocking.

        Returns:
            A (request_id, ignore_return_value) tuple, or None when the
            queue is currently empty.
        """
        try:
            element = self.queue.get(block=False)
        except queue_lib.Empty:
            return None
        return element

    def __len__(self) -> int:
        """Number of requests currently waiting in the queue."""
        return self.queue.qsize()
141
+
142
+
143
# Backend used for all request queues in this server.
queue_backend = QueueBackend.MULTIPROCESSING


@annotations.lru_cache(scope='global', maxsize=None)
def _get_queue(schedule_type: api_requests.ScheduleType) -> RequestQueue:
    # Cached so each process builds at most one RequestQueue per schedule
    # type and reuses the underlying queue connection.
    return RequestQueue(schedule_type, backend=queue_backend)
149
+
150
+
151
@contextlib.contextmanager
def override_request_env_and_config(
        request_body: payloads.RequestBody) -> Generator[None, None, None]:
    """Override the environment and SkyPilot config for a request.

    Applies the client-supplied environment variables and SkyPilot config
    override for the duration of the ``with`` block, then restores the
    original process environment. Mutates the process-wide ``os.environ``,
    so only one request should run in a process at a time.

    Args:
        request_body: Payload whose ``env_vars`` and
            ``override_skypilot_config`` are applied.

    Yields:
        None; the overrides are active inside the ``with`` block.
    """
    original_env = os.environ.copy()
    os.environ.update(request_body.env_vars)
    # Record the requesting user in the global state so it is visible to
    # downstream code (e.g. for attribution of clusters/jobs).
    user = models.User(id=request_body.env_vars[constants.USER_ID_ENV_VAR],
                       name=request_body.env_vars[constants.USER_ENV_VAR])
    global_user_state.add_or_update_user(user)
    # Force color to be enabled.
    os.environ['CLICOLOR_FORCE'] = '1'
    server_common.reload_for_new_request(
        client_entrypoint=request_body.entrypoint,
        client_command=request_body.entrypoint_command)
    try:
        with skypilot_config.override_skypilot_config(
                request_body.override_skypilot_config):
            yield
    finally:
        # We need to call the save_timeline() since atexit will not be
        # triggered as multiple requests can be sharing the same process.
        timeline.save_timeline()
        # Restore the original environment variables, so that a new request
        # won't be affected by the previous request, e.g. SKYPILOT_DEBUG
        # setting, etc. This is necessary as our executor is reusing the
        # same process for multiple requests.
        os.environ.clear()
        os.environ.update(original_env)
179
+
180
+
181
def _redirect_output(file: TextIO) -> Tuple[int, int]:
    """Point the process's stdout and stderr at ``file``.

    Args:
        file: Open file object that will receive stdout/stderr output.

    Returns:
        Duplicated file descriptors of the original stdout and stderr, to
        be passed to ``_restore_output`` later.
    """
    target_fd = file.fileno()
    stdout_fd = sys.stdout.fileno()
    stderr_fd = sys.stderr.fileno()
    # Keep duplicates of the current stdout/stderr so they can be restored.
    saved_stdout = os.dup(stdout_fd)
    saved_stderr = os.dup(stderr_fd)
    # From here on, writes to stdout/stderr land in `file`.
    os.dup2(target_fd, stdout_fd)
    os.dup2(target_fd, stderr_fd)
    return saved_stdout, saved_stderr
192
+
193
+
194
def _restore_output(original_stdout: int, original_stderr: int) -> None:
    """Reattach stdout/stderr to the saved descriptors and close the copies.

    Args:
        original_stdout: Saved stdout fd returned by ``_redirect_output``.
        original_stderr: Saved stderr fd returned by ``_redirect_output``.
    """
    for saved_fd, stream in ((original_stdout, sys.stdout),
                             (original_stderr, sys.stderr)):
        os.dup2(saved_fd, stream.fileno())
        # The duplicate is no longer needed once restored.
        os.close(saved_fd)
202
+
203
+
204
def _request_execution_wrapper(request_id: str,
                               ignore_return_value: bool) -> None:
    """Wrapper for a request execution.

    It wraps the execution of a request to:
    1. Deserialize the request from the request database and serialize the
       return value/exception in the request database;
    2. Update the request status based on the execution result;
    3. Redirect the stdout and stderr of the execution to log file;
    4. Handle the SIGTERM signal to abort the request gracefully.

    Args:
        request_id: ID of the request to run; must exist in the request DB.
        ignore_return_value: If True, a successful entrypoint's return value
            is discarded instead of stored.
    """

    def sigterm_handler(signum: int,
                        frame: Optional['types.FrameType']) -> None:
        # Turn SIGTERM into KeyboardInterrupt so kill and Ctrl-C share the
        # same cancellation path below.
        raise KeyboardInterrupt

    signal.signal(signal.SIGTERM, sigterm_handler)

    pid = multiprocessing.current_process().pid
    logger.info(f'Running request {request_id} with pid {pid}')
    # Mark the request RUNNING and snapshot what we need before executing.
    with api_requests.update_request(request_id) as request_task:
        assert request_task is not None, request_id
        log_path = request_task.log_path
        request_task.pid = pid
        request_task.status = api_requests.RequestStatus.RUNNING
        func = request_task.entrypoint
        request_body = request_task.request_body

    with log_path.open('w', encoding='utf-8') as f:
        # Store copies of the original stdout and stderr file descriptors
        original_stdout, original_stderr = _redirect_output(f)
        # Redirect the stdout/stderr before overriding the environment and
        # config, as there can be some logs during override that needs to be
        # captured in the log file.
        try:
            with override_request_env_and_config(request_body):
                return_value = func(**request_body.to_kwargs())
        except KeyboardInterrupt:
            # NOTE(review): the request status is presumably set to
            # CANCELLED by the cancelling side — confirm; only the output
            # is restored here.
            logger.info(f'Request {request_id} cancelled by user')
            _restore_output(original_stdout, original_stderr)
            return
        except (Exception, SystemExit) as e:  # pylint: disable=broad-except
            # Attach the formatted traceback to the exception so it can be
            # persisted alongside the error.
            with ux_utils.enable_traceback():
                stacktrace = traceback.format_exc()
            setattr(e, 'stacktrace', stacktrace)
            with api_requests.update_request(request_id) as request_task:
                assert request_task is not None, request_id
                request_task.status = api_requests.RequestStatus.FAILED
                request_task.set_error(e)
            _restore_output(original_stdout, original_stderr)
            logger.info(f'Request {request_id} failed due to '
                        f'{common_utils.format_exception(e)}')
            return
        else:
            # Success: persist status (and return value unless suppressed).
            with api_requests.update_request(request_id) as request_task:
                assert request_task is not None, request_id
                request_task.status = api_requests.RequestStatus.SUCCEEDED
                if not ignore_return_value:
                    request_task.set_return_value(return_value)
            _restore_output(original_stdout, original_stderr)
            logger.info(f'Request {request_id} finished')
265
+
266
+
267
def schedule_request(request_id: str,
                     request_name: str,
                     request_body: payloads.RequestBody,
                     func: Callable[P, Any],
                     request_cluster_name: Optional[str] = None,
                     ignore_return_value: bool = False,
                     schedule_type: api_requests.ScheduleType = api_requests.
                     ScheduleType.LONG,
                     is_skypilot_system: bool = False) -> None:
    """Enqueue a request to the request queue.

    Creates the request record (a no-op if the ID already exists) and puts
    the request on the queue for its schedule type.

    Args:
        request_id: Unique ID for the request.
        request_name: Human-readable name; stored with the server's
            request-name prefix.
        request_body: Payload carrying env vars, entrypoint info, etc.
        func: Entrypoint callable executed by a worker.
        request_cluster_name: Cluster associated with the request, if any.
        ignore_return_value: If True, discard the entrypoint's return value
            on success.
        schedule_type: Which queue (LONG or SHORT) the request goes to.
        is_skypilot_system: If True, attribute the request to the internal
            SkyPilot system user instead of the requesting user.
    """
    user_id = request_body.env_vars[constants.USER_ID_ENV_VAR]
    if is_skypilot_system:
        # Internal requests run as the reserved system user.
        user_id = server_constants.SKYPILOT_SYSTEM_USER_ID
        global_user_state.add_or_update_user(
            models.User(id=user_id, name=user_id))
    request = api_requests.Request(request_id=request_id,
                                   name=server_constants.REQUEST_NAME_PREFIX +
                                   request_name,
                                   entrypoint=func,
                                   request_body=request_body,
                                   status=api_requests.RequestStatus.PENDING,
                                   created_at=time.time(),
                                   schedule_type=schedule_type,
                                   user_id=user_id,
                                   cluster_name=request_cluster_name)

    # Idempotent: an already-scheduled request ID is not enqueued twice.
    if not api_requests.create_if_not_exists(request):
        logger.debug(f'Request {request_id} already exists.')
        return

    # Create the log file eagerly so readers can tail it immediately.
    request.log_path.touch()
    input_tuple = (request_id, ignore_return_value)

    logger.info(f'Queuing request: {request_id}')
    _get_queue(schedule_type).put(input_tuple)
302
+
303
+
304
def request_worker(worker: RequestWorker, max_parallel_size: int) -> None:
    """Worker for the requests.

    Polls the queue matching ``worker.schedule_type`` forever and executes
    each request in a child process from a pool.

    Args:
        worker: Identity (id and schedule type) of this worker.
        max_parallel_size: Maximum number of parallel jobs this worker can
            run.
    """
    logger.info(f'Starting {worker} with pid '
                f'{multiprocessing.current_process().pid}')
    setproctitle.setproctitle(
        f'SkyPilot:worker:{worker.schedule_type.value}-{worker.id}')
    queue = _get_queue(worker.schedule_type)
    # Use concurrent.futures.ProcessPoolExecutor instead of multiprocessing.Pool
    # because the former is more efficient with the support of lazy creation of
    # worker processes.
    # We use executor instead of individual multiprocessing.Process to avoid
    # the overhead of forking a new process for each request, which can be about
    # 1s delay.
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=max_parallel_size) as executor:
        while True:
            request_element = queue.get()
            if request_element is None:
                # Queue is empty: back off briefly before polling again.
                time.sleep(0.1)
                continue
            request_id, ignore_return_value = request_element
            request = api_requests.get_request(request_id)
            # Skip requests cancelled while still queued.
            if request.status == api_requests.RequestStatus.CANCELLED:
                continue
            logger.info(f'[{worker}] Submitting request: {request_id}')
            # Start additional process to run the request, so that it can be
            # cancelled when requested by a user.
            # TODO(zhwu): since the executor is reusing the request process,
            # multiple requests can share the same process pid, which may cause
            # issues with SkyPilot core functions if they rely on the exit of
            # the process, such as subprocess_daemon.py.
            future = executor.submit(_request_execution_wrapper, request_id,
                                     ignore_return_value)

            if worker.schedule_type == api_requests.ScheduleType.LONG:
                # LONG workers run one request at a time: block until the
                # request finishes before pulling the next one.
                try:
                    future.result(timeout=None)
                except Exception as e:  # pylint: disable=broad-except
                    logger.error(f'[{worker}] Request {request_id} failed: {e}')
                logger.info(f'[{worker}] Finished request: {request_id}')
            else:
                # SHORT requests are fire-and-forget: the pool runs up to
                # max_parallel_size of them concurrently.
                logger.info(f'[{worker}] Submitted request: {request_id}')
350
+
351
+
352
+ def _get_cpu_count() -> int:
353
+ """Get the number of CPUs.
354
+
355
+ If the API server is deployed as a pod in k8s cluster, we assume the
356
+ number of CPUs is provided by the downward API.
357
+ """
358
+ cpu_count = os.getenv('SKYPILOT_POD_CPU_CORE_LIMIT')
359
+ if cpu_count is not None:
360
+ try:
361
+ return int(float(cpu_count))
362
+ except ValueError as e:
363
+ with ux_utils.print_exception_no_traceback():
364
+ raise ValueError(
365
+ f'Failed to parse the number of CPUs from {cpu_count}'
366
+ ) from e
367
+ return psutil.cpu_count()
368
+
369
+
370
+ def _get_mem_size_gb() -> float:
371
+ """Get the memory size in GB.
372
+
373
+ If the API server is deployed as a pod in k8s cluster, we assume the
374
+ memory size is provided by the downward API.
375
+ """
376
+ mem_size = os.getenv('SKYPILOT_POD_MEMORY_GB_LIMIT')
377
+ if mem_size is not None:
378
+ try:
379
+ return float(mem_size)
380
+ except ValueError as e:
381
+ with ux_utils.print_exception_no_traceback():
382
+ raise ValueError(
383
+ f'Failed to parse the memory size from {mem_size}') from e
384
+ return psutil.virtual_memory().total / (1024**3)
385
+
386
+
387
def start(deploy: bool) -> List[multiprocessing.Process]:
    """Start the request workers.

    Sizes the worker pools from system resources, starts the shared queue
    manager process, then starts one process per blocking (LONG) worker and
    a single non-blocking (SHORT) worker process.

    Args:
        deploy: If False (local API server), cap the number of blocking
            workers at _MAX_BLOCKING_WORKERS_LOCAL.

    Returns:
        The started worker processes. NOTE(review): the queue-manager
        process is started but not included in the returned list — confirm
        callers do not need to manage its lifetime.
    """
    # Determine the job capacity of the workers based on the system resources.
    cpu_count = _get_cpu_count()
    mem_size_gb = _get_mem_size_gb()
    # Reserve a fixed amount of memory for the rest of the system first.
    mem_size_gb = max(0, mem_size_gb - server_constants.MIN_AVAIL_MEM_GB)
    parallel_for_blocking = _max_parallel_size_for_blocking(
        cpu_count, mem_size_gb)
    if not deploy:
        parallel_for_blocking = min(parallel_for_blocking,
                                    _MAX_BLOCKING_WORKERS_LOCAL)
    max_parallel_for_non_blocking = _max_parallel_size_for_non_blocking(
        mem_size_gb, parallel_for_blocking)
    logger.info(
        f'SkyPilot API server will start {parallel_for_blocking} workers for '
        f'blocking requests and will allow at max '
        f'{max_parallel_for_non_blocking} non-blocking requests in parallel.')

    # Setup the queues.
    if queue_backend == QueueBackend.MULTIPROCESSING:
        logger.info('Creating shared request queues')
        queue_names = [
            schedule_type.value for schedule_type in api_requests.ScheduleType
        ]
        # TODO(aylei): make queue manager port configurable or pick an available
        # port automatically.
        port = mp_queue.DEFAULT_QUEUE_MANAGER_PORT
        if not common_utils.is_port_available(port):
            raise RuntimeError(
                f'SkyPilot API server fails to start as port {port!r} is '
                'already in use by another process.')
        # The queue manager runs in its own process and serves the queues
        # to all worker processes.
        queue_server = multiprocessing.Process(
            target=mp_queue.start_queue_manager, args=(queue_names, port))
        queue_server.start()

        # Block until every named queue is reachable before starting workers.
        mp_queue.wait_for_queues_to_be_ready(queue_names, port=port)

    logger.info('Request queues created')

    worker_procs = []
    for worker_id in range(parallel_for_blocking):
        worker = RequestWorker(id=worker_id,
                               schedule_type=api_requests.ScheduleType.LONG)
        # Each LONG worker runs exactly one request at a time (parallelism 1).
        worker_proc = multiprocessing.Process(target=request_worker,
                                              args=(worker, 1))
        worker_proc.start()
        worker_procs.append(worker_proc)

    # Start a non-blocking worker.
    worker = RequestWorker(id=1, schedule_type=api_requests.ScheduleType.SHORT)
    worker_proc = multiprocessing.Process(target=request_worker,
                                          args=(worker,
                                                max_parallel_for_non_blocking))
    worker_proc.start()
    worker_procs.append(worker_proc)
    return worker_procs
443
+
444
+
445
@annotations.lru_cache(scope='global', maxsize=1)
def _max_parallel_size_for_blocking(cpu_count: int, mem_size_gb: float) -> int:
    """Max parallelism for blocking requests.

    Returns the tighter of the CPU-based and memory-based limits, but never
    less than one worker.
    """
    by_cpu = cpu_count * _CPU_MULTIPLIER_FOR_BLOCKING_WORKERS
    by_mem = int(mem_size_gb * _MAX_MEM_PERCENT_FOR_BLOCKING /
                 _PER_BLOCKING_REQUEST_MEM_GB)
    return max(1, min(by_cpu, by_mem))
453
+
454
+
455
@annotations.lru_cache(scope='global', maxsize=1)
def _max_parallel_size_for_non_blocking(mem_size_gb: float,
                                        parallel_size_for_blocking: int) -> int:
    """Max parallelism for non-blocking requests.

    Sizes the pool from whatever memory remains after reserving capacity
    for the blocking workers; always allows at least one request.
    """
    reserved_for_blocking = (parallel_size_for_blocking *
                             _PER_BLOCKING_REQUEST_MEM_GB)
    leftover_mem = mem_size_gb - reserved_for_blocking
    return max(1, int(leftover_mem / _PER_NON_BLOCKING_REQUEST_MEM_GB))