skypilot-nightly 1.0.0.dev20250607__py3-none-any.whl → 1.0.0.dev20250610__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. sky/__init__.py +2 -2
  2. sky/admin_policy.py +3 -0
  3. sky/authentication.py +1 -7
  4. sky/backends/backend_utils.py +18 -2
  5. sky/backends/cloud_vm_ray_backend.py +9 -20
  6. sky/check.py +4 -3
  7. sky/cli.py +6 -9
  8. sky/client/cli.py +6 -9
  9. sky/client/sdk.py +49 -4
  10. sky/clouds/kubernetes.py +15 -24
  11. sky/core.py +3 -2
  12. sky/dashboard/out/404.html +1 -1
  13. sky/dashboard/out/_next/static/4lwUJxN6KwBqUxqO1VccB/_buildManifest.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/211.692afc57e812ae1a.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/350.9e123a4551f68b0d.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/37-d8aebf1683522a0b.js +6 -0
  17. sky/dashboard/out/_next/static/chunks/42.d39e24467181b06b.js +6 -0
  18. sky/dashboard/out/_next/static/chunks/443.b2242d0efcdf5f47.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/470-4d1a5dbe58a8a2b9.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/{121-865d2bf8a3b84c6a.js → 491.b3d264269613fe09.js} +3 -3
  21. sky/dashboard/out/_next/static/chunks/513.211357a2914a34b2.js +1 -0
  22. sky/dashboard/out/_next/static/chunks/600.9cc76ec442b22e10.js +16 -0
  23. sky/dashboard/out/_next/static/chunks/616-d6128fa9e7cae6e6.js +39 -0
  24. sky/dashboard/out/_next/static/chunks/664-047bc03493fda379.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/682.4dd5dc116f740b5f.js +6 -0
  26. sky/dashboard/out/_next/static/chunks/760-a89d354797ce7af5.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/799-3625946b2ec2eb30.js +8 -0
  28. sky/dashboard/out/_next/static/chunks/804-4c9fc53aa74bc191.js +21 -0
  29. sky/dashboard/out/_next/static/chunks/843-6fcc4bf91ac45b39.js +11 -0
  30. sky/dashboard/out/_next/static/chunks/856-0776dc6ed6000c39.js +1 -0
  31. sky/dashboard/out/_next/static/chunks/901-b424d293275e1fd7.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/938-a75b7712639298b7.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/947-6620842ef80ae879.js +35 -0
  34. sky/dashboard/out/_next/static/chunks/969-20d54a9d998dc102.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/973-c807fc34f09c7df3.js +1 -0
  36. sky/dashboard/out/_next/static/chunks/pages/_app-4768de0aede04dc9.js +20 -0
  37. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-89216c616dbaa9c5.js +6 -0
  38. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-451a14e7e755ebbc.js +6 -0
  39. sky/dashboard/out/_next/static/chunks/pages/clusters-e56b17fd85d0ba58.js +1 -0
  40. sky/dashboard/out/_next/static/chunks/pages/config-497a35a7ed49734a.js +1 -0
  41. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-d2910be98e9227cb.js +1 -0
  42. sky/dashboard/out/_next/static/chunks/pages/infra-780860bcc1103945.js +1 -0
  43. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-b3dbf38b51cb29be.js +16 -0
  44. sky/dashboard/out/_next/static/chunks/pages/jobs-fe233baf3d073491.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/pages/users-c69ffcab9d6e5269.js +1 -0
  46. sky/dashboard/out/_next/static/chunks/pages/workspace/new-31aa8bdcb7592635.js +1 -0
  47. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c8c2191328532b7d.js +1 -0
  48. sky/dashboard/out/_next/static/chunks/pages/workspaces-82e6601baa5dd280.js +1 -0
  49. sky/dashboard/out/_next/static/chunks/webpack-0574a5a4ba3cf0ac.js +1 -0
  50. sky/dashboard/out/_next/static/css/8b1c8321d4c02372.css +3 -0
  51. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  52. sky/dashboard/out/clusters/[cluster].html +1 -1
  53. sky/dashboard/out/clusters.html +1 -1
  54. sky/dashboard/out/config.html +1 -1
  55. sky/dashboard/out/index.html +1 -1
  56. sky/dashboard/out/infra/[context].html +1 -1
  57. sky/dashboard/out/infra.html +1 -1
  58. sky/dashboard/out/jobs/[job].html +1 -1
  59. sky/dashboard/out/jobs.html +1 -1
  60. sky/dashboard/out/users.html +1 -1
  61. sky/dashboard/out/workspace/new.html +1 -1
  62. sky/dashboard/out/workspaces/[name].html +1 -1
  63. sky/dashboard/out/workspaces.html +1 -1
  64. sky/exceptions.py +23 -0
  65. sky/global_user_state.py +192 -80
  66. sky/jobs/client/sdk.py +29 -21
  67. sky/jobs/server/core.py +9 -1
  68. sky/jobs/server/server.py +0 -95
  69. sky/jobs/utils.py +2 -1
  70. sky/models.py +18 -0
  71. sky/provision/kubernetes/constants.py +9 -0
  72. sky/provision/kubernetes/utils.py +106 -7
  73. sky/serve/client/sdk.py +56 -45
  74. sky/serve/server/core.py +1 -1
  75. sky/server/common.py +5 -7
  76. sky/server/constants.py +0 -2
  77. sky/server/requests/executor.py +60 -22
  78. sky/server/requests/payloads.py +3 -0
  79. sky/server/requests/process.py +69 -29
  80. sky/server/requests/requests.py +4 -3
  81. sky/server/server.py +23 -5
  82. sky/server/stream_utils.py +111 -55
  83. sky/skylet/constants.py +4 -2
  84. sky/skylet/job_lib.py +2 -1
  85. sky/skypilot_config.py +108 -25
  86. sky/users/model.conf +1 -1
  87. sky/users/permission.py +149 -32
  88. sky/users/rbac.py +26 -0
  89. sky/users/server.py +14 -13
  90. sky/utils/admin_policy_utils.py +9 -3
  91. sky/utils/common.py +6 -1
  92. sky/utils/common_utils.py +21 -3
  93. sky/utils/context.py +21 -1
  94. sky/utils/controller_utils.py +16 -1
  95. sky/utils/kubernetes/exec_kubeconfig_converter.py +19 -47
  96. sky/utils/schemas.py +9 -0
  97. sky/workspaces/core.py +100 -8
  98. sky/workspaces/server.py +15 -2
  99. sky/workspaces/utils.py +56 -0
  100. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/METADATA +1 -1
  101. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/RECORD +106 -94
  102. sky/dashboard/out/_next/static/1qG0HTmVilJPxQdBk0fX5/_buildManifest.js +0 -1
  103. sky/dashboard/out/_next/static/chunks/236-619ed0248fb6fdd9.js +0 -6
  104. sky/dashboard/out/_next/static/chunks/293-351268365226d251.js +0 -1
  105. sky/dashboard/out/_next/static/chunks/37-600191c5804dcae2.js +0 -6
  106. sky/dashboard/out/_next/static/chunks/470-ad1e0db3afcbd9c9.js +0 -1
  107. sky/dashboard/out/_next/static/chunks/614-635a84e87800f99e.js +0 -66
  108. sky/dashboard/out/_next/static/chunks/682-b60cfdacc15202e8.js +0 -6
  109. sky/dashboard/out/_next/static/chunks/843-c296541442d4af88.js +0 -11
  110. sky/dashboard/out/_next/static/chunks/856-3a32da4b84176f6d.js +0 -1
  111. sky/dashboard/out/_next/static/chunks/969-2c584e28e6b4b106.js +0 -1
  112. sky/dashboard/out/_next/static/chunks/973-6d78a0814682d771.js +0 -1
  113. sky/dashboard/out/_next/static/chunks/pages/_app-cb81dc4d27f4d009.js +0 -1
  114. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-18aed9b56247d074.js +0 -6
  115. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-b919a73aecdfa78f.js +0 -6
  116. sky/dashboard/out/_next/static/chunks/pages/clusters-4f6b9dd9abcb33ad.js +0 -1
  117. sky/dashboard/out/_next/static/chunks/pages/config-fe375a56342cf609.js +0 -6
  118. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-3a18d0eeb5119fe4.js +0 -1
  119. sky/dashboard/out/_next/static/chunks/pages/infra-a1a6abeeb58c1051.js +0 -1
  120. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1354e28c81eeb686.js +0 -16
  121. sky/dashboard/out/_next/static/chunks/pages/jobs-23bfc8bf373423db.js +0 -1
  122. sky/dashboard/out/_next/static/chunks/pages/users-5800045bd04e69c2.js +0 -16
  123. sky/dashboard/out/_next/static/chunks/pages/workspace/new-e1f9c0c3ff7ac4bd.js +0 -1
  124. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-686590e0ee4b2412.js +0 -1
  125. sky/dashboard/out/_next/static/chunks/pages/workspaces-76b07aa5da91b0df.js +0 -1
  126. sky/dashboard/out/_next/static/chunks/webpack-65d465f948974c0d.js +0 -1
  127. sky/dashboard/out/_next/static/css/667d941a2888ce6e.css +0 -3
  128. /sky/dashboard/out/_next/static/{1qG0HTmVilJPxQdBk0fX5 → 4lwUJxN6KwBqUxqO1VccB}/_ssgManifest.js +0 -0
  129. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/WHEEL +0 -0
  130. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/entry_points.txt +0 -0
  131. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/licenses/LICENSE +0 -0
  132. {skypilot_nightly-1.0.0.dev20250607.dist-info → skypilot_nightly-1.0.0.dev20250610.dist-info}/top_level.txt +0 -0
@@ -19,6 +19,7 @@ The number of the workers is determined by the system resources.
19
19
  See the [README.md](../README.md) for detailed architecture of the executor.
20
20
  """
21
21
  import asyncio
22
+ import concurrent.futures
22
23
  import contextlib
23
24
  import multiprocessing
24
25
  import os
@@ -53,6 +54,7 @@ from sky.utils import context
53
54
  from sky.utils import context_utils
54
55
  from sky.utils import subprocess_utils
55
56
  from sky.utils import timeline
57
+ from sky.workspaces import core as workspaces_core
56
58
 
57
59
  if typing.TYPE_CHECKING:
58
60
  import types
@@ -92,21 +94,21 @@ class RequestQueue:
92
94
  else:
93
95
  raise RuntimeError(f'Invalid queue backend: {backend}')
94
96
 
95
- def put(self, request: Tuple[str, bool]) -> None:
97
+ def put(self, request: Tuple[str, bool, bool]) -> None:
96
98
  """Put and request to the queue.
97
99
 
98
100
  Args:
99
- request: A tuple of request_id and ignore_return_value.
101
+ request: A tuple of request_id, ignore_return_value, and retryable.
100
102
  """
101
103
  self.queue.put(request) # type: ignore
102
104
 
103
- def get(self) -> Optional[Tuple[str, bool]]:
105
+ def get(self) -> Optional[Tuple[str, bool, bool]]:
104
106
  """Get a request from the queue.
105
107
 
106
108
  It is non-blocking if the queue is empty, and returns None.
107
109
 
108
110
  Returns:
109
- A tuple of request_id and ignore_return_value.
111
+ A tuple of request_id, ignore_return_value, and retryable.
110
112
  """
111
113
  try:
112
114
  return self.queue.get(block=False)
@@ -158,7 +160,7 @@ class RequestWorker:
158
160
  if request_element is None:
159
161
  time.sleep(0.1)
160
162
  return
161
- request_id, ignore_return_value = request_element
163
+ request_id, ignore_return_value, retryable = request_element
162
164
  request = api_requests.get_request(request_id)
163
165
  assert request is not None, f'Request with ID {request_id} is None'
164
166
  if request.status == api_requests.RequestStatus.CANCELLED:
@@ -170,8 +172,14 @@ class RequestWorker:
170
172
  # multiple requests can share the same process pid, which may cause
171
173
  # issues with SkyPilot core functions if they rely on the exit of
172
174
  # the process, such as subprocess_daemon.py.
173
- executor.submit_until_success(_request_execution_wrapper,
174
- request_id, ignore_return_value)
175
+ fut = executor.submit_until_success(_request_execution_wrapper,
176
+ request_id, ignore_return_value)
177
+ if retryable:
178
+ # If the task might fail and be retried, start a thread to
179
+ # monitor the future and process retry.
180
+ threading.Thread(target=self.handle_task_result,
181
+ args=(fut, request_element),
182
+ daemon=True).start()
175
183
 
176
184
  logger.info(f'[{self}] Submitted request: {request_id}')
177
185
  except (Exception, SystemExit) as e: # pylint: disable=broad-except
@@ -181,6 +189,16 @@ class RequestWorker:
181
189
  f'{request_id if "request_id" in locals() else ""} '
182
190
  f'{common_utils.format_exception(e, use_bracket=True)}')
183
191
 
192
+ def handle_task_result(self, fut: concurrent.futures.Future,
193
+ request_element: Tuple[str, bool, bool]) -> None:
194
+ try:
195
+ fut.result()
196
+ except exceptions.ExecutionRetryableError as e:
197
+ time.sleep(e.retry_wait_seconds)
198
+ # Reschedule the request.
199
+ queue = _get_queue(self.schedule_type)
200
+ queue.put(request_element)
201
+
184
202
  def run(self) -> None:
185
203
  # Handle the SIGTERM signal to abort the executor process gracefully.
186
204
  proc_group = f'{self.schedule_type.value}'
@@ -229,6 +247,9 @@ def override_request_env_and_config(
229
247
  original_env = os.environ.copy()
230
248
  os.environ.update(request_body.env_vars)
231
249
  # Note: may be overridden by AuthProxyMiddleware.
250
+ # TODO(zhwu): we need to make the entire request a context available to the
251
+ # entire request execution, so that we can access info like user through
252
+ # the execution.
232
253
  user = models.User(id=request_body.env_vars[constants.USER_ID_ENV_VAR],
233
254
  name=request_body.env_vars[constants.USER_ENV_VAR])
234
255
  global_user_state.add_or_update_user(user)
@@ -237,13 +258,17 @@ def override_request_env_and_config(
237
258
  server_common.reload_for_new_request(
238
259
  client_entrypoint=request_body.entrypoint,
239
260
  client_command=request_body.entrypoint_command,
240
- using_remote_api_server=request_body.using_remote_api_server)
261
+ using_remote_api_server=request_body.using_remote_api_server,
262
+ user=user)
241
263
  try:
242
264
  logger.debug(
243
265
  f'override path: {request_body.override_skypilot_config_path}')
244
266
  with skypilot_config.override_skypilot_config(
245
267
  request_body.override_skypilot_config,
246
268
  request_body.override_skypilot_config_path):
269
+ # Rejecting requests to workspaces that the user does not have
270
+ # permission to access.
271
+ workspaces_core.reject_request_for_unauthorized_workspace(user)
247
272
  yield
248
273
  finally:
249
274
  # We need to call the save_timeline() since atexit will not be
@@ -308,7 +333,9 @@ def _request_execution_wrapper(request_id: str,
308
333
  func = request_task.entrypoint
309
334
  request_body = request_task.request_body
310
335
 
311
- with log_path.open('w', encoding='utf-8') as f:
336
+ # Append to the log file instead of overwriting it since there might be
337
+ # logs from previous retries.
338
+ with log_path.open('a', encoding='utf-8') as f:
312
339
  # Store copies of the original stdout and stderr file descriptors
313
340
  original_stdout, original_stderr = _redirect_output(f)
314
341
  # Redirect the stdout/stderr before overriding the environment and
@@ -332,6 +359,17 @@ def _request_execution_wrapper(request_id: str,
332
359
  subprocess_utils.kill_children_processes()
333
360
  _restore_output(original_stdout, original_stderr)
334
361
  return
362
+ except exceptions.ExecutionRetryableError as e:
363
+ logger.error(e)
364
+ logger.info(e.hint)
365
+ with api_requests.update_request(request_id) as request_task:
366
+ assert request_task is not None, request_id
367
+ # Retried request will undergo rescheduling and a new execution,
368
+ # clear the pid of the request.
369
+ request_task.pid = None
370
+ # Yield control to the scheduler for uniform handling of retries.
371
+ _restore_output(original_stdout, original_stderr)
372
+ raise
335
373
  except (Exception, SystemExit) as e: # pylint: disable=broad-except
336
374
  api_requests.set_request_failed(request_id, e)
337
375
  _restore_output(original_stdout, original_stderr)
@@ -433,7 +471,7 @@ def prepare_request(
433
471
  """Prepare a request for execution."""
434
472
  user_id = request_body.env_vars[constants.USER_ID_ENV_VAR]
435
473
  if is_skypilot_system:
436
- user_id = server_constants.SKYPILOT_SYSTEM_USER_ID
474
+ user_id = constants.SKYPILOT_SYSTEM_USER_ID
437
475
  global_user_state.add_or_update_user(
438
476
  models.User(id=user_id, name=user_id))
439
477
  request = api_requests.Request(request_id=request_id,
@@ -455,17 +493,17 @@ def prepare_request(
455
493
  return request
456
494
 
457
495
 
458
- def schedule_request(
459
- request_id: str,
460
- request_name: str,
461
- request_body: payloads.RequestBody,
462
- func: Callable[P, Any],
463
- request_cluster_name: Optional[str] = None,
464
- ignore_return_value: bool = False,
465
- schedule_type: api_requests.ScheduleType = (
466
- api_requests.ScheduleType.LONG),
467
- is_skypilot_system: bool = False,
468
- precondition: Optional[preconditions.Precondition] = None) -> None:
496
+ def schedule_request(request_id: str,
497
+ request_name: str,
498
+ request_body: payloads.RequestBody,
499
+ func: Callable[P, Any],
500
+ request_cluster_name: Optional[str] = None,
501
+ ignore_return_value: bool = False,
502
+ schedule_type: api_requests.ScheduleType = (
503
+ api_requests.ScheduleType.LONG),
504
+ is_skypilot_system: bool = False,
505
+ precondition: Optional[preconditions.Precondition] = None,
506
+ retryable: bool = False) -> None:
469
507
  """Enqueue a request to the request queue.
470
508
 
471
509
  Args:
@@ -490,7 +528,7 @@ def schedule_request(
490
528
  request_cluster_name, schedule_type, is_skypilot_system)
491
529
 
492
530
  def enqueue():
493
- input_tuple = (request_id, ignore_return_value)
531
+ input_tuple = (request_id, ignore_return_value, retryable)
494
532
  logger.info(f'Queuing request: {request_id}')
495
533
  _get_queue(schedule_type).put(input_tuple)
496
534
 
@@ -79,6 +79,9 @@ def get_override_skypilot_config_from_client() -> Dict[str, Any]:
79
79
  # server endpoint on the server side. This avoids the warning at
80
80
  # server-side.
81
81
  config.pop_nested(('api_server',), default_value=None)
82
+ # Remove the admin policy, as the policy has been applied on the client
83
+ # side.
84
+ config.pop_nested(('admin_policy',), default_value=None)
82
85
  return config
83
86
 
84
87
 
@@ -6,6 +6,7 @@ import threading
6
6
  import time
7
7
  from typing import Callable, Dict, Optional, Tuple
8
8
 
9
+ from sky import exceptions
9
10
  from sky.utils import atomic
10
11
  from sky.utils import subprocess_utils
11
12
 
@@ -67,14 +68,24 @@ class PoolExecutor(concurrent.futures.ProcessPoolExecutor):
67
68
 
68
69
 
69
70
  # Define the worker function outside of the class to avoid pickling self
70
- def _disposable_worker(fn, initializer: Optional[Callable], initargs: Tuple,
71
- args, kwargs):
71
+ def _disposable_worker(fn, initializer, initargs, result_queue, args, kwargs):
72
+ """The worker function that is used to run the task.
73
+
74
+ Args:
75
+ fn: The function to run.
76
+ initializer: The initializer function to run before running the task.
77
+ initargs: The arguments to pass to the initializer function.
78
+ result_queue: The queue to put the result and exception into.
79
+ args: The arguments to pass to the function.
80
+ kwargs: The keyword arguments to pass to the function.
81
+ """
72
82
  try:
73
83
  if initializer is not None:
74
84
  initializer(*initargs)
75
- fn(*args, **kwargs)
85
+ result = fn(*args, **kwargs)
86
+ result_queue.put(result)
76
87
  except BaseException as e: # pylint: disable=broad-except
77
- return e
88
+ result_queue.put(e)
78
89
 
79
90
 
80
91
  class DisposableExecutor:
@@ -98,28 +109,52 @@ class DisposableExecutor:
98
109
  self._initializer: Optional[Callable] = initializer
99
110
  self._initargs: Tuple = initargs
100
111
 
101
- def _monitor_worker(self, process: multiprocessing.Process) -> None:
112
+ def _monitor_worker(self, process: multiprocessing.Process,
113
+ future: concurrent.futures.Future,
114
+ result_queue: multiprocessing.Queue) -> None:
102
115
  """Monitor the worker process and cleanup when it's done."""
103
- process.join()
104
- if process.pid:
105
- with self._lock:
106
- if process.pid in self.workers:
107
- del self.workers[process.pid]
108
-
109
- # Submit is not compatible with ProcessPoolExecutor because we does not
110
- # bother to return a Future. Can be improved if needed.
111
- def submit(self, fn, *args, **kwargs) -> bool:
112
- """Submit a task for execution."""
116
+ try:
117
+ process.join()
118
+ if not future.cancelled():
119
+ try:
120
+ # Get result from the queue if process completed
121
+ if not result_queue.empty():
122
+ result = result_queue.get(block=False)
123
+ if isinstance(result, BaseException):
124
+ future.set_exception(result)
125
+ else:
126
+ future.set_result(result)
127
+ else:
128
+ # Process ended but no result
129
+ future.set_result(None)
130
+ except (multiprocessing.TimeoutError, BrokenPipeError,
131
+ EOFError) as e:
132
+ future.set_exception(e)
133
+ finally:
134
+ if process.pid:
135
+ with self._lock:
136
+ if process.pid in self.workers:
137
+ del self.workers[process.pid]
138
+
139
+ def submit(self, fn, *args, **kwargs) -> concurrent.futures.Future:
140
+ """Submit a task for execution and return a Future."""
141
+ future: concurrent.futures.Future = concurrent.futures.Future()
142
+
113
143
  if self._shutdown:
114
- return False
144
+ raise RuntimeError('Cannot submit task after executor is shutdown')
145
+
115
146
  with self._lock:
116
147
  if (self.max_workers is not None and
117
148
  len(self.workers) >= self.max_workers):
118
- return False
149
+ raise exceptions.ExecutionPoolFullError(
150
+ 'Maximum workers reached')
119
151
 
152
+ result_queue: multiprocessing.Queue = multiprocessing.Queue()
120
153
  process = multiprocessing.Process(target=_disposable_worker,
121
154
  args=(fn, self._initializer,
122
- self._initargs, args, kwargs))
155
+ self._initargs, result_queue,
156
+ args, kwargs))
157
+ process.daemon = True
123
158
  process.start()
124
159
 
125
160
  with self._lock:
@@ -128,13 +163,13 @@ class DisposableExecutor:
128
163
  raise RuntimeError('Failed to start process')
129
164
  self.workers[pid] = process
130
165
 
131
- # Start monitor thread to cleanup the worker process when it's done.
166
+ # Start monitor thread to cleanup the worker process when it's done
132
167
  monitor_thread = threading.Thread(target=self._monitor_worker,
133
- args=(process,),
168
+ args=(process, future, result_queue),
134
169
  daemon=True)
135
170
  monitor_thread.start()
136
171
 
137
- return True
172
+ return future
138
173
 
139
174
  def has_idle_workers(self) -> bool:
140
175
  """Check if there are any idle workers."""
@@ -173,12 +208,14 @@ class BurstableExecutor:
173
208
  self._burst_executor = DisposableExecutor(max_workers=burst_workers,
174
209
  **kwargs)
175
210
 
176
- def submit_until_success(self, fn, *args, **kwargs):
211
+ def submit_until_success(self, fn, *args,
212
+ **kwargs) -> concurrent.futures.Future:
177
213
  """Submit a task for execution until success.
178
214
 
179
215
  Prioritizes submitting to the guaranteed pool. If no idle workers
180
216
  are available in the guaranteed pool, it will submit to the burst
181
- pool.
217
+ pool. If the burst pool is full, it will retry the whole process until
218
+ the task is submitted successfully.
182
219
  TODO(aylei): this is coupled with executor.RequestWorker since we
183
220
  know the worker is dedicated to request scheduling and it either
184
221
  blocks on request polling or request submitting. So it is no harm
@@ -188,17 +225,20 @@ class BurstableExecutor:
188
225
 
189
226
  while True:
190
227
  if self._executor is not None and self._executor.has_idle_workers():
191
- self._executor.submit(fn, *args, **kwargs)
192
- break
228
+ logger.info('Submitting to the guaranteed pool')
229
+ return self._executor.submit(fn, *args, **kwargs)
193
230
  if (self._burst_executor is not None and
194
231
  self._burst_executor.has_idle_workers()):
195
- self._burst_executor.submit(fn, *args, **kwargs)
196
- break
232
+ try:
233
+ fut = self._burst_executor.submit(fn, *args, **kwargs)
234
+ return fut
235
+ except exceptions.ExecutionPoolFullError:
236
+ # The burst pool is full, try the next candidate.
237
+ pass
197
238
  if self._executor is not None:
198
239
  # No idle workers in either pool, still queue the request
199
240
  # to the guaranteed pool to keep behavior consistent.
200
- self._executor.submit(fn, *args, **kwargs)
201
- break
241
+ return self._executor.submit(fn, *args, **kwargs)
202
242
  logger.debug('No guaranteed pool set and the burst pool is full, '
203
243
  'retry later.')
204
244
  time.sleep(0.1)
@@ -11,7 +11,7 @@ import signal
11
11
  import sqlite3
12
12
  import time
13
13
  import traceback
14
- from typing import Any, Callable, Dict, List, Optional, Tuple
14
+ from typing import Any, Callable, Dict, Generator, List, Optional, Tuple
15
15
 
16
16
  import colorama
17
17
  import filelock
@@ -204,7 +204,8 @@ class Request:
204
204
  """
205
205
  assert isinstance(self.request_body,
206
206
  payloads.RequestBody), (self.name, self.request_body)
207
- user_name = global_user_state.get_user(self.user_id).name
207
+ user = global_user_state.get_user(self.user_id)
208
+ user_name = user.name if user is not None else None
208
209
  return RequestPayload(
209
210
  request_id=self.request_id,
210
211
  name=self.name,
@@ -464,7 +465,7 @@ def request_lock_path(request_id: str) -> str:
464
465
 
465
466
  @contextlib.contextmanager
466
467
  @init_db
467
- def update_request(request_id: str):
468
+ def update_request(request_id: str) -> Generator[Optional[Request], None, None]:
468
469
  """Get a SkyPilot API request."""
469
470
  request = _get_request_no_lock(request_id)
470
471
  yield request
sky/server/server.py CHANGED
@@ -49,6 +49,7 @@ from sky.server.requests import preconditions
49
49
  from sky.server.requests import requests as requests_lib
50
50
  from sky.skylet import constants
51
51
  from sky.usage import usage_lib
52
+ from sky.users import permission
52
53
  from sky.users import server as users_rest
53
54
  from sky.utils import admin_policy_utils
54
55
  from sky.utils import common as common_lib
@@ -105,17 +106,21 @@ class RBACMiddleware(starlette.middleware.base.BaseHTTPMiddleware):
105
106
  """Middleware to handle RBAC."""
106
107
 
107
108
  async def dispatch(self, request: fastapi.Request, call_next):
108
- if request.url.path.startswith('/dashboard/'):
109
+ # TODO(hailong): should have a list of paths
110
+ # that are not checked for RBAC
111
+ if (request.url.path.startswith('/dashboard/') or
112
+ request.url.path.startswith('/api/')):
109
113
  return await call_next(request)
110
114
 
111
115
  auth_user = _get_auth_user_header(request)
112
116
  if auth_user is None:
113
117
  return await call_next(request)
114
118
 
115
- permission_service = users_rest.permission_service
119
+ permission_service = permission.permission_service
116
120
  # Check the role permission
117
- if permission_service.check_permission(auth_user.id, request.url.path,
118
- request.method):
121
+ if permission_service.check_endpoint_permission(auth_user.id,
122
+ request.url.path,
123
+ request.method):
119
124
  return fastapi.responses.JSONResponse(
120
125
  status_code=403, content={'detail': 'Forbidden'})
121
126
 
@@ -154,9 +159,15 @@ class AuthProxyMiddleware(starlette.middleware.base.BaseHTTPMiddleware):
154
159
  if auth_user is not None:
155
160
  newly_added = global_user_state.add_or_update_user(auth_user)
156
161
  if newly_added:
157
- users_rest.permission_service.add_user_if_not_exists(
162
+ permission.permission_service.add_user_if_not_exists(
158
163
  auth_user.id)
159
164
 
165
+ # Store user info in request.state for access by GET endpoints
166
+ if auth_user is not None:
167
+ request.state.auth_user = auth_user
168
+ else:
169
+ request.state.auth_user = None
170
+
160
171
  body = await request.body()
161
172
  if auth_user and body:
162
173
  try:
@@ -177,6 +188,12 @@ class AuthProxyMiddleware(starlette.middleware.base.BaseHTTPMiddleware):
177
188
  f'"env_vars" in request body is not a dictionary '
178
189
  f'for request {request.state.request_id}. '
179
190
  'Skipping user info injection into body.')
191
+ else:
192
+ original_json['env_vars'] = {}
193
+ original_json['env_vars'][
194
+ constants.USER_ID_ENV_VAR] = auth_user.id
195
+ original_json['env_vars'][
196
+ constants.USER_ENV_VAR] = auth_user.name
180
197
  request._body = json.dumps(original_json).encode('utf-8') # pylint: disable=protected-access
181
198
  return await call_next(request)
182
199
 
@@ -676,6 +693,7 @@ async def launch(launch_body: payloads.LaunchBody,
676
693
  func=execution.launch,
677
694
  schedule_type=requests_lib.ScheduleType.LONG,
678
695
  request_cluster_name=launch_body.cluster_name,
696
+ retryable=launch_body.retry_until_up,
679
697
  )
680
698
 
681
699
 
@@ -3,7 +3,7 @@
3
3
  import asyncio
4
4
  import collections
5
5
  import pathlib
6
- from typing import AsyncGenerator, Deque, Optional
6
+ from typing import AsyncGenerator, Deque, List, Optional
7
7
 
8
8
  import aiofiles
9
9
  import fastapi
@@ -15,6 +15,12 @@ from sky.utils import rich_utils
15
15
 
16
16
  logger = sky_logging.init_logger(__name__)
17
17
 
18
+ # When streaming log lines, buffer the lines in memory and flush them in chunks
19
+ # to improve log tailing throughput. Buffer size is the max size bytes of each
20
+ # chunk and the timeout threshold for flushing the buffer to ensure
21
+ # responsiveness.
22
+ _BUFFER_SIZE = 8 * 1024 # 8KB
23
+ _BUFFER_TIMEOUT = 0.02 # 20ms
18
24
  _HEARTBEAT_INTERVAL = 30
19
25
 
20
26
 
@@ -36,7 +42,16 @@ async def log_streamer(request_id: Optional[str],
36
42
  plain_logs: bool = False,
37
43
  tail: Optional[int] = None,
38
44
  follow: bool = True) -> AsyncGenerator[str, None]:
39
- """Streams the logs of a request."""
45
+ """Streams the logs of a request.
46
+
47
+ Args:
48
+ request_id: The request ID to check whether the log tailing process
49
+ should be stopped.
50
+ log_path: The path to the log file.
51
+ plain_logs: Whether to show plain logs.
52
+ tail: The number of lines to tail. If None, tail the whole file.
53
+ follow: Whether to follow the log file.
54
+ """
40
55
 
41
56
  if request_id is not None:
42
57
  status_msg = rich_utils.EncodedStatusMessage(
@@ -80,65 +95,106 @@ async def log_streamer(request_id: Optional[str],
80
95
  if show_request_waiting_spinner:
81
96
  yield status_msg.stop()
82
97
 
83
- # Find last n lines of the log file. Do not read the whole file into memory.
84
98
  async with aiofiles.open(log_path, 'rb') as f:
85
- if tail is not None:
86
- # TODO(zhwu): this will include the control lines for rich status,
87
- # which may not lead to exact tail lines when showing on the client
88
- # side.
89
- lines: Deque[str] = collections.deque(maxlen=tail)
90
- async for line_str in _yield_log_file_with_payloads_skipped(f):
91
- lines.append(line_str)
92
- for line_str in lines:
93
- yield line_str
94
-
95
- last_heartbeat_time = asyncio.get_event_loop().time()
99
+ async for chunk in _tail_log_file(f, request_id, plain_logs, tail,
100
+ follow):
101
+ yield chunk
102
+
103
+
104
+ async def _tail_log_file(f: aiofiles.threadpool.binary.AsyncBufferedReader,
105
+ request_id: Optional[str] = None,
106
+ plain_logs: bool = False,
107
+ tail: Optional[int] = None,
108
+ follow: bool = True) -> AsyncGenerator[str, None]:
109
+ """Tail the opened log file, buffer the lines and flush in chunks."""
110
+
111
+ if tail is not None:
112
+ # Find last n lines of the log file. Do not read the whole file into
113
+ # memory.
114
+ # TODO(zhwu): this will include the control lines for rich status,
115
+ # which may not lead to exact tail lines when showing on the client
116
+ # side.
117
+ lines: Deque[str] = collections.deque(maxlen=tail)
118
+ async for line_str in _yield_log_file_with_payloads_skipped(f):
119
+ lines.append(line_str)
120
+ for line_str in lines:
121
+ yield line_str
96
122
 
97
- while True:
98
- # Sleep 0 to yield control to allow other coroutines to run,
99
- # while keeps the loop tight to make log stream responsive.
100
- await asyncio.sleep(0)
101
- line: Optional[bytes] = await f.readline()
102
- if not line:
103
- if request_id is not None:
104
- request_task = requests_lib.get_request(request_id)
105
- if request_task.status > requests_lib.RequestStatus.RUNNING:
106
- if (request_task.status ==
107
- requests_lib.RequestStatus.CANCELLED):
108
- yield (f'{request_task.name!r} request {request_id}'
109
- ' cancelled\n')
110
- break
111
- if not follow:
123
+ last_heartbeat_time = asyncio.get_event_loop().time()
124
+
125
+ # Buffer the lines in memory and flush them in chunks to improve log
126
+ # tailing throughput.
127
+ buffer: List[str] = []
128
+ buffer_bytes = 0
129
+ last_flush_time = asyncio.get_event_loop().time()
130
+
131
+ async def flush_buffer() -> AsyncGenerator[str, None]:
132
+ nonlocal buffer, buffer_bytes, last_flush_time
133
+ if buffer:
134
+ yield ''.join(buffer)
135
+ buffer.clear()
136
+ buffer_bytes = 0
137
+ last_flush_time = asyncio.get_event_loop().time()
138
+
139
+ while True:
140
+ # Sleep 0 to yield control to allow other coroutines to run,
141
+ # while keeps the loop tight to make log stream responsive.
142
+ await asyncio.sleep(0)
143
+ current_time = asyncio.get_event_loop().time()
144
+ # Flush the buffer when it is not empty and the buffer is full or the
145
+ # flush timeout is reached.
146
+ if buffer and (buffer_bytes >= _BUFFER_SIZE or
147
+ (current_time - last_flush_time) >= _BUFFER_TIMEOUT):
148
+ async for chunk in flush_buffer():
149
+ yield chunk
150
+
151
+ line: Optional[bytes] = await f.readline()
152
+ if not line:
153
+ if request_id is not None:
154
+ request_task = requests_lib.get_request(request_id)
155
+ if request_task.status > requests_lib.RequestStatus.RUNNING:
156
+ if (request_task.status ==
157
+ requests_lib.RequestStatus.CANCELLED):
158
+ buffer.append(
159
+ f'{request_task.name!r} request {request_id}'
160
+ ' cancelled\n')
112
161
  break
162
+ if not follow:
163
+ break
164
+
165
+ if current_time - last_heartbeat_time >= _HEARTBEAT_INTERVAL:
166
+ # Currently just used to keep the connection busy, refer to
167
+ # https://github.com/skypilot-org/skypilot/issues/5750 for
168
+ # more details.
169
+ buffer.append(
170
+ message_utils.encode_payload(
171
+ rich_utils.Control.HEARTBEAT.encode('')))
172
+ last_heartbeat_time = current_time
173
+
174
+ # Sleep shortly to avoid storming the DB and CPU, this has
175
+ # little impact on the responsivness here since we are waiting
176
+ # for a new line to come in.
177
+ await asyncio.sleep(0.1)
178
+ continue
113
179
 
114
- current_time = asyncio.get_event_loop().time()
115
- if current_time - last_heartbeat_time >= _HEARTBEAT_INTERVAL:
116
- # Currently just used to keep the connection busy, refer to
117
- # https://github.com/skypilot-org/skypilot/issues/5750 for
118
- # more details.
119
- yield message_utils.encode_payload(
120
- rich_utils.Control.HEARTBEAT.encode(''))
121
- last_heartbeat_time = current_time
122
-
123
- # Sleep shortly to avoid storming the DB and CPU, this has
124
- # little impact on the responsivness here since we are waiting
125
- # for a new line to come in.
126
- await asyncio.sleep(0.1)
180
+ # Refresh the heartbeat time, this is a trivial optimization for
181
+ # performance but it helps avoid unnecessary heartbeat strings
182
+ # being printed when the client runs in an old version.
183
+ last_heartbeat_time = asyncio.get_event_loop().time()
184
+ line_str = line.decode('utf-8')
185
+ if plain_logs:
186
+ is_payload, line_str = message_utils.decode_payload(
187
+ line_str, raise_for_mismatch=False)
188
+ # TODO(aylei): implement heartbeat mechanism for plain logs,
189
+ # sending invisible characters might be okay.
190
+ if is_payload:
127
191
  continue
192
+ buffer.append(line_str)
193
+ buffer_bytes += len(line_str.encode('utf-8'))
128
194
 
129
- # Refresh the heartbeat time, this is a trivial optimization for
130
- # performance but it helps avoid unnecessary heartbeat strings
131
- # being printed when the client runs in an old version.
132
- last_heartbeat_time = asyncio.get_event_loop().time()
133
- line_str = line.decode('utf-8')
134
- if plain_logs:
135
- is_payload, line_str = message_utils.decode_payload(
136
- line_str, raise_for_mismatch=False)
137
- # TODO(aylei): implement heartbeat mechanism for plain logs,
138
- # sending invisible characters might be okay.
139
- if is_payload:
140
- continue
141
- yield line_str
195
+ # Flush remaining lines in the buffer.
196
+ async for chunk in flush_buffer():
197
+ yield chunk
142
198
 
143
199
 
144
200
  def stream_response(