skypilot-nightly 1.0.0.dev20251019__py3-none-any.whl → 1.0.0.dev20251022__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +64 -0
- sky/backends/backend_utils.py +11 -11
- sky/backends/cloud_vm_ray_backend.py +15 -4
- sky/client/cli/command.py +39 -10
- sky/client/cli/flags.py +4 -2
- sky/client/sdk.py +26 -3
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/IgACOQPupLbX9z-RYVEDx/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-ec6f902ffb865853.js +11 -0
- sky/dashboard/out/_next/static/chunks/2755.9b1e69c921b5a870.js +26 -0
- sky/dashboard/out/_next/static/chunks/3015-d014dc5b9412fade.js +1 -0
- sky/dashboard/out/_next/static/chunks/{3294.1fafbf42b3bcebff.js → 3294.998db87cd52a1238.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{3785.a19328ba41517b8b.js → 3785.483a3dda2d52f26e.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{1121-d0782b9251f0fcd3.js → 4282-d2f3ef2fbf78e347.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6856-5c94d394259cdb6e.js +1 -0
- sky/dashboard/out/_next/static/chunks/8969-0389e2cb52412db3.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.14326e329484b57e.js +31 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-8f058b0346db2aff.js → [job]-602eeead010ec1d6.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-477555ab7c0b13d8.js → [cluster]-18b334dedbd9f6f2.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{clusters-2f61f65487f6d8ff.js → clusters-57221ec2e4e01076.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-553b8b5cb65e100b.js → [context]-44ce535a0a0ad4ec.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-910a22500c50596f.js → infra-872e6a00165534f4.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{jobs-a35a9dc3c5ccd657.js → jobs-0dc34cf9a8710a9f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{users-98d2ed979084162a.js → users-3a543725492fb896.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{volumes-835d14ba94808f79.js → volumes-d2af9d22e87cc4ba.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-e8688c35c06f0ac5.js → [name]-9ad108cd67d16d96.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-69c80d677d3c2949.js → workspaces-6fc994fa1ee6c6bf.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-919e3c01ab6b2633.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage.py +2 -2
- sky/global_user_state.py +137 -37
- sky/jobs/constants.py +1 -1
- sky/jobs/server/core.py +4 -2
- sky/jobs/server/server.py +21 -12
- sky/jobs/state.py +307 -55
- sky/jobs/utils.py +248 -144
- sky/provision/kubernetes/network.py +9 -6
- sky/provision/provisioner.py +8 -0
- sky/schemas/api/responses.py +2 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/serve/server/server.py +8 -7
- sky/server/common.py +10 -15
- sky/server/constants.py +1 -1
- sky/server/daemons.py +4 -2
- sky/server/requests/executor.py +30 -28
- sky/server/requests/payloads.py +5 -1
- sky/server/requests/preconditions.py +9 -4
- sky/server/requests/requests.py +130 -53
- sky/server/requests/serializers/encoders.py +3 -3
- sky/server/server.py +91 -58
- sky/server/stream_utils.py +127 -38
- sky/server/uvicorn.py +18 -17
- sky/setup_files/alembic.ini +4 -0
- sky/skylet/services.py +5 -5
- sky/skypilot_config.py +87 -75
- sky/ssh_node_pools/server.py +4 -4
- sky/users/permission.py +4 -0
- sky/utils/asyncio_utils.py +63 -3
- sky/utils/db/db_utils.py +11 -3
- sky/utils/db/migration_utils.py +7 -3
- sky/volumes/server/server.py +3 -3
- sky/workspaces/server.py +6 -6
- {skypilot_nightly-1.0.0.dev20251019.dist-info → skypilot_nightly-1.0.0.dev20251022.dist-info}/METADATA +37 -37
- {skypilot_nightly-1.0.0.dev20251019.dist-info → skypilot_nightly-1.0.0.dev20251022.dist-info}/RECORD +87 -86
- sky/dashboard/out/_next/static/8e35zdobdd0bK_Nkba03m/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-3b40c39626f99c89.js +0 -11
- sky/dashboard/out/_next/static/chunks/2755.97300e1362fe7c98.js +0 -26
- sky/dashboard/out/_next/static/chunks/3015-7e0e8f06bb2f881c.js +0 -1
- sky/dashboard/out/_next/static/chunks/6856-5fdc9b851a18acdb.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-66237729cdf9749e.js +0 -1
- sky/dashboard/out/_next/static/chunks/9360.71e83b2ddc844ec2.js +0 -31
- sky/dashboard/out/_next/static/chunks/webpack-3c431f6c9086e487.js +0 -1
- /sky/dashboard/out/_next/static/{8e35zdobdd0bK_Nkba03m → IgACOQPupLbX9z-RYVEDx}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{1871-49141c317f3a9020.js → 1871-df9f87fcb7f24292.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/jobs/{[job]-e5c9ce6a24fc0de4.js → [job]-8677af16befde039.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-bc979970c247d8f3.js → [pool]-e020fd69dbe76cea.js} +0 -0
- {skypilot_nightly-1.0.0.dev20251019.dist-info → skypilot_nightly-1.0.0.dev20251022.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251019.dist-info → skypilot_nightly-1.0.0.dev20251022.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251019.dist-info → skypilot_nightly-1.0.0.dev20251022.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251019.dist-info → skypilot_nightly-1.0.0.dev20251022.dist-info}/top_level.txt +0 -0
sky/server/stream_utils.py
CHANGED
@@ -25,6 +25,17 @@ logger = sky_logging.init_logger(__name__)
 _BUFFER_SIZE = 8 * 1024  # 8KB
 _BUFFER_TIMEOUT = 0.02  # 20ms
 _HEARTBEAT_INTERVAL = 30
+# If a SHORT request has been stuck in pending for
+# _SHORT_REQUEST_SPINNER_TIMEOUT seconds, we show the waiting spinner
+_SHORT_REQUEST_SPINNER_TIMEOUT = 2
+# If there is an issue during provisioning that causes the cluster to be stuck
+# in INIT state, we use this timeout to break the loop and stop streaming
+# provision logs.
+_PROVISION_LOG_TIMEOUT = 3
+# Maximum time to wait for new log files to appear when streaming worker node
+# provision logs. Worker logs are created sequentially during the provisioning
+# process, so we need to wait for new files to appear.
+_MAX_WAIT_FOR_NEW_LOG_FILES = 3  # seconds

 LONG_REQUEST_POLL_INTERVAL = 1
 DEFAULT_POLL_INTERVAL = 0.1
@@ -45,7 +56,7 @@ async def _yield_log_file_with_payloads_skipped(

 async def log_streamer(
     request_id: Optional[str],
-    log_path: pathlib.Path,
+    log_path: Optional[pathlib.Path] = None,
     plain_logs: bool = False,
     tail: Optional[int] = None,
     follow: bool = True,
@@ -57,7 +68,9 @@ async def log_streamer(
     Args:
         request_id: The request ID to check whether the log tailing process
            should be stopped.
-        log_path: The path to the log file
+        log_path: The path to the log file or directory containing the log
+          files. If it is a directory, all *.log files in the directory will be
+          streamed.
        plain_logs: Whether to show plain logs.
        tail: The number of lines to tail. If None, tail the whole file.
        follow: Whether to follow the log file.
@@ -66,17 +79,26 @@ async def log_streamer(
     """

     if request_id is not None:
+        start_time = asyncio.get_event_loop().time()
         status_msg = rich_utils.EncodedStatusMessage(
             f'[dim]Checking request: {request_id}[/dim]')
-        request_task = await requests_lib.get_request_async(request_id)
+        request_task = await requests_lib.get_request_async(request_id,
+                                                            fields=[
+                                                                'request_id',
+                                                                'name',
+                                                                'schedule_type',
+                                                                'status',
+                                                                'status_msg'
+                                                            ])

         if request_task is None:
             raise fastapi.HTTPException(
                 status_code=404, detail=f'Request {request_id} not found')
         request_id = request_task.request_id

-        #
-        # request
+        # By default, do not show the waiting spinner for SHORT requests.
+        # If the request has been stuck in pending for
+        # _SHORT_REQUEST_SPINNER_TIMEOUT seconds, we show the waiting spinner
         show_request_waiting_spinner = (not plain_logs and
                                         request_task.schedule_type
                                         == requests_lib.ScheduleType.LONG)
@@ -89,14 +111,23 @@ async def log_streamer(
                     f'scheduled: {request_id}')
         req_status = request_task.status
         req_msg = request_task.status_msg
+        del request_task
         # Slowly back off the database polling up to every 1 second, to avoid
         # overloading the CPU and DB.
         backoff = common_utils.Backoff(initial_backoff=polling_interval,
                                        max_backoff_factor=10,
                                        multiplier=1.2)
         while req_status < requests_lib.RequestStatus.RUNNING:
+            current_time = asyncio.get_event_loop().time()
+            # Show the waiting spinner for a SHORT request if it has been stuck
+            # in pending for _SHORT_REQUEST_SPINNER_TIMEOUT seconds
+            if not show_request_waiting_spinner and (
+                    current_time - start_time > _SHORT_REQUEST_SPINNER_TIMEOUT):
+                show_request_waiting_spinner = True
+                yield status_msg.init()
+                yield status_msg.start()
             if req_msg is not None:
-                waiting_msg =
+                waiting_msg = req_msg
             if show_request_waiting_spinner:
                 yield status_msg.update(f'[dim]{waiting_msg}[/dim]')
             elif plain_logs and waiting_msg != last_waiting_msg:
@@ -119,11 +150,57 @@ async def log_streamer(
         if show_request_waiting_spinner:
             yield status_msg.stop()

-
-
-
-
-
+    if log_path is not None and log_path.is_dir():
+        # Track which log files we've already streamed
+        streamed_files = set()
+        no_new_files_count = 0
+
+        while True:
+            # Get all *.log files in the log_path
+            log_files = sorted(log_path.glob('*.log'))
+
+            # Filter out already streamed files
+            new_files = [f for f in log_files if f not in streamed_files]
+
+            if len(new_files) == 0:
+                if not follow:
+                    break
+                # Wait a bit to see if new files appear
+                await asyncio.sleep(0.5)
+                no_new_files_count += 1
+                # Check if we've waited too long for new files
+                if no_new_files_count > _MAX_WAIT_FOR_NEW_LOG_FILES * 2:
+                    break
+                continue
+
+            # Reset the no-new-files counter when we find new files
+            no_new_files_count = 0
+
+            for log_file_path in new_files:
+                # Add header before each file (similar to tail -f behavior)
+                header = f'\n==> {log_file_path} <==\n\n'
+                yield header
+
+                async with aiofiles.open(log_file_path, 'rb') as f:
+                    async for chunk in _tail_log_file(f, request_id, plain_logs,
+                                                      tail, follow,
+                                                      cluster_name,
+                                                      polling_interval):
+                        yield chunk
+
+                # Mark this file as streamed
+                streamed_files.add(log_file_path)
+
+            # If not following, break after streaming all current files
+            if not follow:
+                break
+    else:
+        assert log_path is not None, (request_id, log_path)
+        async with aiofiles.open(log_path, 'rb') as f:
+            async for chunk in _tail_log_file(f, request_id, plain_logs, tail,
+                                              follow, cluster_name,
+                                              polling_interval):
+                yield chunk


 async def _tail_log_file(
@@ -197,7 +274,7 @@ async def _tail_log_file(
                     if (req_status.status ==
                             requests_lib.RequestStatus.CANCELLED):
                         request_task = await requests_lib.get_request_async(
-                            request_id)
+                            request_id, fields=['name', 'should_retry'])
                         if request_task.should_retry:
                             buffer.append(
                                 message_utils.encode_payload(
@@ -206,6 +283,7 @@ async def _tail_log_file(
                         buffer.append(
                             f'{request_task.name!r} request {request_id}'
                             ' cancelled\n')
+                        del request_task
                     break
                 if not follow:
                     # The below checks (cluster status, heartbeat) are not needed
@@ -213,21 +291,24 @@ async def _tail_log_file(
                     break
                 # Provision logs pass in cluster_name, check cluster status
                 # periodically to see if provisioning is done.
-                if cluster_name is not None
-
-                    cluster_status = await (
-                        global_user_state.get_status_from_cluster_name_async(
-                            cluster_name))
-                    if cluster_status is None:
-                        logger.debug(
-                            'Stop tailing provision logs for cluster'
-                            f' status for cluster {cluster_name} not found')
-                        break
-                    if cluster_status != status_lib.ClusterStatus.INIT:
-                        logger.debug(f'Stop tailing provision logs for cluster'
-                                     f' {cluster_name} has status {cluster_status} '
-                                     '(not in INIT state)')
+                if cluster_name is not None:
+                    if current_time - last_flush_time > _PROVISION_LOG_TIMEOUT:
                         break
+                    if should_check_status:
+                        last_status_check_time = current_time
+                        cluster_status = await (
+                            global_user_state.get_status_from_cluster_name_async(
+                                cluster_name))
+                        if cluster_status is None:
+                            logger.debug(
+                                'Stop tailing provision logs for cluster'
+                                f' status for cluster {cluster_name} not found')
+                            break
+                        if cluster_status != status_lib.ClusterStatus.INIT:
+                            logger.debug(
+                                f'Stop tailing provision logs for cluster'
+                                f' {cluster_name} has status {cluster_status} '
+                                '(not in INIT state)')
                 if current_time - last_heartbeat_time >= _HEARTBEAT_INTERVAL:
                     # Currently just used to keep the connection busy, refer to
                     # https://github.com/skypilot-org/skypilot/issues/5750 for
@@ -267,28 +348,36 @@ def stream_response_for_long_request(
     request_id: str,
     logs_path: pathlib.Path,
     background_tasks: fastapi.BackgroundTasks,
+    kill_request_on_disconnect: bool = True,
 ) -> fastapi.responses.StreamingResponse:
-
-
-
-
+    """Stream the logs of a long request."""
+    return stream_response(
+        request_id,
+        logs_path,
+        background_tasks,
+        polling_interval=LONG_REQUEST_POLL_INTERVAL,
+        kill_request_on_disconnect=kill_request_on_disconnect,
+    )


 def stream_response(
     request_id: str,
     logs_path: pathlib.Path,
     background_tasks: fastapi.BackgroundTasks,
-    polling_interval: float = DEFAULT_POLL_INTERVAL
+    polling_interval: float = DEFAULT_POLL_INTERVAL,
+    kill_request_on_disconnect: bool = True,
 ) -> fastapi.responses.StreamingResponse:

-
-
-
-
+    if kill_request_on_disconnect:
+
+        async def on_disconnect():
+            logger.info(f'User terminated the connection for request '
+                        f'{request_id}')
+            requests_lib.kill_requests([request_id])

-
-
-
+        # The background task will be run after returning a response.
+        # https://fastapi.tiangolo.com/tutorial/background-tasks/
+        background_tasks.add_task(on_disconnect)

     return fastapi.responses.StreamingResponse(
         log_streamer(request_id, logs_path, polling_interval=polling_interval),
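The directory branch added to log_streamer above multiplexes a growing set of per-node provision logs, tail -f style. Below is a minimal standalone sketch of the same pattern using only the standard library; the polling cadence, file pattern, and function name are illustrative, not SkyPilot's actual API:

import asyncio
import pathlib
from typing import AsyncIterator, Set

MAX_EMPTY_POLLS = 6  # give up after ~3s with no new files (0.5s per poll)

async def stream_log_dir(log_dir: pathlib.Path,
                         follow: bool = True) -> AsyncIterator[str]:
    """Yield the contents of each *.log file in log_dir as it appears.

    New files that show up while streaming are picked up, mirroring the
    sequential creation of worker provision logs.
    """
    streamed: Set[pathlib.Path] = set()
    empty_polls = 0
    while True:
        new_files = [p for p in sorted(log_dir.glob('*.log'))
                     if p not in streamed]
        if not new_files:
            if not follow:
                break
            await asyncio.sleep(0.5)  # wait for the next file to appear
            empty_polls += 1
            if empty_polls > MAX_EMPTY_POLLS:
                break  # assume provisioning finished writing logs
            continue
        empty_polls = 0
        for path in new_files:
            yield f'\n==> {path} <==\n\n'  # tail -f style header
            yield path.read_text()
            streamed.add(path)
        if not follow:
            break

The key design choice, visible in both the real diff and this sketch, is bounding the wait for new files so a stuck provisioning run cannot hold the stream open forever.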
sky/server/uvicorn.py
CHANGED
@@ -46,11 +46,11 @@ except ValueError:

 # TODO(aylei): use decorator to register requests that need to be proactively
 # cancelled instead of hardcoding here.
-_RETRIABLE_REQUEST_NAMES =
+_RETRIABLE_REQUEST_NAMES = {
     'sky.logs',
     'sky.jobs.logs',
     'sky.serve.logs',
-
+}


 def add_timestamp_prefix_for_server_logs() -> None:
@@ -151,37 +151,38 @@ class Server(uvicorn.Server):
                 requests_lib.RequestStatus.PENDING,
                 requests_lib.RequestStatus.RUNNING,
             ]
-
-
-
+            requests = [(request_task.request_id, request_task.name)
+                        for request_task in requests_lib.get_request_tasks(
+                            req_filter=requests_lib.RequestTaskFilter(
+                                status=statuses, fields=['request_id', 'name']))
+                       ]
+            if not requests:
                 break
-            logger.info(f'{len(
+            logger.info(f'{len(requests)} on-going requests '
                         'found, waiting for them to finish...')
             # Proactively cancel internal requests and logs requests since
             # they can run for infinite time.
-            internal_request_ids =
+            internal_request_ids = {
                 d.id for d in daemons.INTERNAL_REQUEST_DAEMONS
-
+            }
             if time.time() - start_time > _WAIT_REQUESTS_TIMEOUT_SECONDS:
                 logger.warning('Timeout waiting for on-going requests to '
                                'finish, cancelling all on-going requests.')
-                for
-                    self.interrupt_request_for_retry(
+                for request_id, _ in requests:
+                    self.interrupt_request_for_retry(request_id)
                 break
             interrupted = 0
-            for
-                if
-
-
-            elif req.name in _RETRIABLE_REQUEST_NAMES:
-                self.interrupt_request_for_retry(req.request_id)
+            for request_id, name in requests:
+                if (name in _RETRIABLE_REQUEST_NAMES or
+                        request_id in internal_request_ids):
+                    self.interrupt_request_for_retry(request_id)
                 interrupted += 1
             # TODO(aylei): interrupt pending requests to accelerate the
             # shutdown.
             # If some requests are not interrupted, wait for them to finish,
             # otherwise we just check again immediately to accelerate the
             # shutdown process.
-            if interrupted < len(
+            if interrupted < len(requests):
                 time.sleep(_WAIT_REQUESTS_INTERVAL_SECONDS)

     def interrupt_request_for_retry(self, request_id: str) -> None:
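The shutdown loop above drains in-flight requests, immediately interrupting the ones known to be retriable (log streams can run forever) and hard-cancelling everything after a timeout. A self-contained sketch of that policy, with the timeout values and callback names being illustrative stand-ins for SkyPilot's internals:

import time
from typing import Callable, List, Tuple

RETRIABLE = {'sky.logs', 'sky.jobs.logs', 'sky.serve.logs'}
TIMEOUT_S = 60.0
POLL_S = 1.0

def drain_requests(list_active: Callable[[], List[Tuple[str, str]]],
                   interrupt: Callable[[str], None]) -> None:
    """Wait for active (request_id, name) pairs to finish, interrupting
    retriable ones immediately and everything else after TIMEOUT_S."""
    start = time.time()
    while True:
        active = list_active()
        if not active:
            return
        if time.time() - start > TIMEOUT_S:
            for request_id, _ in active:  # hard deadline: cancel all
                interrupt(request_id)
            return
        interrupted = 0
        for request_id, name in active:
            if name in RETRIABLE:  # safe to cancel: clients will retry
                interrupt(request_id)
                interrupted += 1
        if interrupted < len(active):
            time.sleep(POLL_S)  # some requests must finish naturally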
sky/setup_files/alembic.ini
CHANGED
@@ -98,6 +98,10 @@ version_table = alembic_version_spot_jobs_db
 version_locations = %(here)s/../schemas/db/serve_state
 version_table = alembic_version_serve_state_db

+[sky_config_db]
+version_locations = %(here)s/../schemas/db/skypilot_config
+version_table = alembic_version_sky_config_db
+
 [post_write_hooks]
 # post_write_hooks defines scripts or Python functions that are run
 # on newly generated revision scripts. See the documentation for further
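Each section in this ini file gives one logical database its own migration scripts and its own Alembic version table, so their upgrade histories never collide. A minimal sketch of driving the new [sky_config_db] section programmatically; SkyPilot actually wraps this in sky.utils.db.migration_utils.safe_alembic_upgrade, and the URL below plus the assumption that the project's env.py honors the section's version_table are illustrative:

from alembic import command
from alembic.config import Config

# Point Alembic at the [sky_config_db] section instead of the default
# [alembic] section, so it picks up that section's version_locations
# and version_table.
cfg = Config('alembic.ini', ini_section='sky_config_db')
cfg.set_main_option('sqlalchemy.url', 'sqlite:///skypilot_config.db')

# Upgrade the config database to the latest revision; its migration
# head is tracked in alembic_version_sky_config_db, separate from the
# other databases' version tables.
command.upgrade(cfg, 'head')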
sky/skylet/services.py
CHANGED
@@ -408,17 +408,17 @@ class ManagedJobsServiceImpl(managed_jobsv1_pb2_grpc.ManagedJobsServiceServicer
     ) -> managed_jobsv1_pb2.GetJobTableResponse:
         try:
             accessible_workspaces = list(request.accessible_workspaces)
-            job_ids = list(request.job_ids.ids)
+            job_ids = (list(request.job_ids.ids)
+                       if request.HasField('job_ids') else None)
             user_hashes: Optional[List[Optional[str]]] = None
-            if request.user_hashes:
+            if request.HasField('user_hashes'):
                 user_hashes = list(request.user_hashes.hashes)
                 # For backwards compatibility, we show jobs that do not have a
                 # user_hash. TODO: Remove before 0.12.0.
                 if request.show_jobs_without_user_hash:
                     user_hashes.append(None)
-            statuses = list(
-
-
+            statuses = (list(request.statuses.statuses)
+                        if request.HasField('statuses') else None)
             job_queue = managed_job_utils.get_managed_job_queue(
                 skip_finished=request.skip_finished,
                 accessible_workspaces=accessible_workspaces,
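The change above swaps truthiness checks for protobuf HasField so that an unset optional message field ("no filter requested") is distinguished from a present-but-empty one ("filter that matches nothing"). The same distinction expressed in plain Python, with a hypothetical helper that is not part of SkyPilot's API:

from typing import List, Optional

ALL_JOBS = [1, 2, 3]

def filter_jobs(job_ids: Optional[List[int]]) -> List[int]:
    """None means 'no filter'; [] means 'match nothing'.

    A truthiness check (`if job_ids:`) would wrongly treat [] like None,
    which is exactly the ambiguity HasField resolves for proto fields.
    """
    if job_ids is None:
        return ALL_JOBS
    return [j for j in ALL_JOBS if j in job_ids]

assert filter_jobs(None) == [1, 2, 3]  # unset: return everything
assert filter_jobs([]) == []           # empty filter: return nothing
assert filter_jobs([2]) == [2]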
sky/skypilot_config.py
CHANGED
@@ -64,7 +64,6 @@ from sqlalchemy import orm
 from sqlalchemy.dialects import postgresql
 from sqlalchemy.dialects import sqlite
 from sqlalchemy.ext import declarative
-from sqlalchemy.pool import NullPool

 from sky import exceptions
 from sky import sky_logging
@@ -77,6 +76,7 @@ from sky.utils import schemas
 from sky.utils import ux_utils
 from sky.utils import yaml_utils
 from sky.utils.db import db_utils
+from sky.utils.db import migration_utils
 from sky.utils.kubernetes import config_map_utils

 if typing.TYPE_CHECKING:
@@ -121,7 +121,8 @@ _PROJECT_CONFIG_PATH = '.sky.yaml'

 API_SERVER_CONFIG_KEY = 'api_server_config'

-
+_SQLALCHEMY_ENGINE: Optional[sqlalchemy.engine.Engine] = None
+_SQLALCHEMY_ENGINE_LOCK = threading.Lock()

 Base = declarative.declarative_base()

@@ -481,7 +482,7 @@ def safe_reload_config() -> None:
     reload_config()


-def reload_config() -> None:
+def reload_config(init_db: bool = False) -> None:
     internal_config_path = os.environ.get(ENV_VAR_SKYPILOT_CONFIG)
     if internal_config_path is not None:
         # {ENV_VAR_SKYPILOT_CONFIG} is used internally.
@@ -493,7 +494,7 @@ def reload_config() -> None:
         return

     if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
-        _reload_config_as_server()
+        _reload_config_as_server(init_db=init_db)
     else:
         _reload_config_as_client()

@@ -564,7 +565,43 @@ def _reload_config_from_internal_file(internal_config_path: str) -> None:
     _set_loaded_config_path(config_path)


-def _reload_config_as_server() -> None:
+def _create_table(engine: sqlalchemy.engine.Engine):
+    """Initialize the config database with migrations."""
+    migration_utils.safe_alembic_upgrade(
+        engine, migration_utils.SKYPILOT_CONFIG_DB_NAME,
+        migration_utils.SKYPILOT_CONFIG_VERSION)
+
+
+def _initialize_and_get_db() -> sqlalchemy.engine.Engine:
+    """Initialize and return the config database engine.
+
+    This function should only be called by the API Server during initialization.
+    Client-side code should never call this function.
+    """
+    assert os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None, (
+        'initialize_and_get_db() can only be called by the API Server')
+
+    global _SQLALCHEMY_ENGINE
+
+    if _SQLALCHEMY_ENGINE is not None:
+        return _SQLALCHEMY_ENGINE
+
+    with _SQLALCHEMY_ENGINE_LOCK:
+        if _SQLALCHEMY_ENGINE is not None:
+            return _SQLALCHEMY_ENGINE
+
+        # We only store config in the DB when using Postgres,
+        # so no need to pass in db_name here.
+        engine = db_utils.get_engine(None)
+
+        # Run migrations if needed
+        _create_table(engine)
+
+        _SQLALCHEMY_ENGINE = engine
+        return _SQLALCHEMY_ENGINE
+
+
+def _reload_config_as_server(init_db: bool = False) -> None:
     # Reset the global variables, to avoid using stale values.
     _set_loaded_config(config_utils.Config())
     _set_loaded_config_path(None)
@@ -580,37 +617,24 @@ def _reload_config_as_server() -> None:
         raise ValueError(
             'If db config is specified, no other config is allowed')
     logger.debug('retrieving config from database')
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        db_config = config_utils.Config(
-            yaml_utils.safe_load(row.value))
-        db_config.pop_nested(('db',), None)
-        return db_config
-    return None
-
-    db_config = _get_config_yaml_from_db(API_SERVER_CONFIG_KEY)
-    if db_config:
-        server_config = overlay_skypilot_config(server_config,
-                                                db_config)
-    # Close the engine to avoid connection leaks
-    if dispose_engine:
-        sqlalchemy_engine.dispose()
+
+    if init_db:
+        _initialize_and_get_db()
+
+    def _get_config_yaml_from_db(key: str) -> Optional[config_utils.Config]:
+        assert _SQLALCHEMY_ENGINE is not None
+        with orm.Session(_SQLALCHEMY_ENGINE) as session:
+            row = session.query(config_yaml_table).filter_by(
+                key=key).first()
+            if row:
+                db_config = config_utils.Config(yaml_utils.safe_load(row.value))
+                db_config.pop_nested(('db',), None)
+                return db_config
+        return None
+
+    db_config = _get_config_yaml_from_db(API_SERVER_CONFIG_KEY)
+    if db_config:
+        server_config = overlay_skypilot_config(server_config, db_config)
     if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
         logger.debug(f'server config: \n'
                      f'{yaml_utils.dump_yaml_str(dict(server_config))}')
@@ -666,7 +690,7 @@ def loaded_config_path_serialized() -> Optional[str]:


 # Load on import, synchronization is guaranteed by python interpreter.
-reload_config()
+reload_config(init_db=True)


 def loaded() -> bool:
@@ -880,44 +904,32 @@ def update_api_server_config_no_lock(config: config_utils.Config) -> None:
     if new_db_url and new_db_url != existing_db_url:
         raise ValueError('Cannot change db url while server is running')
     if existing_db_url:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        do_update_stmt = insert_stmnt.on_conflict_do_update(
-            index_elements=[config_yaml_table.c.key],
-            set_={config_yaml_table.c.value: config_str})
-        session.execute(do_update_stmt)
-        session.commit()
-
-        logger.debug('saving api_server config to db')
-        _set_config_yaml_to_db(API_SERVER_CONFIG_KEY, config)
-        db_updated = True
-        # Close the engine to avoid connection leaks
-        if dispose_engine:
-            sqlalchemy_engine.dispose()
+
+        def _set_config_yaml_to_db(key: str, config: config_utils.Config):
+            # reload_config(init_db=True) is called when this module is
+            # imported, so the database engine must already be initialized.
+            assert _SQLALCHEMY_ENGINE is not None
+            config_str = yaml_utils.dump_yaml_str(dict(config))
+            with orm.Session(_SQLALCHEMY_ENGINE) as session:
+                if (_SQLALCHEMY_ENGINE.dialect.name ==
+                        db_utils.SQLAlchemyDialect.SQLITE.value):
+                    insert_func = sqlite.insert
+                elif (_SQLALCHEMY_ENGINE.dialect.name ==
+                      db_utils.SQLAlchemyDialect.POSTGRESQL.value):
+                    insert_func = postgresql.insert
+                else:
+                    raise ValueError('Unsupported database dialect')
+                insert_stmnt = insert_func(config_yaml_table).values(
+                    key=key, value=config_str)
+                do_update_stmt = insert_stmnt.on_conflict_do_update(
+                    index_elements=[config_yaml_table.c.key],
+                    set_={config_yaml_table.c.value: config_str})
+                session.execute(do_update_stmt)
+                session.commit()
+
+        logger.debug('saving api_server config to db')
+        _set_config_yaml_to_db(API_SERVER_CONFIG_KEY, config)
+        db_updated = True

     if not db_updated:
         # save to the local file (PVC in Kubernetes, local file otherwise)
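The new _set_config_yaml_to_db uses a dialect-aware INSERT ... ON CONFLICT DO UPDATE so the config row is created or overwritten in a single atomic statement. A self-contained sketch of the same upsert pattern against an in-memory SQLite database; the table and key names are illustrative, not SkyPilot's schema:

import sqlalchemy
from sqlalchemy.dialects import sqlite

metadata = sqlalchemy.MetaData()
config_yaml = sqlalchemy.Table(
    'config_yaml', metadata,
    sqlalchemy.Column('key', sqlalchemy.Text, primary_key=True),
    sqlalchemy.Column('value', sqlalchemy.Text))

engine = sqlalchemy.create_engine('sqlite://')
metadata.create_all(engine)

def set_config(key: str, value: str) -> None:
    # One statement that either inserts the row or, on a primary-key
    # conflict, updates the existing value in place.
    stmt = sqlite.insert(config_yaml).values(key=key, value=value)
    stmt = stmt.on_conflict_do_update(
        index_elements=[config_yaml.c.key],
        set_={config_yaml.c.value: value})
    with engine.begin() as conn:
        conn.execute(stmt)

set_config('api_server_config', 'a: 1')
set_config('api_server_config', 'a: 2')  # overwrites, no duplicate key
with engine.connect() as conn:
    row = conn.execute(sqlalchemy.select(config_yaml)).one()
    assert row.value == 'a: 2'

For Postgres the diff swaps in sqlalchemy.dialects.postgresql.insert, which exposes the same on_conflict_do_update API; that is why the real code selects insert_func by dialect name.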
sky/ssh_node_pools/server.py
CHANGED
@@ -99,7 +99,7 @@ async def deploy_ssh_node_pool(request: fastapi.Request,
     """Deploy SSH Node Pool using existing ssh_up functionality."""
     try:
         ssh_up_body = payloads.SSHUpBody(infra=pool_name, cleanup=False)
-        executor.
+        await executor.schedule_request_async(
             request_id=request.state.request_id,
             request_name='ssh_up',
             request_body=ssh_up_body,
@@ -124,7 +124,7 @@ async def deploy_ssh_node_pool_general(
         ssh_up_body: payloads.SSHUpBody) -> Dict[str, str]:
     """Deploys all SSH Node Pools."""
     try:
-        executor.
+        await executor.schedule_request_async(
             request_id=request.state.request_id,
             request_name='ssh_up',
             request_body=ssh_up_body,
@@ -150,7 +150,7 @@ async def down_ssh_node_pool(request: fastapi.Request,
     """Cleans up a SSH Node Pools."""
     try:
         ssh_up_body = payloads.SSHUpBody(infra=pool_name, cleanup=True)
-        executor.
+        await executor.schedule_request_async(
             request_id=request.state.request_id,
             request_name='ssh_down',
             request_body=ssh_up_body,
@@ -178,7 +178,7 @@ async def down_ssh_node_pool_general(
     try:
         # Set cleanup=True for down operation
         ssh_up_body.cleanup = True
-        executor.
+        await executor.schedule_request_async(
             request_id=request.state.request_id,
             request_name='ssh_down',
             request_body=ssh_up_body,
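All four handlers above switch from a synchronous executor call to awaiting schedule_request_async. Inside an async def route, a blocking call stalls the whole event loop for its duration, while an awaited coroutine lets other requests make progress. A minimal illustration of the difference; the function names and timings here are stand-ins, not SkyPilot's executor:

import asyncio
import time

async def schedule_request_async(name: str) -> None:
    # Stand-in for an async scheduler: yields control while "working".
    await asyncio.sleep(0.1)

def schedule_request_blocking(name: str) -> None:
    time.sleep(0.1)  # blocks the event loop for its full duration

async def main() -> None:
    # Awaited coroutines overlap: ~0.1s total for 10 requests.
    start = time.monotonic()
    await asyncio.gather(*(schedule_request_async(f'req{i}')
                           for i in range(10)))
    print(f'async:    {time.monotonic() - start:.2f}s')

    # Blocking calls serialize: ~1.0s, and nothing else can run meanwhile.
    start = time.monotonic()
    for i in range(10):
        schedule_request_blocking(f'req{i}')
    print(f'blocking: {time.monotonic() - start:.2f}s')

asyncio.run(main())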
sky/users/permission.py
CHANGED
@@ -14,6 +14,7 @@ from sky import models
 from sky import sky_logging
 from sky.skylet import constants
 from sky.users import rbac
+from sky.utils import annotations
 from sky.utils import common_utils
 from sky.utils.db import db_utils

@@ -254,6 +255,9 @@ class PermissionService:
         with _policy_lock():
             self._load_policy_no_lock()

+    # Right now, not a lot of users are using multiple workspaces,
+    # so 5 should be more than enough.
+    @annotations.lru_cache(scope='request', maxsize=5)
     def check_workspace_permission(self, user_id: str,
                                    workspace_name: str) -> bool:
         """Check workspace permission.