skypilot-nightly 1.0.0.dev20250408__py3-none-any.whl → 1.0.0.dev20250411__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- sky/__init__.py +2 -2
- sky/adaptors/azure.py +1 -1
- sky/adaptors/nebius.py +5 -27
- sky/backends/backend.py +9 -7
- sky/backends/cloud_vm_ray_backend.py +7 -7
- sky/backends/local_docker_backend.py +3 -3
- sky/client/common.py +4 -2
- sky/client/sdk.py +58 -26
- sky/cloud_stores.py +0 -4
- sky/clouds/do.py +4 -5
- sky/clouds/gcp.py +5 -3
- sky/clouds/nebius.py +22 -12
- sky/clouds/service_catalog/data_fetchers/fetch_ibm.py +1 -2
- sky/clouds/service_catalog/gcp_catalog.py +37 -10
- sky/core.py +6 -6
- sky/data/data_utils.py +5 -9
- sky/data/mounting_utils.py +1 -1
- sky/data/storage.py +25 -31
- sky/data/storage_utils.py +27 -18
- sky/execution.py +11 -4
- sky/jobs/client/sdk.py +5 -0
- sky/jobs/server/server.py +5 -1
- sky/optimizer.py +1 -2
- sky/provision/do/utils.py +19 -16
- sky/provision/gcp/config.py +30 -20
- sky/serve/client/sdk.py +6 -0
- sky/server/common.py +16 -1
- sky/server/constants.py +5 -0
- sky/setup_files/dependencies.py +1 -1
- sky/skylet/log_lib.py +4 -0
- sky/skypilot_config.py +19 -30
- sky/task.py +27 -7
- sky/utils/schemas.py +25 -7
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/METADATA +2 -2
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/RECORD +39 -39
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250408.dist-info → skypilot_nightly-1.0.0.dev20250411.dist-info}/top_level.txt +0 -0
sky/clouds/nebius.py
CHANGED
@@ -24,18 +24,28 @@ _CREDENTIAL_FILES = [
 _INDENT_PREFIX = '    '
 
 
-def …
-    """Checks if Nebius Object Storage profile is set in aws credentials
-    …
+def nebius_profile_in_aws_cred_and_config() -> bool:
+    """Checks if Nebius Object Storage profile is set in aws credentials
+    and profile."""
+    credentials_path = os.path.expanduser('~/.aws/credentials')
+    nebius_profile_exists_in_credentials = False
+    if os.path.isfile(credentials_path):
+        with open(credentials_path, 'r', encoding='utf-8') as file:
             for line in file:
                 if f'[{nebius.NEBIUS_PROFILE_NAME}]' in line:
-                    …
+                    nebius_profile_exists_in_credentials = True
+
+    config_path = os.path.expanduser('~/.aws/config')
+    nebius_profile_exists_in_config = False
+    if os.path.isfile(config_path):
+        with open(config_path, 'r', encoding='utf-8') as file:
+            for line in file:
+                if f'[profile {nebius.NEBIUS_PROFILE_NAME}]' in line:
+                    nebius_profile_exists_in_config = True
 
-    return …
+    return (nebius_profile_exists_in_credentials and
+            nebius_profile_exists_in_config)
 
 
 @registry.CLOUD_REGISTRY.register
@@ -308,12 +318,12 @@ class Nebius(clouds.Cloud):
             with a string on unset credential.
         """
         hints = None
-        if not …
+        if not nebius_profile_in_aws_cred_and_config():
             hints = (f'[{nebius.NEBIUS_PROFILE_NAME}] profile '
                      'is not set in ~/.aws/credentials.')
         if hints:
             hints += ' Run the following commands:'
-            if not …
+            if not nebius_profile_in_aws_cred_and_config():
                 hints += (
                     f'\n{_INDENT_PREFIX} $ pip install boto3'
                     f'\n{_INDENT_PREFIX} $ aws configure --profile nebius')
@@ -329,7 +339,7 @@ class Nebius(clouds.Cloud):
             for filename in _CREDENTIAL_FILES
         }
         credential_file_mounts['~/.aws/credentials'] = '~/.aws/credentials'
-        …
+        credential_file_mounts['~/.aws/config'] = '~/.aws/config'
         return credential_file_mounts
 
     @classmethod
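The new `nebius_profile_in_aws_cred_and_config()` helper returns True only when a `[nebius]` section (the default `NEBIUS_PROFILE_NAME`) exists in `~/.aws/credentials` and a matching `[profile nebius]` section exists in `~/.aws/config`, which is also why `~/.aws/config` is now added to the credential file mounts. A small sketch of an equivalent local check, using `configparser` rather than the diff's substring scan:

```python
# Illustrative only: NOT the implementation in the diff (which scans the raw
# files for the section headers), just an equivalent way to verify the same
# condition locally.
import configparser
import os


def has_nebius_profile(profile: str = 'nebius') -> bool:
    creds = configparser.ConfigParser()
    creds.read(os.path.expanduser('~/.aws/credentials'))
    config = configparser.ConfigParser()
    config.read(os.path.expanduser('~/.aws/config'))
    # credentials uses a bare [nebius] section; config uses [profile nebius].
    return profile in creds and f'profile {profile}' in config


if __name__ == '__main__':
    # True only after e.g. `aws configure --profile nebius` has written both files.
    print(has_nebius_profile())
```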
sky/clouds/service_catalog/data_fetchers/fetch_ibm.py
CHANGED
@@ -1,4 +1,4 @@
-"""A script that generates the …
+"""A script that generates the IBM Cloud catalog.
 
 Usage:
 python fetch_ibm.py [-h] [--api-key API_KEY]
@@ -19,7 +19,6 @@ import yaml
 
 TOKEN_ENDPOINT = 'https://iam.cloud.ibm.com/identity/token'
 REGIONS_ENDPOINT = f'https://us-south.iaas.cloud.ibm.com/v1/regions?version={datetime.today().strftime("%Y-%m-%d")}&generation=2'  # pylint: disable=line-too-long
-ENDPOINT = 'https://cloud.lambdalabs.com/api/v1/instance-types'
 DEFAULT_IBM_CREDENTIALS_PATH = os.path.expanduser('~/.ibm/credentials.yaml')
 
 
sky/clouds/service_catalog/gcp_catalog.py
CHANGED
@@ -106,6 +106,16 @@ _ACC_INSTANCE_TYPE_DICTS = {
         8: ['a3-megagpu-8g'],
     }
 }
+# Enable GPU type inference from instance types
+_INSTANCE_TYPE_TO_ACC = {
+    instance_type: {
+        acc_name: acc_count
+    } for acc_name, acc_count_to_instance_type in
+    _ACC_INSTANCE_TYPE_DICTS.items()
+    for acc_count, instance_types in acc_count_to_instance_type.items()
+    for instance_type in instance_types
+}
+GCP_ACC_INSTANCE_TYPES = list(_INSTANCE_TYPE_TO_ACC.keys())
 
 # Number of CPU cores per GPU based on the AWS setting.
 # GCP A100 has its own instance type mapping.
@@ -270,6 +280,26 @@ def get_default_instance_type(
                              memory_gb_or_ratio)
 
 
+def get_accelerators_from_instance_type(
+        instance_type: str) -> Optional[Dict[str, int]]:
+    """Infer the GPU type from the instance type.
+
+    This inference logic is GCP-specific. Unlike other clouds, we don't call
+    the internal implementation defined in common.py.
+
+    Args:
+        instance_type: the instance type to use.
+
+    Returns:
+        A dictionary mapping from the accelerator name to the accelerator count.
+    """
+    if instance_type in GCP_ACC_INSTANCE_TYPES:
+        return _INSTANCE_TYPE_TO_ACC[instance_type]
+    else:
+        # General CPU instance types don't come with pre-attached accelerators.
+        return None
+
+
 def get_instance_type_for_accelerator(
         acc_name: str,
         acc_count: int,
@@ -528,16 +558,13 @@ def check_accelerator_attachable_to_host(instance_type: str,
        attached to the host.
     """
     if accelerators is None:
-        …
-            f'{acc_name} GPUs. Either use other instance types or '
-            f'specify the accelerators as {acc_name}.')
-        return
+        if instance_type in GCP_ACC_INSTANCE_TYPES:
+            # Infer the GPU type from the instance type
+            accelerators = _INSTANCE_TYPE_TO_ACC[instance_type]
+        else:
+            # Skip the following checks if instance_type is a general CPU
+            # instance without accelerators
+            return
 
     acc = list(accelerators.items())
     assert len(acc) == 1, acc
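The comprehension added at the top flattens the nested accelerator → count → instance-type catalog into a per-instance-type lookup, which both `get_accelerators_from_instance_type` and the relaxed `check_accelerator_attachable_to_host` rely on. A self-contained sketch with a made-up two-entry catalog (the real `_ACC_INSTANCE_TYPE_DICTS` is much larger) shows the shape of the result:

```python
from typing import Dict, List, Optional

# Made-up subset for illustration; the real _ACC_INSTANCE_TYPE_DICTS in
# gcp_catalog.py covers many more accelerators and counts.
_ACC_INSTANCE_TYPE_DICTS: Dict[str, Dict[int, List[str]]] = {
    'A100-80GB': {
        1: ['a2-ultragpu-1g'],
        8: ['a2-ultragpu-8g'],
    },
}

# Same comprehension as the diff: one entry per instance type.
_INSTANCE_TYPE_TO_ACC = {
    instance_type: {
        acc_name: acc_count
    } for acc_name, acc_count_to_instance_type in
    _ACC_INSTANCE_TYPE_DICTS.items()
    for acc_count, instance_types in acc_count_to_instance_type.items()
    for instance_type in instance_types
}
GCP_ACC_INSTANCE_TYPES = list(_INSTANCE_TYPE_TO_ACC.keys())


def get_accelerators_from_instance_type(
        instance_type: str) -> Optional[Dict[str, int]]:
    # Mirrors the new helper: CPU-only instance types map to None.
    return _INSTANCE_TYPE_TO_ACC.get(instance_type)


assert get_accelerators_from_instance_type('a2-ultragpu-8g') == {'A100-80GB': 8}
assert get_accelerators_from_instance_type('n2-standard-8') is None
```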
sky/core.py
CHANGED
@@ -372,12 +372,12 @@ def _start(
     with dag_lib.Dag():
         dummy_task = task_lib.Task().set_resources(handle.launched_resources)
         dummy_task.num_nodes = handle.launched_nodes
-    handle = backend.provision(dummy_task,
-        …
+    (handle, _) = backend.provision(dummy_task,
+                                    to_provision=handle.launched_resources,
+                                    dryrun=False,
+                                    stream_logs=True,
+                                    cluster_name=cluster_name,
+                                    retry_until_up=retry_until_up)
     storage_mounts = backend.get_storage_mounts_metadata(handle.cluster_name)
     # Passing all_file_mounts as None ensures the local source set in Storage
     # to not redundantly sync source to the bucket.
sky/data/data_utils.py
CHANGED
@@ -322,14 +322,9 @@ def create_r2_client(region: str = 'auto') -> Client:
     return cloudflare.client('s3', region)
 
 
-def create_nebius_client(…
-    """Helper method that connects to Boto3 client for Nebius Object Storage
-
-    Args:
-        region: str; Region for Nebius Object Storage
-    """
-    region = region if region is not None else nebius.DEFAULT_REGION
-    return nebius.client('s3', region)
+def create_nebius_client() -> Client:
+    """Helper method that connects to Boto3 client for Nebius Object Storage"""
+    return nebius.client('s3')
 
 
 def verify_r2_bucket(name: str) -> bool:
@@ -566,7 +561,8 @@ def run_upload_cli(command: str, access_denied_message: str, bucket_name: str,
         require_outputs=True,
         # We need to use bash as some of the cloud commands uses bash syntax,
         # such as [[ ... ]]
-        executable='/bin/bash')
+        executable='/bin/bash',
+        log_cmd=True)
     if access_denied_message in stderr:
         with ux_utils.print_exception_no_traceback():
             raise PermissionError('Failed to upload files to '
sky/data/mounting_utils.py
CHANGED
@@ -64,8 +64,8 @@ def get_s3_mount_cmd(bucket_name: str,
 
 
 def get_nebius_mount_cmd(nebius_profile_name: str,
-                         endpoint_url: str,
                          bucket_name: str,
+                         endpoint_url: str,
                          mount_path: str,
                          _bucket_sub_path: Optional[str] = None) -> str:
     """Returns a command to install Nebius mount utility goofys."""
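The only change here is the parameter order: `bucket_name` now precedes `endpoint_url`, matching the updated call site in `NebiusStore.mount_command()`. A hedged sketch of a call in the new order, with placeholder values:

```python
from sky.data import mounting_utils

# Placeholder values for illustration only; the real call site passes the
# profile name, bucket, endpoint, and mount path from NebiusStore state.
mount_cmd = mounting_utils.get_nebius_mount_cmd(
    'nebius',                           # nebius_profile_name
    'my-bucket',                        # bucket_name (now the 2nd argument)
    'https://storage.example.invalid',  # endpoint_url (now the 3rd argument)
    '/mnt/data',                        # mount_path
)
```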
sky/data/storage.py
CHANGED
@@ -1616,9 +1616,25 @@ class S3Store(AbstractStore):
         # we exclude .git directory from the sync
         excluded_list = storage_utils.get_excluded_files(src_dir_path)
         excluded_list.append('.git/*')
+
+        # Process exclusion patterns to make them work correctly with aws
+        # s3 sync
+        processed_excludes = []
+        for excluded_path in excluded_list:
+            # Check if the path is a directory exclusion pattern
+            # For AWS S3 sync, directory patterns need to end with "/**" to
+            # exclude all contents
+            if (excluded_path.endswith('/') or os.path.isdir(
+                    os.path.join(src_dir_path, excluded_path.rstrip('/')))):
+                # Remove any trailing slash and add '/*' to exclude all
+                # contents
+                processed_excludes.append(f'{excluded_path.rstrip("/")}/*')
+            else:
+                processed_excludes.append(excluded_path)
+
         excludes = ' '.join([
             f'--exclude {shlex.quote(file_name)}'
-            for file_name in …
+            for file_name in processed_excludes
         ])
         src_dir_path = shlex.quote(src_dir_path)
         sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
@@ -4676,7 +4692,6 @@ class NebiusStore(AbstractStore):
                  _bucket_sub_path: Optional[str] = None):
         self.client: 'boto3.client.Client'
         self.bucket: 'StorageHandle'
-        self.region = region if region is not None else nebius.DEFAULT_REGION
         super().__init__(name, source, region, is_sky_managed,
                          sync_on_reconstruction, _bucket_sub_path)
 
@@ -4749,7 +4764,7 @@ class NebiusStore(AbstractStore):
             StorageBucketGetError: If fetching existing bucket fails
             StorageInitError: If general initialization fails.
         """
-        self.client = data_utils.create_nebius_client(…
+        self.client = data_utils.create_nebius_client()
         self.bucket, is_new_bucket = self._get_bucket()
         if self.is_sky_managed is None:
             # If is_sky_managed is not specified, then this is a new storage
@@ -4846,12 +4861,10 @@ class NebiusStore(AbstractStore):
             f'--include {shlex.quote(file_name)}'
             for file_name in file_names
         ])
-        endpoint_url = nebius.create_endpoint(self.region)
         base_dir_path = shlex.quote(base_dir_path)
         sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
                         f'{includes} {base_dir_path} '
                         f's3://{self.name}{sub_path} '
-                        f'--endpoint={endpoint_url} '
                         f'--profile={nebius.NEBIUS_PROFILE_NAME}')
         return sync_command
 
@@ -4863,12 +4876,10 @@ class NebiusStore(AbstractStore):
             f'--exclude {shlex.quote(file_name)}'
             for file_name in excluded_list
         ])
-        endpoint_url = nebius.create_endpoint(self.region)
         src_dir_path = shlex.quote(src_dir_path)
         sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
                         f'{src_dir_path} '
                         f's3://{self.name}{sub_path}/{dest_dir_name} '
-                        f'--endpoint={endpoint_url} '
                         f'--profile={nebius.NEBIUS_PROFILE_NAME}')
         return sync_command
 
@@ -4927,7 +4938,6 @@ class NebiusStore(AbstractStore):
         """
         nebius_s = nebius.resource('s3')
         bucket = nebius_s.Bucket(self.name)
-        endpoint_url = nebius.create_endpoint(self.region)
         try:
             # Try Public bucket case.
             # This line does not error out if the bucket is an external public
@@ -4942,7 +4952,6 @@ class NebiusStore(AbstractStore):
             # user.
             if error_code == '403':
                 command = (f'aws s3 ls s3://{self.name} '
-                           f'--endpoint={endpoint_url} '
                            f'--profile={nebius.NEBIUS_PROFILE_NAME}')
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageBucketGetError(
@@ -4954,7 +4963,7 @@ class NebiusStore(AbstractStore):
                 raise exceptions.StorageBucketGetError(
                     'Attempted to use a non-existent bucket as a source: '
                     f'{self.source}. Consider using `aws s3 ls '
-                    f's3://{self.name}…
+                    f's3://{self.name} '
                     f'--profile={nebius.NEBIUS_PROFILE_NAME}` to debug.')
 
         # If bucket cannot be found in both private and public settings,
@@ -4962,7 +4971,7 @@ class NebiusStore(AbstractStore):
         # Store object is being reconstructed for deletion or re-mount with
         # sky start, and error is raised instead.
         if self.sync_on_reconstruction:
-            bucket = self._create_nebius_bucket(self.name…
+            bucket = self._create_nebius_bucket(self.name)
             return bucket, True
         else:
             # Raised when Storage object is reconstructed for sky storage
@@ -4991,38 +5000,27 @@ class NebiusStore(AbstractStore):
             mount_path: str; Path to mount the bucket to.
         """
         install_cmd = mounting_utils.get_s3_mount_install_cmd()
-        endpoint_url = nebius.create_endpoint(self.region)
         nebius_profile_name = nebius.NEBIUS_PROFILE_NAME
+        endpoint_url = self.client.meta.endpoint_url
         mount_cmd = mounting_utils.get_nebius_mount_cmd(nebius_profile_name,
-                                                        endpoint_url,
                                                         self.bucket.name,
+                                                        endpoint_url,
                                                         mount_path,
                                                         self._bucket_sub_path)
         return mounting_utils.get_mounting_command(mount_path, install_cmd,
                                                    mount_cmd)
 
-    def _create_nebius_bucket(self,
-                              …
-                              region='auto') -> StorageHandle:
-        """Creates S3 bucket with specific name in specific region
+    def _create_nebius_bucket(self, bucket_name: str) -> StorageHandle:
+        """Creates S3 bucket with specific name
 
         Args:
             bucket_name: str; Name of bucket
-            region: str; Region name, e.g. us-west-1, us-east-2
         Raises:
             StorageBucketCreateError: If bucket creation fails.
         """
         nebius_client = self.client
         try:
-            …
-                nebius_client.create_bucket(Bucket=bucket_name)
-            else:
-                location = {'LocationConstraint': region}
-                nebius_client.create_bucket(Bucket=bucket_name,
-                                            CreateBucketConfiguration=location)
-            logger.info(f' {colorama.Style.DIM}Created Nebius bucket '
-                        f'{bucket_name!r} in {region}'
-                        f'{colorama.Style.RESET_ALL}')
+            nebius_client.create_bucket(Bucket=bucket_name)
         except aws.botocore_exceptions().ClientError as e:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketCreateError(
@@ -5070,9 +5068,7 @@ class NebiusStore(AbstractStore):
         # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
         # The fastest way to delete is to run `aws s3 rb --force`,
        # which removes the bucket by force.
-        endpoint_url = nebius.create_endpoint(self.region)
         remove_command = (f'aws s3 rb s3://{bucket_name} --force '
-                          f'--endpoint {endpoint_url} '
                           f'--profile={nebius.NEBIUS_PROFILE_NAME}')
 
         success = self._execute_nebius_remove_command(
@@ -5094,10 +5090,8 @@ class NebiusStore(AbstractStore):
     def _delete_nebius_bucket_sub_path(self, bucket_name: str,
                                        sub_path: str) -> bool:
         """Deletes the sub path from the bucket."""
-        endpoint_url = nebius.create_endpoint(self.region)
         remove_command = (
             f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
-            f'--endpoint {endpoint_url} '
             f'--profile={nebius.NEBIUS_PROFILE_NAME}')
         return self._execute_nebius_remove_command(
             remove_command, bucket_name, f'Removing objects from '
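The `S3Store` change pre-processes the exclusion list so that directory entries also exclude their contents under `aws s3 sync` (which matches patterns, not prefixes). A standalone sketch of that transformation applied to a hypothetical exclusion list:

```python
import os
from typing import List


def process_excludes(excluded_list: List[str], src_dir_path: str) -> List[str]:
    """Same rule as the diff: directory patterns get a trailing '/*'."""
    processed = []
    for excluded_path in excluded_list:
        if (excluded_path.endswith('/') or os.path.isdir(
                os.path.join(src_dir_path, excluded_path.rstrip('/')))):
            processed.append(f'{excluded_path.rstrip("/")}/*')
        else:
            processed.append(excluded_path)
    return processed


# Hypothetical input: 'logs/' is a directory entry, the rest are files/patterns.
print(process_excludes(['logs/', 'secret.env', '.git/*'], '/tmp/project'))
# -> ['logs/*', 'secret.env', '.git/*'] (unless those names happen to exist as
#    directories under /tmp/project, in which case they also get '/*')
```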
sky/data/storage_utils.py
CHANGED
@@ -4,7 +4,7 @@ import os
 import pathlib
 import shlex
 import subprocess
-from typing import Any, Dict, List, Optional, TextIO, Union
+from typing import Any, Dict, List, Optional, Set, TextIO, Union
 import warnings
 import zipfile
 
@@ -71,7 +71,7 @@ def get_excluded_files_from_skyignore(src_dir_path: str) -> List[str]:
     """List files and patterns ignored by the .skyignore file
     in the given source directory.
     """
-    excluded_list: …
+    excluded_list: Set[str] = set()
     expand_src_dir_path = os.path.expanduser(src_dir_path)
     skyignore_path = os.path.join(expand_src_dir_path,
                                   constants.SKY_IGNORE_FILE)
@@ -95,12 +95,12 @@ def get_excluded_files_from_skyignore(src_dir_path: str) -> List[str]:
                     for i in range(len(matching_files)):
                         matching_files[i] = os.path.relpath(
                             matching_files[i], expand_src_dir_path)
-                    excluded_list.…
+                    excluded_list.update(matching_files)
     except IOError as e:
         logger.warning(f'Error reading {skyignore_path}: '
                        f'{common_utils.format_exception(e, use_bracket=True)}')
 
-    return excluded_list
+    return list(excluded_list)
 
 
 def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
@@ -111,8 +111,8 @@ def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
     This will also be run for all submodules under the src_dir_path.
 
     Returns:
-        List[str] containing files and …
-        patterns…
+        List[str] containing files and folders to be ignored. There won't be any
+        patterns.
     """
     expand_src_dir_path = os.path.expanduser(src_dir_path)
 
@@ -210,10 +210,6 @@ def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
                 return []
 
             to_be_excluded = os.path.join(repo, item)
-            if item.endswith('/'):
-                # aws s3 sync and gsutil rsync require * to exclude
-                # files/dirs under the specified directory.
-                to_be_excluded += '*'
 
             excluded_list.append(to_be_excluded)
 
@@ -223,11 +219,21 @@ def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
 def get_excluded_files(src_dir_path: str) -> List[str]:
     # TODO: this could return a huge list of files,
     # should think of ways to optimize.
-    """List files and directories to be excluded.…
+    """List files and directories to be excluded.
+
+    Args:
+        src_dir_path (str): The path to the source directory.
+
+    Returns:
+        A list of relative paths to files and directories to be excluded from
+        the source directory.
+    """
     expand_src_dir_path = os.path.expanduser(src_dir_path)
     skyignore_path = os.path.join(expand_src_dir_path,
                                   constants.SKY_IGNORE_FILE)
     # Fail fast if the source is a file.
+    if not os.path.exists(expand_src_dir_path):
+        raise ValueError(f'{src_dir_path} does not exist.')
     if os.path.isfile(expand_src_dir_path):
         raise ValueError(f'{src_dir_path} is a file, not a directory.')
     if os.path.exists(skyignore_path):
@@ -235,12 +241,14 @@ def get_excluded_files(src_dir_path: str) -> List[str]:
                      f'Excluded files to sync to cluster based on '
                      f'{constants.SKY_IGNORE_FILE}.'
                      f'{colorama.Style.RESET_ALL}')
-        …
+        excluded_paths = get_excluded_files_from_skyignore(src_dir_path)
+    else:
+        logger.debug(f' {colorama.Style.DIM}'
+                     f'Excluded files to sync to cluster based on '
+                     f'{constants.GIT_IGNORE_FILE}.'
+                     f'{colorama.Style.RESET_ALL}')
+        excluded_paths = get_excluded_files_from_gitignore(src_dir_path)
+    return excluded_paths
 
 
 def zip_files_and_folders(items: List[str],
@@ -277,7 +285,8 @@ def zip_files_and_folders(items: List[str],
                 zipf.write(item)
             elif os.path.isdir(item):
                 excluded_files = set([
-                    os.path.join(item, f…
+                    os.path.join(item, f.rstrip('/'))
+                    for f in get_excluded_files(item)
                 ])
                 for root, dirs, files in os.walk(item, followlinks=False):
                     # Modify dirs in-place to control os.walk()'s traversal
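With the gitignore helper no longer appending `*`, the entries handed back by `get_excluded_files()` can be plain files, directories, or directories with a trailing slash, and each consumer now normalizes them itself. The `zip_files_and_folders` change does it by stripping the trailing slash before joining, so the paths compare equal to what `os.walk()` yields for directories; a small sketch of that normalization with hypothetical entries:

```python
import os

# Hypothetical exclusion entries: directory entries may keep a trailing slash
# now that the '*'-appending was removed from the gitignore helper.
excluded = ['logs/', 'build', 'notes.txt']
item = '/tmp/project'

# Same normalization as the diff: strip the trailing slash before joining.
excluded_files = {os.path.join(item, f.rstrip('/')) for f in excluded}
print(sorted(excluded_files))
# -> ['/tmp/project/build', '/tmp/project/logs', '/tmp/project/notes.txt']
```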
sky/execution.py
CHANGED
@@ -159,9 +159,9 @@ def _execute(
       no_setup: bool; whether to skip setup commands or not when (re-)launching.
       clone_disk_from: Optional[str]; if set, clone the disk from the specified
         cluster.
-      …
+      skip_unnecessary_provisioning: bool; if True, compare the calculated
         cluster config to the current cluster's config. If they match, shortcut
-        provisioning even if we have Stage.PROVISION.
+        provisioning and setup, even if we have Stage.PROVISION and Stage.SETUP.
 
     Returns:
       job_id: Optional[int]; the job ID of the submitted job. None if the
@@ -303,12 +303,13 @@ def _execute(
     task.sync_storage_mounts()
 
     try:
+        provisioning_skipped = False
         if Stage.PROVISION in stages:
             assert handle is None or skip_unnecessary_provisioning, (
                 'Provisioning requested, but handle is already set. PROVISION '
                 'should be excluded from stages or '
                 'skip_unecessary_provisioning should be set. ')
-            handle = backend.provision(
+            (handle, provisioning_skipped) = backend.provision(
                 task,
                 task.best_resources,
                 dryrun=dryrun,
@@ -341,7 +342,11 @@ def _execute(
         if no_setup:
             logger.info('Setup commands skipped.')
         elif Stage.SETUP in stages and not dryrun:
-            …
+            if skip_unnecessary_provisioning and provisioning_skipped:
+                logger.debug('Unnecessary provisioning was skipped, so '
+                             'skipping setup as well.')
+            else:
+                backend.setup(handle, task, detach_setup=detach_setup)
 
         if Stage.PRE_EXEC in stages and not dryrun:
             if idle_minutes_to_autostop is not None:
@@ -523,6 +528,8 @@ def launch(
             Stage.PROVISION,
             Stage.SYNC_WORKDIR,
             Stage.SYNC_FILE_MOUNTS,
+            # Setup will be skipped if provisioning was skipped.
+            Stage.SETUP,
             Stage.PRE_EXEC,
             Stage.EXEC,
             Stage.DOWN,
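Taken together, these hunks change `backend.provision` to also report whether provisioning was actually skipped, and reuse that flag to skip `Stage.SETUP` in the same pass. A toy, self-contained illustration of the new contract (the backend below is a stand-in, not SkyPilot's `CloudVmRayBackend`):

```python
# Toy illustration only: provision() returns a tuple whose second element can
# short-circuit setup when the existing cluster already matches the request.
class ToyBackend:

    def provision(self, task, cluster_already_matches: bool):
        provisioning_skipped = cluster_already_matches
        handle = object()  # stand-in for a resource handle
        return handle, provisioning_skipped

    def setup(self, handle, task):
        print('running setup commands')


backend = ToyBackend()
skip_unnecessary_provisioning = True
handle, provisioning_skipped = backend.provision(task=None,
                                                 cluster_already_matches=True)
if skip_unnecessary_provisioning and provisioning_skipped:
    print('provisioning skipped -> skipping setup as well')
else:
    backend.setup(handle, task=None)
```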
sky/jobs/client/sdk.py
CHANGED
@@ -82,6 +82,7 @@ def launch(
         f'{server_common.get_server_url()}/jobs/launch',
         json=json.loads(body.model_dump_json()),
         timeout=(5, None),
+        cookies=server_common.get_api_cookie_jar(),
     )
     return server_common.get_request_id(response)
 
@@ -138,6 +139,7 @@ def queue(refresh: bool,
         f'{server_common.get_server_url()}/jobs/queue',
         json=json.loads(body.model_dump_json()),
         timeout=(5, None),
+        cookies=server_common.get_api_cookie_jar(),
     )
     return server_common.get_request_id(response=response)
 
@@ -177,6 +179,7 @@ def cancel(
         f'{server_common.get_server_url()}/jobs/cancel',
         json=json.loads(body.model_dump_json()),
         timeout=(5, None),
+        cookies=server_common.get_api_cookie_jar(),
     )
     return server_common.get_request_id(response=response)
 
@@ -224,6 +227,7 @@ def tail_logs(name: Optional[str] = None,
         json=json.loads(body.model_dump_json()),
         stream=True,
         timeout=(5, None),
+        cookies=server_common.get_api_cookie_jar(),
     )
     request_id = server_common.get_request_id(response)
     return sdk.stream_response(request_id, response, output_stream)
@@ -267,6 +271,7 @@ def download_logs(
         f'{server_common.get_server_url()}/jobs/download_logs',
         json=json.loads(body.model_dump_json()),
         timeout=(5, None),
+        cookies=server_common.get_api_cookie_jar(),
     )
     job_id_remote_path_dict = sdk.stream_and_get(
         server_common.get_request_id(response))
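Every jobs client call now forwards the cookies stored for the API server, so authenticated deployments keep working across the jobs endpoints. A minimal sketch of the pattern with `requests`, using placeholder stand-ins for `server_common.get_server_url()` and `get_api_cookie_jar()`:

```python
import requests

# Placeholder stand-ins; the real helpers live in sky/server/common.py and
# load cookies persisted for the API server.
server_url = 'http://127.0.0.1:46580'
cookie_jar = requests.cookies.RequestsCookieJar()

response = requests.post(
    f'{server_url}/jobs/queue',
    json={'refresh': False},  # placeholder payload
    timeout=(5, None),
    cookies=cookie_jar,  # the new argument threaded through every call
)
print(response.status_code)
```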
sky/jobs/server/server.py
CHANGED
@@ -161,7 +161,11 @@ async def dashboard(request: fastapi.Request,
                 response = await client.request('GET',
                                                 dashboard_url,
                                                 timeout=5)
-                …
+                if response.is_success:
+                    break  # Connection successful, proceed with the request
+                # Raise an HTTPException here which will be caught by the
+                # following except block to retry with new connection
+                response.raise_for_status()
             except Exception as e:  # pylint: disable=broad-except
                 # We catch all exceptions to gracefully handle unknown
                 # errors and retry or raise an HTTPException to the client.
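The dashboard proxy previously treated any response as a success; it now breaks out of the retry loop only on a 2xx status and otherwise raises so the surrounding `except` re-establishes the connection. A hedged sketch of that shape with `httpx` (the retry budget and URL handling are simplified stand-ins, not the server's actual loop):

```python
import httpx


async def fetch_dashboard(dashboard_url: str) -> httpx.Response:
    # Simplified stand-in for the proxy's retry loop.
    for _ in range(3):  # illustrative retry budget
        try:
            async with httpx.AsyncClient() as client:
                response = await client.request('GET', dashboard_url, timeout=5)
                if response.is_success:
                    return response  # connection successful, proceed
                # Non-2xx: raise so the except below retries with a fresh
                # connection, mirroring the diff.
                response.raise_for_status()
        except Exception:  # pylint: disable=broad-except
            continue
    raise RuntimeError('dashboard unreachable after retries')
```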
sky/optimizer.py
CHANGED
@@ -6,6 +6,7 @@ import typing
 from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
 
 import colorama
+import numpy as np
 import prettytable
 
 from sky import check as sky_check
@@ -28,12 +29,10 @@ from sky.utils import ux_utils
 
 if typing.TYPE_CHECKING:
     import networkx as nx
-    import numpy as np
 
     from sky import dag as dag_lib
 else:
     nx = adaptors_common.LazyImport('networkx')
-    np = adaptors_common.LazyImport('numpy')
 
 logger = sky_logging.init_logger(__name__)
 