skypilot-nightly 1.0.0.dev20250814__py3-none-any.whl → 1.0.0.dev20250816__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/nebius.py +43 -1
- sky/backends/backend.py +5 -3
- sky/backends/backend_utils.py +22 -7
- sky/backends/cloud_vm_ray_backend.py +50 -18
- sky/backends/local_docker_backend.py +8 -3
- sky/client/cli/command.py +25 -10
- sky/client/sdk.py +51 -1
- sky/clouds/kubernetes.py +2 -6
- sky/clouds/nebius.py +3 -1
- sky/core.py +9 -3
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1121-2edb8ab2ba080a76.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-2f60a90b7d76838e.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/3015-fd15b3ff228f7738.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.bc5d2853355c9c47.js +1 -0
- sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +21 -0
- sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +10 -0
- sky/dashboard/out/_next/static/chunks/{4725.29550342bd53afd8.js → 4725.10f7a9a5d3ea8208.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{6135-85426374db04811e.js → 6135-4b4d5e824b7f9d3c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6633-efe924b9b8136699.js +40 -0
- sky/dashboard/out/_next/static/chunks/6856-e6f350f567182e87.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +1 -0
- sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +6 -0
- sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +18 -0
- sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +36 -0
- sky/dashboard/out/_next/static/chunks/8838.e7953f42af2b0544.js +45 -0
- sky/dashboard/out/_next/static/chunks/8969-6d493b1e2fa45826.js +1 -0
- sky/dashboard/out/_next/static/chunks/{1871-980a395e92633a5c.js → 9037-89a84fd7fa31362d.js} +2 -2
- sky/dashboard/out/_next/static/chunks/9277.71481d5b2e606e33.js +51 -0
- sky/dashboard/out/_next/static/chunks/9984.7eb6cc51fb460cae.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/{_app-c2ea34fda4f1f8c8.js → _app-ce361c6959bc2001.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-06afb50d25f7c61f.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-ec747e4f2dc39b57.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-81351f95f3bec08e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-c320641c2bcbbea6.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-ad2cd5aab787bc15.js +6 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-664c36eda967b1ba.js → [pool]-7d4182df6625fe10.js} +2 -7
- sky/dashboard/out/_next/static/chunks/pages/jobs-4b3ba1792dc6f21d.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-f72f73bcef9541dc.js → [name]-65f72dee417237ef.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-338de9df523d883a.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-b6987eb47888da9c.js +1 -0
- sky/dashboard/out/_next/static/yW7-Bc1l0EwIosbauU8LZ/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage_utils.py +29 -9
- sky/execution.py +13 -10
- sky/global_user_state.py +131 -2
- sky/jobs/constants.py +1 -1
- sky/jobs/recovery_strategy.py +0 -3
- sky/jobs/scheduler.py +14 -21
- sky/jobs/server/core.py +64 -10
- sky/jobs/server/utils.py +1 -1
- sky/jobs/state.py +1 -3
- sky/jobs/utils.py +159 -11
- sky/provision/aws/config.py +19 -3
- sky/provision/aws/instance.py +2 -1
- sky/provision/kubernetes/instance.py +2 -1
- sky/provision/nebius/utils.py +101 -86
- sky/provision/provisioner.py +13 -8
- sky/resources.py +5 -5
- sky/schemas/api/responses.py +50 -1
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/serve/replica_managers.py +123 -101
- sky/serve/serve_state.py +32 -0
- sky/serve/serve_utils.py +37 -16
- sky/serve/service.py +51 -17
- sky/server/common.py +2 -3
- sky/server/constants.py +1 -1
- sky/server/requests/payloads.py +6 -0
- sky/server/requests/serializers/decoders.py +20 -5
- sky/server/requests/serializers/encoders.py +21 -8
- sky/server/server.py +57 -11
- sky/templates/kubernetes-ray.yml.j2 +1 -0
- sky/utils/cli_utils/status_utils.py +2 -1
- sky/utils/common_utils.py +20 -0
- sky/utils/controller_utils.py +17 -4
- sky/utils/db/migration_utils.py +1 -1
- sky/utils/log_utils.py +14 -5
- sky/utils/resources_utils.py +25 -1
- sky/utils/schemas.py +3 -0
- sky/utils/ux_utils.py +36 -5
- {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250816.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250816.dist-info}/RECORD +107 -106
- sky/dashboard/out/_next/static/Y0eNlwi85qGRecLTin11y/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-a8a8f1adba34c892.js +0 -11
- sky/dashboard/out/_next/static/chunks/1559-6c00e20454194859.js +0 -30
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +0 -15
- sky/dashboard/out/_next/static/chunks/2641.142718b6b78a6f9b.js +0 -1
- sky/dashboard/out/_next/static/chunks/3785.6003d293cb83eab4.js +0 -1
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +0 -15
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +0 -13
- sky/dashboard/out/_next/static/chunks/691.5eeedf82cc243343.js +0 -55
- sky/dashboard/out/_next/static/chunks/6990-0f886f16e0d55ff8.js +0 -1
- sky/dashboard/out/_next/static/chunks/8056-5bdeda81199c0def.js +0 -1
- sky/dashboard/out/_next/static/chunks/8252.62b0d23aed618bb2.js +0 -16
- sky/dashboard/out/_next/static/chunks/8969-c9686994ddafcf01.js +0 -1
- sky/dashboard/out/_next/static/chunks/9159-11421c0f2909236f.js +0 -1
- sky/dashboard/out/_next/static/chunks/9360.85b0b1b4054574dd.js +0 -31
- sky/dashboard/out/_next/static/chunks/9666.cd4273f2a5c5802c.js +0 -1
- sky/dashboard/out/_next/static/chunks/9847.757720f3b40c0aa5.js +0 -30
- sky/dashboard/out/_next/static/chunks/9984.c5564679e467d245.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-078751bad714c017.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-da9cc0901349c2e9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-b30460f683e6ba96.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-13d53fffc03ccb52.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-fc9222e26c8e2f0d.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-154f55cf8af55be5.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/jobs-cdc60fb5d371e16a.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-7ed36e44e779d5c7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-c9695d657f78b5dc.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-8f67be60165724cc.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-00c0a51d21157453.js +0 -1
- /sky/dashboard/out/_next/static/chunks/{6989-37611fe6b86d274d.js → 6989-01359c57e018caa4.js} +0 -0
- /sky/dashboard/out/_next/static/{Y0eNlwi85qGRecLTin11y → yW7-Bc1l0EwIosbauU8LZ}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250816.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250816.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250816.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250814.dist-info → skypilot_nightly-1.0.0.dev20250816.dist-info}/top_level.txt +0 -0
sky/jobs/utils.py  CHANGED

@@ -85,6 +85,12 @@ _JOB_CANCELLED_MESSAGE = (
 _FINAL_JOB_STATUS_WAIT_TIMEOUT_SECONDS = 40


+class ManagedJobQueueResultType(enum.Enum):
+    """The type of the managed job queue result."""
+    DICT = 'DICT'
+    LIST = 'LIST'
+
+
 class UserSignal(enum.Enum):
     """The signal to be sent to the user."""
     CANCEL = 'CANCEL'
@@ -337,9 +343,6 @@ def update_managed_jobs_statuses(job_id: Optional[int] = None):
         if handle is not None:
             try:
                 if pool is None:
-                    global_user_state.add_cluster_event(
-                        cluster_name, None, 'Cluster was cleaned up.',
-                        global_user_state.ClusterEventType.STATUS_CHANGE)
                     terminate_cluster(cluster_name)
             except Exception as e:  # pylint: disable=broad-except
                 error_msg = (
@@ -1120,7 +1123,17 @@ def stream_logs(job_id: Optional[int],
     return stream_logs_by_id(job_id, follow, tail)


-def dump_managed_job_queue() -> str:
+def dump_managed_job_queue(
+    skip_finished: bool = False,
+    accessible_workspaces: Optional[List[str]] = None,
+    job_ids: Optional[List[int]] = None,
+    workspace_match: Optional[str] = None,
+    name_match: Optional[str] = None,
+    pool_match: Optional[str] = None,
+    page: Optional[int] = None,
+    limit: Optional[int] = None,
+    user_hashes: Optional[List[Optional[str]]] = None,
+) -> str:
     # Make sure to get all jobs - some logic below (e.g. high priority job
     # detection) requires a full view of the jobs table.
     jobs = managed_job_state.get_managed_jobs()
@@ -1147,6 +1160,31 @@ def dump_managed_job_queue() -> str:
         if priority is not None and priority > highest_blocking_priority:
             highest_blocking_priority = priority

+    if user_hashes:
+        jobs = [
+            job for job in jobs if job.get('user_hash', None) in user_hashes
+        ]
+    if accessible_workspaces:
+        jobs = [
+            job for job in jobs
+            if job.get('workspace', constants.SKYPILOT_DEFAULT_WORKSPACE) in
+            accessible_workspaces
+        ]
+    if skip_finished:
+        # Filter out the finished jobs. If a multi-task job is partially
+        # finished, we will include all its tasks.
+        non_finished_tasks = list(
+            filter(
+                lambda job: not managed_job_state.ManagedJobStatus(job[
+                    'status']).is_terminal(), jobs))
+        non_finished_job_ids = {job['job_id'] for job in non_finished_tasks}
+        jobs = list(
+            filter(lambda job: job['job_id'] in non_finished_job_ids, jobs))
+    if job_ids:
+        jobs = [job for job in jobs if job['job_id'] in job_ids]
+
+    jobs, total = filter_jobs(jobs, workspace_match, name_match, pool_match,
+                              page, limit)
     for job in jobs:
         end_at = job['end_at']
         if end_at is None:
@@ -1220,12 +1258,96 @@ def dump_managed_job_queue() -> str:
         else:
             job['details'] = None

-    return message_utils.encode_payload(jobs)
+    return message_utils.encode_payload({'jobs': jobs, 'total': total})
+
+
+def filter_jobs(
+    jobs: List[Dict[str, Any]],
+    workspace_match: Optional[str],
+    name_match: Optional[str],
+    pool_match: Optional[str],
+    page: Optional[int],
+    limit: Optional[int],
+    user_match: Optional[str] = None,
+    enable_user_match: bool = False,
+) -> Tuple[List[Dict[str, Any]], int]:
+    """Filter jobs based on the given criteria.
+
+    Args:
+        jobs: List of jobs to filter.
+        workspace_match: Workspace name to filter.
+        name_match: Job name to filter.
+        pool_match: Pool name to filter.
+        page: Page to filter.
+        limit: Limit to filter.
+        user_match: User name to filter.
+        enable_user_match: Whether to enable user match.
+
+    Returns:
+        List of filtered jobs and total number of jobs.
+    """
+
+    # TODO(hailong): refactor the whole function including the
+    # `dump_managed_job_queue()` to use DB filtering.
+
+    def _pattern_matches(job: Dict[str, Any], key: str,
+                         pattern: Optional[str]) -> bool:
+        if pattern is None:
+            return True
+        if key not in job:
+            return False
+        value = job[key]
+        if not value:
+            return False
+        return pattern in str(value)
+
+    def _handle_page_and_limit(
+        result: List[Dict[str, Any]],
+        page: Optional[int],
+        limit: Optional[int],
+    ) -> List[Dict[str, Any]]:
+        if page is None and limit is None:
+            return result
+        assert page is not None and limit is not None, (page, limit)
+        # page starts from 1
+        start = (page - 1) * limit
+        end = min(start + limit, len(result))
+        return result[start:end]
+
+    result = []
+    checks = [
+        ('workspace', workspace_match),
+        ('job_name', name_match),
+        ('pool', pool_match),
+    ]
+    if enable_user_match:
+        checks.append(('user_name', user_match))
+
+    for job in jobs:
+        if not all(
+                _pattern_matches(job, key, pattern) for key, pattern in checks):
+            continue
+        result.append(job)
+
+    total = len(result)
+
+    return _handle_page_and_limit(result, page, limit), total


-def load_managed_job_queue(payload: str) -> List[Dict[str, Any]]:
+def load_managed_job_queue(
+    payload: str
+) -> Tuple[List[Dict[str, Any]], int, ManagedJobQueueResultType]:
     """Load job queue from json string."""
-
+    result = message_utils.decode_payload(payload)
+    result_type = ManagedJobQueueResultType.DICT
+    if isinstance(result, dict):
+        jobs = result['jobs']
+        total = result['total']
+    else:
+        jobs = result
+        total = len(jobs)
+        result_type = ManagedJobQueueResultType.LIST
+
     for job in jobs:
         job['status'] = managed_job_state.ManagedJobStatus(job['status'])
         if 'user_hash' in job and job['user_hash'] is not None:
@@ -1233,7 +1355,7 @@ def load_managed_job_queue(payload: str) -> List[Dict[str, Any]]:
             # TODO(cooperc): Remove check before 0.12.0.
             user = global_user_state.get_user(job['user_hash'])
             job['user_name'] = user.name if user is not None else None
-    return jobs
+    return jobs, total, result_type


 def _get_job_status_from_tasks(
@@ -1580,9 +1702,35 @@ class ManagedJobCodeGen:
         """)

     @classmethod
-    def get_job_table(
-
-
+    def get_job_table(
+            cls,
+            skip_finished: bool = False,
+            accessible_workspaces: Optional[List[str]] = None,
+            job_ids: Optional[List[int]] = None,
+            workspace_match: Optional[str] = None,
+            name_match: Optional[str] = None,
+            pool_match: Optional[str] = None,
+            page: Optional[int] = None,
+            limit: Optional[int] = None,
+            user_hashes: Optional[List[Optional[str]]] = None,
+    ) -> str:
+        code = textwrap.dedent(f"""\
+        if managed_job_version < 9:
+            # For backward compatibility, since filtering is not supported
+            # before #6652.
+            # TODO(hailong): Remove compatibility before 0.12.0
+            job_table = utils.dump_managed_job_queue()
+        else:
+            job_table = utils.dump_managed_job_queue(
+                skip_finished={skip_finished},
+                accessible_workspaces={accessible_workspaces!r},
+                job_ids={job_ids!r},
+                workspace_match={workspace_match!r},
+                name_match={name_match!r},
+                pool_match={pool_match!r},
+                page={page!r},
+                limit={limit!r},
+                user_hashes={user_hashes!r})
         print(job_table, flush=True)
         """)
         return cls._build(code)
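The queue payload now carries pagination metadata: dump_managed_job_queue encodes {'jobs': ..., 'total': ...} after substring filtering and 1-based page/limit slicing, and load_managed_job_queue reports whether it received the new dict shape or the legacy list. Below is a minimal standalone sketch of the same filter-then-page semantics, using toy job dicts rather than the SkyPilot functions themselves; the helper name is illustrative only.

```python
from typing import Any, Dict, List, Optional, Tuple


def filter_and_page(jobs: List[Dict[str, Any]],
                    name_match: Optional[str] = None,
                    page: Optional[int] = None,
                    limit: Optional[int] = None
                    ) -> Tuple[List[Dict[str, Any]], int]:
    """Substring-filter on the job name, then slice by 1-based page/limit."""
    kept = [
        job for job in jobs
        if name_match is None or name_match in str(job.get('job_name', ''))
    ]
    total = len(kept)  # Total is counted before pagination, as in the diff.
    if page is None and limit is None:
        return kept, total
    start = (page - 1) * limit  # Page numbering starts from 1.
    return kept[start:start + limit], total


jobs = [{'job_id': i, 'job_name': f'train-{i}'} for i in range(1, 8)]
subset, total = filter_and_page(jobs, name_match='train', page=2, limit=3)
print(total, [job['job_id'] for job in subset])  # 7 [4, 5, 6]
```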
sky/provision/aws/config.py  CHANGED

@@ -105,13 +105,29 @@ def bootstrap_instances(
                                       expected_sg_name,
                                       extended_ip_rules)
     if expected_sg_name != aws_cloud.DEFAULT_SECURITY_GROUP_NAME:
-
+        logger.debug('Attempting to create the default security group.')
+        # Attempt to create the default security group. This is needed
         # to enable us to use the default security group to quickly
         # delete the cluster. If the default security group is not created,
        # we will need to block on instance termination to delete the
         # security group.
-
-
+        try:
+            _configure_security_group(ec2, vpc_id,
+                                      aws_cloud.DEFAULT_SECURITY_GROUP_NAME,
+                                      [])
+            logger.debug('Default security group created.')
+        except exceptions.NoClusterLaunchedError as e:
+            if 'not authorized to perform: ec2:CreateSecurityGroup' in str(
+                    e):
+                # User does not have permission to create the default
+                # security group.
+                logger.debug('User does not have permission to create '
+                             'the default security group. '
+                             f'{e}')
+                pass
+            else:
+                raise e
+
     end_time = time.time()
     elapsed = end_time - start_time
     logger.info(
sky/provision/aws/instance.py  CHANGED

@@ -713,7 +713,8 @@ def terminate_instances(
         instances.terminate()
     else:
         # Case 4: We are managing the non-default sg. The default SG does not
-        # exist. We must block on instance termination
+        # exist. We must block on instance termination so that we can
+        # delete the security group.
         instances.terminate()
         for instance in instances:
             instance.wait_until_terminated()
sky/provision/kubernetes/instance.py  CHANGED

@@ -1465,7 +1465,8 @@ def query_instances(
                 target_pod_name)
             reason = (f'{target_pod_name}: {reason}'
                       if reason is not None else None)
-
+            if not non_terminated_only:
+                cluster_status[target_pod_name] = (None, reason)

     return cluster_status

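The Kubernetes fix records a terminated pod's (None, reason) entry only when the caller asked for all instances. A toy illustration of that non_terminated_only contract follows; it is not the real provisioner signature.

```python
from typing import Dict, Optional, Tuple

StatusMap = Dict[str, Tuple[Optional[str], Optional[str]]]


def summarize_pods(phases: Dict[str, Optional[str]],
                   non_terminated_only: bool = True) -> StatusMap:
    """Map pod name -> (status, reason); a phase of None means the pod is gone."""
    out: StatusMap = {}
    for name, phase in phases.items():
        if phase == 'Running':
            out[name] = ('UP', None)
        elif not non_terminated_only:
            # Terminated or missing pods are only reported on full queries.
            out[name] = (None, f'{name}: {phase or "missing"}')
    return out


print(summarize_pods({'pod-a': 'Running', 'pod-b': None}))
print(summarize_pods({'pod-a': 'Running', 'pod-b': None},
                     non_terminated_only=False))
```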
sky/provision/nebius/utils.py  CHANGED

@@ -36,8 +36,10 @@ def retry(func):

 def get_project_by_region(region: str) -> str:
     service = nebius.iam().ProjectServiceClient(nebius.sdk())
-    projects =
-
+    projects = nebius.sync_call(
+        service.list(
+            nebius.iam().ListProjectsRequest(parent_id=nebius.get_tenant_id()),
+            timeout=nebius.READ_TIMEOUT))

     # Check is there project if in config
     project_id = skypilot_config.get_effective_region_config(
@@ -56,19 +58,21 @@ def get_or_create_gpu_cluster(name: str, project_id: str, fabric: str) -> str:
     """
     service = nebius.compute().GpuClusterServiceClient(nebius.sdk())
     try:
-        cluster =
-
-            name=name,
-        )).wait()
-        cluster_id = cluster.metadata.id
-    except nebius.request_error():
-        cluster = service.create(nebius.compute().CreateGpuClusterRequest(
-            metadata=nebius.nebius_common().ResourceMetadata(
+        cluster = nebius.sync_call(
+            service.get_by_name(nebius.nebius_common().GetByNameRequest(
                 parent_id=project_id,
                 name=name,
-            )
-
-
+            )))
+        cluster_id = cluster.metadata.id
+    except nebius.request_error():
+        cluster = nebius.sync_call(
+            service.create(nebius.compute().CreateGpuClusterRequest(
+                metadata=nebius.nebius_common().ResourceMetadata(
+                    parent_id=project_id,
+                    name=name,
+                ),
+                spec=nebius.compute().GpuClusterSpec(
+                    infiniband_fabric=fabric))))
         cluster_id = cluster.resource_id
     return cluster_id

@@ -78,14 +82,16 @@ def delete_cluster(name: str, region: str) -> None:
     project_id = get_project_by_region(region)
     service = nebius.compute().GpuClusterServiceClient(nebius.sdk())
     try:
-        cluster =
-
-
-
+        cluster = nebius.sync_call(
+            service.get_by_name(nebius.nebius_common().GetByNameRequest(
+                parent_id=project_id,
+                name=name,
+            )))
         cluster_id = cluster.metadata.id
         logger.debug(f'Found GPU Cluster : {cluster_id}.')
-
-
+        nebius.sync_call(
+            service.delete(
+                nebius.compute().DeleteGpuClusterRequest(id=cluster_id)))
         logger.debug(f'Deleted GPU Cluster : {cluster_id}.')
     except nebius.request_error():
         logger.debug('GPU Cluster does not exist.')
@@ -94,8 +100,10 @@
 def list_instances(project_id: str) -> Dict[str, Dict[str, Any]]:
     """Lists instances associated with API key."""
     service = nebius.compute().InstanceServiceClient(nebius.sdk())
-    result =
-
+    result = nebius.sync_call(
+        service.list(
+            nebius.compute().ListInstancesRequest(parent_id=project_id),
+            timeout=nebius.READ_TIMEOUT))

     instances = result

@@ -116,12 +124,13 @@ def list_instances(project_id: str) -> Dict[str, Dict[str, Any]]:

 def stop(instance_id: str) -> None:
     service = nebius.compute().InstanceServiceClient(nebius.sdk())
-
+    nebius.sync_call(
+        service.stop(nebius.compute().StopInstanceRequest(id=instance_id)))
     retry_count = 0
     while retry_count < nebius.MAX_RETRIES_TO_INSTANCE_STOP:
         service = nebius.compute().InstanceServiceClient(nebius.sdk())
-        instance =
-            id=instance_id,))
+        instance = nebius.sync_call(
+            service.get(nebius.compute().GetInstanceRequest(id=instance_id,)))
         if instance.status.state.name == 'STOPPED':
             break
         time.sleep(POLL_INTERVAL)
@@ -138,12 +147,13 @@ def stop(instance_id: str) -> None:

 def start(instance_id: str) -> None:
     service = nebius.compute().InstanceServiceClient(nebius.sdk())
-
+    nebius.sync_call(
+        service.start(nebius.compute().StartInstanceRequest(id=instance_id)))
     retry_count = 0
     while retry_count < nebius.MAX_RETRIES_TO_INSTANCE_START:
         service = nebius.compute().InstanceServiceClient(nebius.sdk())
-        instance =
-            id=instance_id,))
+        instance = nebius.sync_call(
+            service.get(nebius.compute().GetInstanceRequest(id=instance_id,)))
         if instance.status.state.name == 'RUNNING':
             break
         time.sleep(POLL_INTERVAL)
@@ -212,24 +222,26 @@ def launch(cluster_name_on_cloud: str,
                                          project_id, fabric)

     service = nebius.compute().DiskServiceClient(nebius.sdk())
-    disk =
-
-
-
-
-
-
-
-
-
-
+    disk = nebius.sync_call(
+        service.create(nebius.compute().CreateDiskRequest(
+            metadata=nebius.nebius_common().ResourceMetadata(
+                parent_id=project_id,
+                name=disk_name,
+            ),
+            spec=nebius.compute().DiskSpec(
+                source_image_family=nebius.compute().SourceImageFamily(
+                    image_family=image_family),
+                size_gibibytes=disk_size,
+                type=nebius.compute().DiskSpec.DiskType.NETWORK_SSD,
+            ))))
     disk_id = disk.resource_id
     retry_count = 0
     while retry_count < nebius.MAX_RETRIES_TO_DISK_CREATE:
-        disk =
-
-
-
+        disk = nebius.sync_call(
+            service.get_by_name(nebius.nebius_common().GetByNameRequest(
+                parent_id=project_id,
+                name=disk_name,
+            )))
         if disk.status.state.name == 'READY':
             break
         logger.debug(f'Waiting for disk {disk_name} to be ready.')
@@ -254,50 +266,53 @@ def launch(cluster_name_on_cloud: str,
                     id=fs['filesystem_id'])))

     service = nebius.vpc().SubnetServiceClient(nebius.sdk())
-    sub_net =
-        parent_id=project_id,))
+    sub_net = nebius.sync_call(
+        service.list(nebius.vpc().ListSubnetsRequest(parent_id=project_id,)))

     service = nebius.compute().InstanceServiceClient(nebius.sdk())
-
-
-
-
-
-
-
-
-
-
-            ).AttachedDiskSpec
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    logger.debug(f'Creating instance {instance_name} in project {project_id}.')
+    nebius.sync_call(
+        service.create(nebius.compute().CreateInstanceRequest(
+            metadata=nebius.nebius_common().ResourceMetadata(
+                parent_id=project_id,
+                name=instance_name,
+            ),
+            spec=nebius.compute().InstanceSpec(
+                gpu_cluster=nebius.compute().InstanceGpuClusterSpec(
+                    id=cluster_id,) if cluster_id is not None else None,
+                boot_disk=nebius.compute().AttachedDiskSpec(
+                    attach_mode=nebius.compute(
+                    ).AttachedDiskSpec.AttachMode.READ_WRITE,
+                    existing_disk=nebius.compute().ExistingDisk(id=disk_id)),
+                cloud_init_user_data=user_data,
+                resources=nebius.compute().ResourcesSpec(platform=platform,
+                                                         preset=preset),
+                filesystems=filesystems_spec if filesystems_spec else None,
+                network_interfaces=[
+                    nebius.compute().NetworkInterfaceSpec(
+                        subnet_id=sub_net.items[0].metadata.id,
+                        ip_address=nebius.compute().IPAddress(),
+                        name='network-interface-0',
+                        public_ip_address=nebius.compute().PublicIPAddress()
+                        if associate_public_ip_address else None,
+                    )
+                ],
+                recovery_policy=nebius.compute().InstanceRecoveryPolicy.FAIL
+                if use_spot else None,
+                preemptible=nebius.compute().PreemptibleSpec(
+                    priority=1,
+                    on_preemption=nebius.compute().PreemptibleSpec.
+                    PreemptionPolicy.STOP) if use_spot else None,
+            ))))
     instance_id = ''
     retry_count = 0
     while retry_count < nebius.MAX_RETRIES_TO_INSTANCE_READY:
         service = nebius.compute().InstanceServiceClient(nebius.sdk())
-        instance =
-
-
-
+        instance = nebius.sync_call(
+            service.get_by_name(nebius.nebius_common().GetByNameRequest(
+                parent_id=project_id,
+                name=instance_name,
+            )))
         if instance.status.state.name == 'STARTING':
             instance_id = instance.metadata.id
             break
@@ -317,19 +332,19 @@ def launch(cluster_name_on_cloud: str,
 def remove(instance_id: str) -> None:
     """Terminates the given instance."""
     service = nebius.compute().InstanceServiceClient(nebius.sdk())
-    result =
-        nebius.compute().GetInstanceRequest(id=instance_id))
+    result = nebius.sync_call(
+        service.get(nebius.compute().GetInstanceRequest(id=instance_id)))
     disk_id = result.spec.boot_disk.existing_disk.id
-
-        nebius.compute().DeleteInstanceRequest(id=instance_id))
+    nebius.sync_call(
+        service.delete(nebius.compute().DeleteInstanceRequest(id=instance_id)))
     retry_count = 0
     # The instance begins deleting and attempts to delete the disk.
     # Must wait until the disk is unlocked and becomes deletable.
     while retry_count < nebius.MAX_RETRIES_TO_DISK_DELETE:
         try:
             service = nebius.compute().DiskServiceClient(nebius.sdk())
-
-                nebius.compute().DeleteDiskRequest(id=disk_id))
+            nebius.sync_call(
+                service.delete(nebius.compute().DeleteDiskRequest(id=disk_id)))
             break
         except nebius.request_error():
             logger.debug('Waiting for disk deletion.')
sky/provision/provisioner.py  CHANGED

@@ -76,7 +76,8 @@ def _bulk_provision(
     logger.debug(f'\nWaiting for instances of {cluster_name!r} to be ready...')
     rich_utils.force_update_status(
         ux_utils.spinner_message('Launching - Checking instance status',
-                                 str(provision_logging.config.log_path)
+                                 str(provision_logging.config.log_path),
+                                 cluster_name=str(cluster_name)))
     # AWS would take a very short time (<<1s) updating the state of the
     # instance.
     time.sleep(1)
@@ -462,9 +463,9 @@ def _post_provision_setup(
     docker_config = config_from_yaml.get('docker', {})

     with rich_utils.safe_status(
-            ux_utils.spinner_message(
-
-
+            ux_utils.spinner_message('Launching - Waiting for SSH access',
+                                     provision_logging.config.log_path,
+                                     cluster_name=str(cluster_name))) as status:
         # If on Kubernetes, skip SSH check since the pods are guaranteed to be
         # ready by the provisioner, and we use kubectl instead of SSH to run the
         # commands and rsync on the pods. SSH will still be ready after a while
@@ -493,7 +494,8 @@ def _post_provision_setup(
             status.update(
                 ux_utils.spinner_message(
                     'Launching - Initializing docker container',
-                    provision_logging.config.log_path
+                    provision_logging.config.log_path,
+                    cluster_name=str(cluster_name)))
             docker_user = instance_setup.initialize_docker(
                 cluster_name.name_on_cloud,
                 docker_config=docker_config,
@@ -541,7 +543,8 @@ def _post_provision_setup(

         runtime_preparation_str = (ux_utils.spinner_message(
             'Preparing SkyPilot runtime ({step}/3 - {step_name})',
-            provision_logging.config.log_path
+            provision_logging.config.log_path,
+            cluster_name=str(cluster_name)))
         status.update(
             runtime_preparation_str.format(step=1, step_name='initializing'))
         instance_setup.internal_file_mounts(cluster_name.name_on_cloud,
@@ -679,7 +682,8 @@ def _post_provision_setup(
         if logging_agent:
             status.update(
                 ux_utils.spinner_message('Setting up logging agent',
-                                         provision_logging.config.log_path
+                                         provision_logging.config.log_path,
+                                         cluster_name=str(cluster_name)))
             instance_setup.setup_logging_on_cluster(logging_agent, cluster_name,
                                                     cluster_info,
                                                     ssh_credentials)
@@ -689,7 +693,8 @@ def _post_provision_setup(

     logger.info(
         ux_utils.finishing_message(f'Cluster launched: {cluster_name}.',
-                                   provision_logging.config.log_path
+                                   provision_logging.config.log_path,
+                                   cluster_name=str(cluster_name)))
     return cluster_info

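Every spinner and finishing message in provisioner.py now also passes cluster_name=str(cluster_name). The matching ux_utils change (+36 -5 in sky/utils/ux_utils.py) is not part of the excerpt above, so this sketch only mirrors the call-site shape; the real signature and formatting may differ.

```python
from typing import Optional


def spinner_message(message: str,
                    log_path: Optional[str] = None,
                    cluster_name: Optional[str] = None) -> str:
    """Hypothetical status-line builder mirroring the call sites above."""
    parts = [message]
    if cluster_name:
        parts.append(f'({cluster_name})')
    if log_path:
        parts.append(f'[log: {log_path}]')
    return ' '.join(parts)


print(spinner_message('Launching - Waiting for SSH access',
                      'provision.log', cluster_name='my-cluster'))
```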
sky/resources.py  CHANGED

@@ -37,7 +37,7 @@ if typing.TYPE_CHECKING:

 logger = sky_logging.init_logger(__name__)

-
+DEFAULT_DISK_SIZE_GB = 256

 RESOURCE_CONFIG_ALIASES = {
     'gpus': 'accelerators',
@@ -319,7 +319,7 @@ class Resources:
             self._disk_size = int(
                 resources_utils.parse_memory_resource(disk_size, 'disk_size'))
         else:
-            self._disk_size =
+            self._disk_size = DEFAULT_DISK_SIZE_GB

         self._image_id: Optional[Dict[Optional[str], str]] = None
         if isinstance(image_id, str):
@@ -482,7 +482,7 @@ class Resources:
             network_tier = f', network_tier={self.network_tier.value}'

         disk_size = ''
-        if self.disk_size !=
+        if self.disk_size != DEFAULT_DISK_SIZE_GB:
             disk_size = f', disk_size={self.disk_size}'

         ports = ''
@@ -1766,7 +1766,7 @@ class Resources:
             self._accelerators is None,
             self._accelerator_args is None,
             not self._use_spot_specified,
-            self._disk_size ==
+            self._disk_size == DEFAULT_DISK_SIZE_GB,
             self._disk_tier is None,
             self._network_tier is None,
             self._image_id is None,
@@ -2255,7 +2255,7 @@ class Resources:
         accelerator_args = state.pop('accelerator_args', None)
         state['_accelerator_args'] = accelerator_args

-        disk_size = state.pop('disk_size',
+        disk_size = state.pop('disk_size', DEFAULT_DISK_SIZE_GB)
         state['_disk_size'] = disk_size

         if version < 2:
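The resources change replaces the hard-coded default disk size with a module-level DEFAULT_DISK_SIZE_GB = 256 used in the constructor default, the repr check, and the state-upgrade path. A tiny standalone sketch of that upgrade behavior (not the Resources class itself):

```python
DEFAULT_DISK_SIZE_GB = 256


def upgrade_state(state: dict) -> dict:
    """Mirror of the state.pop('disk_size', DEFAULT_DISK_SIZE_GB) upgrade."""
    state = dict(state)
    state['_disk_size'] = state.pop('disk_size', DEFAULT_DISK_SIZE_GB)
    return state


print(upgrade_state({'cpus': '4+'}))      # {'cpus': '4+', '_disk_size': 256}
print(upgrade_state({'disk_size': 512}))  # {'_disk_size': 512}
```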