skypilot-nightly 1.0.0.dev20250328__py3-none-any.whl → 1.0.0.dev20250330__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/cli.py +3 -0
- sky/client/cli.py +3 -0
- sky/clouds/do.py +2 -0
- sky/data/storage_utils.py +2 -8
- sky/optimizer.py +2 -45
- sky/serve/__init__.py +1 -0
- sky/serve/autoscalers.py +26 -11
- sky/serve/replica_managers.py +77 -6
- sky/serve/serve_utils.py +80 -0
- sky/serve/server/core.py +4 -73
- sky/serve/service_spec.py +35 -3
- sky/serve/spot_placer.py +278 -0
- sky/server/common.py +15 -7
- sky/server/requests/executor.py +1 -1
- sky/server/requests/queues/mp_queue.py +8 -1
- sky/server/server.py +1 -1
- sky/utils/registry.py +2 -0
- sky/utils/resources_utils.py +50 -0
- sky/utils/schemas.py +10 -0
- {skypilot_nightly-1.0.0.dev20250328.dist-info → skypilot_nightly-1.0.0.dev20250330.dist-info}/METADATA +3 -2
- {skypilot_nightly-1.0.0.dev20250328.dist-info → skypilot_nightly-1.0.0.dev20250330.dist-info}/RECORD +26 -25
- {skypilot_nightly-1.0.0.dev20250328.dist-info → skypilot_nightly-1.0.0.dev20250330.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250328.dist-info → skypilot_nightly-1.0.0.dev20250330.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250328.dist-info → skypilot_nightly-1.0.0.dev20250330.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250328.dist-info → skypilot_nightly-1.0.0.dev20250330.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
 import urllib.request
 
 # Replaced with the current commit when building the wheels.
-_SKYPILOT_COMMIT_SHA = '
+_SKYPILOT_COMMIT_SHA = 'f90ccc1757680ccbff2fb8d86fc6dfd4242bd182'
 
 
 def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
 
 
 __commit__ = _get_git_commit()
-__version__ = '1.0.0.
+__version__ = '1.0.0.dev20250330'
 __root_dir__ = os.path.dirname(os.path.abspath(__file__))
 
 
sky/cli.py
CHANGED
@@ -3943,6 +3943,7 @@ def jobs_launch(
               required=False,
               help='Show jobs from all users.')
 @click.option('--all',
+              '-a',
               default=False,
               is_flag=True,
               required=False,
@@ -4394,6 +4395,7 @@ def serve_up(
     )
     click.secho('Service spec:', fg='cyan')
     click.echo(task.service)
+    serve_lib.validate_service_task(task)
 
     click.secho('Each replica will use the following resources (estimated):',
                 fg='cyan')
@@ -4493,6 +4495,7 @@ def serve_update(service_name: str, service_yaml: Tuple[str, ...],
     )
     click.secho('Service spec:', fg='cyan')
     click.echo(task.service)
+    serve_lib.validate_service_task(task)
 
     click.secho('New replica will use the following resources (estimated):',
                 fg='cyan')
sky/client/cli.py
CHANGED
@@ -3943,6 +3943,7 @@ def jobs_launch(
               required=False,
               help='Show jobs from all users.')
 @click.option('--all',
+              '-a',
               default=False,
               is_flag=True,
               required=False,
@@ -4394,6 +4395,7 @@ def serve_up(
     )
     click.secho('Service spec:', fg='cyan')
     click.echo(task.service)
+    serve_lib.validate_service_task(task)
 
     click.secho('Each replica will use the following resources (estimated):',
                 fg='cyan')
@@ -4493,6 +4495,7 @@ def serve_update(service_name: str, service_yaml: Tuple[str, ...],
     )
     click.secho('Service spec:', fg='cyan')
     click.echo(task.service)
+    serve_lib.validate_service_task(task)
 
     click.secho('New replica will use the following resources (estimated):',
                 fg='cyan')
sky/clouds/do.py
CHANGED
@@ -280,6 +280,8 @@ class DO(clouds.Cloud):
         return True, None
 
     def get_credential_file_mounts(self) -> Dict[str, str]:
+        if do_utils.CREDENTIALS_PATH is None:
+            return {}
         if not os.path.exists(os.path.expanduser(do_utils.CREDENTIALS_PATH)):
             return {}
         return {
sky/data/storage_utils.py
CHANGED
@@ -18,12 +18,6 @@ from sky.utils import log_utils
 
 logger = sky_logging.init_logger(__name__)
 
-_FILE_EXCLUSION_FROM_GITIGNORE_FAILURE_MSG = (
-    f'{colorama.Fore.YELLOW}Warning: Files/dirs '
-    'specified in .gitignore will be uploaded '
-    'to the cloud storage for {path!r}'
-    'due to the following error: {error_msg!r}')
-
 _USE_SKYIGNORE_HINT = (
     'To avoid using .gitignore, you can create a .skyignore file instead.')
 
@@ -172,7 +166,7 @@ def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
     submodules = submodules_output.stdout.split('\0')[:-1]
 
     # The empty string is the relative reference to the src_dir_path.
-    all_git_repos = ['
+    all_git_repos = [''] + [
         # We only care about submodules that are a subdirectory of src_dir_path.
         submodule for submodule in submodules if not submodule.startswith('../')
     ]
@@ -208,7 +202,7 @@ def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
 
     for item in output_list:
 
-        if repo == '
+        if repo == '' and item == './':
            logger.warning(f'{src_dir_path} is within a git repo, but the '
                           'entire directory is ignored by git. We will '
                           'ignore all git exclusions. '
sky/optimizer.py
CHANGED
@@ -1140,50 +1140,6 @@ class DummyCloud(clouds.Cloud):
         pass
 
 
-def _make_launchables_for_valid_region_zones(
-        launchable_resources: resources_lib.Resources
-) -> List[resources_lib.Resources]:
-    assert launchable_resources.is_launchable()
-    # In principle, all provisioning requests should be made at the granularity
-    # of a single zone. However, for on-demand instances, we batch the requests
-    # to the zones in the same region in order to leverage the region-level
-    # provisioning APIs of AWS and Azure. This way, we can reduce the number of
-    # API calls, and thus the overall failover time. Note that this optimization
-    # does not affect the user cost since the clouds charge the same prices for
-    # on-demand instances in the same region regardless of the zones. On the
-    # other hand, for spot instances, we do not batch the requests because the
-    # "AWS" spot prices may vary across zones.
-    # For GCP, we do not batch the requests because GCP reservation system is
-    # zone based. Therefore, price estimation is potentially different across
-    # zones.
-
-    # NOTE(woosuk): GCP does not support region-level provisioning APIs. Thus,
-    # while we return per-region resources here, the provisioner will still
-    # issue the request for one zone at a time.
-    # NOTE(woosuk): If we support Azure spot instances, we should batch the
-    # requests since Azure spot prices are region-level.
-    # TODO(woosuk): Batch the per-zone AWS spot instance requests if they are
-    # in the same region and have the same price.
-    # TODO(woosuk): A better design is to implement batching at a higher level
-    # (e.g., in provisioner or optimizer), not here.
-    launchables = []
-    regions = launchable_resources.get_valid_regions_for_launchable()
-    for region in regions:
-        if (launchable_resources.use_spot and region.zones is not None or
-                launchable_resources.cloud.optimize_by_zone()):
-            # Spot instances.
-            # Do not batch the per-zone requests.
-            for zone in region.zones:
-                launchables.append(
-                    launchable_resources.copy(region=region.name,
-                                              zone=zone.name))
-        else:
-            # On-demand instances.
-            # Batch the requests at the granularity of a single region.
-            launchables.append(launchable_resources.copy(region=region.name))
-    return launchables
-
-
 def _filter_out_blocked_launchable_resources(
         launchable_resources: Iterable[resources_lib.Resources],
         blocked_resources: Iterable[resources_lib.Resources]):
@@ -1313,7 +1269,8 @@ def _fill_in_launchable_resources(
             cheapest = feasible_resources.resources_list[0]
             # Generate region/zone-specified resources.
             launchable[resources].extend(
-
+                resources_utils.make_launchables_for_valid_region_zones(
+                    cheapest))
             cloud_candidates[cloud] = feasible_resources.resources_list
         else:
             all_fuzzy_candidates.update(
sky/serve/__init__.py
CHANGED
@@ -21,6 +21,7 @@ from sky.serve.serve_utils import generate_service_name
 from sky.serve.serve_utils import ServeCodeGen
 from sky.serve.serve_utils import ServiceComponent
 from sky.serve.serve_utils import UpdateMode
+from sky.serve.serve_utils import validate_service_task
 from sky.serve.service_spec import SkyServiceSpec
 
 os.makedirs(os.path.expanduser(SKYSERVE_METADATA_DIR), exist_ok=True)
sky/serve/autoscalers.py
CHANGED
@@ -1,5 +1,6 @@
 """Autoscalers: perform autoscaling by monitoring metrics."""
 import bisect
+import copy
 import dataclasses
 import enum
 import math
@@ -56,8 +57,8 @@ class AutoscalerDecision:
 def _generate_scale_up_decisions(
         num: int, target: Optional[Dict[str, Any]]) -> List[AutoscalerDecision]:
     return [
-        AutoscalerDecision(AutoscalerDecisionOperator.SCALE_UP,
-
+        AutoscalerDecision(AutoscalerDecisionOperator.SCALE_UP,
+                           copy.copy(target)) for _ in range(num)
     ]
 
 
@@ -134,6 +135,7 @@ class Autoscaler:
         self.min_replicas: int = spec.min_replicas
         self.max_replicas: int = (spec.max_replicas if spec.max_replicas
                                   is not None else spec.min_replicas)
+        self.num_overprovision: Optional[int] = spec.num_overprovision
         # Target number of replicas is initialized to min replicas
        self.target_num_replicas: int = spec.min_replicas
        self.latest_version: int = constants.INITIAL_VERSION
@@ -143,6 +145,12 @@ class Autoscaler:
         self.latest_version_ever_ready: int = self.latest_version - 1
         self.update_mode = serve_utils.DEFAULT_UPDATE_MODE
 
+    def get_final_target_num_replicas(self) -> int:
+        """Get the final target number of replicas."""
+        if self.num_overprovision is None:
+            return self.target_num_replicas
+        return self.target_num_replicas + self.num_overprovision
+
     def _calculate_target_num_replicas(self) -> int:
         """Calculate target number of replicas."""
         raise NotImplementedError
@@ -207,7 +215,7 @@ class Autoscaler:
         0, to make the service scale faster when the service is not running.
         This will happen when min_replicas = 0 and no traffic.
         """
-        if self.
+        if self.get_final_target_num_replicas() == 0:
             return constants.AUTOSCALER_NO_REPLICA_DECISION_INTERVAL_SECONDS
         else:
             return constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS
@@ -236,13 +244,14 @@ class Autoscaler:
         # old and latest versions are allowed in rolling update, this will
         # not affect the time it takes for the service to updated to the
         # latest version.
-        if num_latest_ready_replicas >=
+        if (num_latest_ready_replicas >=
+                self.get_final_target_num_replicas()):
             # Once the number of ready new replicas is greater than or equal
             # to the target, we can scale down all old replicas.
             return [info.replica_id for info in old_nonterminal_replicas]
         # If rolling update is in progress, we scale down old replicas
         # based on the number of ready new replicas.
-        num_old_replicas_to_keep = (self.
+        num_old_replicas_to_keep = (self.get_final_target_num_replicas() -
                                     num_latest_ready_replicas)
         # Remove old replicas (especially old launching replicas) and only
         # keep the required number of replicas, as we want to let the new
@@ -422,6 +431,7 @@ class _AutoscalerWithHysteresis(Autoscaler):
             f'Old target number of replicas: {old_target_num_replicas}. '
             f'Current target number of replicas: {target_num_replicas}. '
             f'Final target number of replicas: {self.target_num_replicas}. '
+            f'Num overprovision: {self.num_overprovision}. '
             f'Upscale counter: {self.upscale_counter}/'
             f'{self.scale_up_threshold}. '
             f'Downscale counter: {self.downscale_counter}/'
@@ -505,8 +515,9 @@ class RequestRateAutoscaler(_AutoscalerWithHysteresis):
 
         # Case 1. when latest_nonterminal_replicas is less
         # than num_to_provision, we always scale up new replicas.
-
-
+        target_num_replicas = self.get_final_target_num_replicas()
+        if len(latest_nonterminal_replicas) < target_num_replicas:
+            num_replicas_to_scale_up = (target_num_replicas -
                                         len(latest_nonterminal_replicas))
             logger.info('Number of replicas to scale up: '
                         f'{num_replicas_to_scale_up}')
@@ -514,11 +525,11 @@ class RequestRateAutoscaler(_AutoscalerWithHysteresis):
                 _generate_scale_up_decisions(num_replicas_to_scale_up, None))
 
         # Case 2: when latest_nonterminal_replicas is more
-        # than
+        # than target_num_replicas, we scale down new replicas.
         replicas_to_scale_down = []
-        if len(latest_nonterminal_replicas) >
+        if len(latest_nonterminal_replicas) > target_num_replicas:
             num_replicas_to_scale_down = (len(latest_nonterminal_replicas) -
-
+                                          target_num_replicas)
             replicas_to_scale_down = (
                 _select_nonterminal_replicas_to_scale_down(
                     num_replicas_to_scale_down, latest_nonterminal_replicas))
@@ -633,7 +644,7 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
         all_replica_ids_to_scale_down: List[int] = []
 
         # Decide how many spot instances to launch.
-        num_spot_to_provision = (self.
+        num_spot_to_provision = (self.get_final_target_num_replicas() -
                                  self.base_ondemand_fallback_replicas)
         if num_nonterminal_spot < num_spot_to_provision:
             # Not enough spot instances, scale up.
@@ -668,6 +679,10 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
             num_ondemand_to_provision += (num_spot_to_provision -
                                           num_ready_spot)
 
+        # Make sure we don't launch on-demand fallback for
+        # overprovisioned replicas.
+        num_ondemand_to_provision = min(num_ondemand_to_provision,
+                                        self.target_num_replicas)
         if num_ondemand_to_provision > num_nonterminal_ondemand:
             num_ondemand_to_scale_up = (num_ondemand_to_provision -
                                         num_nonterminal_ondemand)
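For illustration only (a sketch, not code from this release): the overprovision change above reduces to simple arithmetic. The snippet below uses made-up values and only the names that appear in the hunks (target_num_replicas, num_overprovision, base_ondemand_fallback_replicas); the assumption that the dynamic fallback count starts from base_ondemand_fallback_replicas is ours, since the hunk only shows the `+=` and the new `min` cap.

# Illustrative values; names follow the hunks above.
target_num_replicas = 3              # replicas intended to serve traffic
num_overprovision = 1                # new: extra headroom replicas
base_ondemand_fallback_replicas = 1

# Autoscaler.get_final_target_num_replicas(): headroom is added on top.
final_target = target_num_replicas + num_overprovision                   # 4

# FallbackRequestRateAutoscaler: spot replicas to provision.
num_spot_to_provision = final_target - base_ondemand_fallback_replicas   # 3

# With no spot replicas ready yet, dynamic fallback would ask for
# base + (spot_to_provision - ready_spot) = 1 + 3 = 4 on-demand replicas,
# but the new cap keeps fallback from covering the overprovisioned headroom.
num_ready_spot = 0
num_ondemand_to_provision = base_ondemand_fallback_replicas + (
    num_spot_to_provision - num_ready_spot)                               # 4
num_ondemand_to_provision = min(num_ondemand_to_provision,
                                target_num_replicas)                      # 3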
sky/serve/replica_managers.py
CHANGED
@@ -27,6 +27,7 @@ from sky.serve import constants as serve_constants
 from sky.serve import serve_state
 from sky.serve import serve_utils
 from sky.serve import service
+from sky.serve import spot_placer
 from sky.skylet import constants
 from sky.skylet import job_lib
 from sky.usage import usage_lib
@@ -60,6 +61,7 @@ def launch_cluster(replica_id: int,
                    task_yaml_path: str,
                    cluster_name: str,
                    resources_override: Optional[Dict[str, Any]] = None,
+                   retry_until_up: bool = True,
                    max_retry: int = 3) -> None:
     """Launch a sky serve replica cluster.
 
@@ -71,6 +73,10 @@ def launch_cluster(replica_id: int,
         or some error happened before provisioning and will happen again
         if retry.
     """
+    if resources_override is not None:
+        logger.info(f'Scaling up replica (id: {replica_id}) cluster '
+                    f'{cluster_name} with resources override: '
+                    f'{resources_override}')
    try:
        config = common_utils.read_yaml(os.path.expanduser(task_yaml_path))
        task = sky.Task.from_yaml_config(config)
@@ -98,7 +104,7 @@ def launch_cluster(replica_id: int,
            usage_lib.messages.usage.set_internal()
            execution.launch(task,
                             cluster_name,
-                             retry_until_up=
+                             retry_until_up=retry_until_up,
                             _is_launched_by_sky_serve_controller=True)
            logger.info(f'Replica cluster {cluster_name} launched.')
        except (exceptions.InvalidClusterNameError,
@@ -246,6 +252,10 @@ class ReplicaStatusProperty:
     preempted: bool = False
     # Whether the replica is purged.
     purged: bool = False
+    # Whether the replica failed to launch due to spot availability.
+    # This is only possible when spot placer is enabled, so the retry until up
+    # is set to True and it can fail immediately due to spot availability.
+    failed_spot_availability: bool = False
 
     def remove_terminated_replica(self) -> bool:
         """Whether to remove the replica record from the replica table.
@@ -385,10 +395,11 @@ class ReplicaStatusProperty:
 class ReplicaInfo:
     """Replica info for each replica."""
 
-    _VERSION =
+    _VERSION = 1
 
     def __init__(self, replica_id: int, cluster_name: str, replica_port: str,
-                 is_spot: bool,
+                 is_spot: bool, location: Optional[spot_placer.Location],
+                 version: int) -> None:
         self._version = self._VERSION
         self.replica_id: int = replica_id
         self.cluster_name: str = cluster_name
@@ -398,6 +409,11 @@ class ReplicaInfo:
         self.consecutive_failure_times: List[float] = []
         self.status_property: ReplicaStatusProperty = ReplicaStatusProperty()
         self.is_spot: bool = is_spot
+        self.location: Optional[Dict[str, Optional[str]]] = (
+            location.to_pickleable() if location is not None else None)
+
+    def get_spot_location(self) -> Optional[spot_placer.Location]:
+        return spot_placer.Location.from_pickleable(self.location)
 
     def handle(
             self,
@@ -483,6 +499,7 @@ class ReplicaInfo:
                 f'version={self.version}, '
                 f'replica_port={self.replica_port}, '
                 f'is_spot={self.is_spot}, '
+                f'location={self.location}, '
                 f'status={self.status}, '
                 f'launched_at={info_dict["launched_at"]}{handle_str})')
         return info
@@ -557,6 +574,9 @@ class ReplicaInfo:
             # Treated similar to on-demand instances.
             self.is_spot = False
 
+        if version < 1:
+            self.location = None
+
         self.__dict__.update(state)
 
 
@@ -620,6 +640,9 @@ class SkyPilotReplicaManager(ReplicaManager):
                  task_yaml_path: str) -> None:
         super().__init__(service_name, spec)
         self._task_yaml_path = task_yaml_path
+        task = sky.Task.from_yaml(task_yaml_path)
+        self._spot_placer: Optional[spot_placer.SpotPlacer] = (
+            spot_placer.SpotPlacer.from_task(spec, task))
         # TODO(tian): Store launch/down pid in the replica table, to make the
         # manager more persistent. Current blocker is that we need to manually
         # poll the Process (by join or is_launch), otherwise, it will never
@@ -639,6 +662,9 @@ class SkyPilotReplicaManager(ReplicaManager):
     # Replica management functions #
     ################################
 
+    # Adding lock here to make sure spot placer's current locations are
+    # consistent with the replicas' status.
+    @with_lock
     def _launch_replica(
             self,
             replica_id: int,
@@ -653,19 +679,41 @@ class SkyPilotReplicaManager(ReplicaManager):
             self._service_name, replica_id)
         log_file_name = serve_utils.generate_replica_launch_log_file_name(
             self._service_name, replica_id)
+        use_spot = _should_use_spot(self._task_yaml_path, resources_override)
+        retry_until_up = True
+        location = None
+        if use_spot and self._spot_placer is not None:
+            # For spot placer, we don't retry until up so any launch failed
+            # due to availability issue will be handled by the placer.
+            retry_until_up = False
+            # TODO(tian): Currently, the resources_override can only be
+            # `use_spot=True/False`, which will not cause any conflict with
+            # spot placer's cloud, region & zone. When we add more resources
+            # to the resources_override, we need to make sure they won't
+            # conflict with the spot placer's selection.
+            if resources_override is None:
+                resources_override = {}
+            current_spot_locations: List[spot_placer.Location] = []
+            for info in serve_state.get_replica_infos(self._service_name):
+                if info.is_spot:
+                    spot_location = info.get_spot_location()
+                    if spot_location is not None:
+                        current_spot_locations.append(spot_location)
+            location = self._spot_placer.select_next_location(
+                current_spot_locations)
+            resources_override.update(location.to_dict())
         p = multiprocessing.Process(
             target=ux_utils.RedirectOutputForProcess(
                 launch_cluster,
                 log_file_name,
             ).run,
             args=(replica_id, self._task_yaml_path, cluster_name,
-                  resources_override),
+                  resources_override, retry_until_up),
         )
         replica_port = _get_resources_ports(self._task_yaml_path)
-        use_spot = _should_use_spot(self._task_yaml_path, resources_override)
 
         info = ReplicaInfo(replica_id, cluster_name, replica_port, use_spot,
-                           self.latest_version)
+                           location, self.latest_version)
         serve_state.add_or_update_replica(self._service_name, replica_id, info)
         # Don't start right now; we will start it later in _refresh_process_pool
         # to avoid too many sky.launch running at the same time.
@@ -814,6 +862,10 @@ class SkyPilotReplicaManager(ReplicaManager):
            logger.info(
                f'Replica {info.replica_id} is preempted{cluster_status_str}.')
            info.status_property.preempted = True
+            if self._spot_placer is not None:
+                spot_location = info.get_spot_location()
+                assert spot_location is not None
+                self._spot_placer.set_preemptive(spot_location)
            serve_state.add_or_update_replica(self._service_name, info.replica_id,
                                              info)
            self._terminate_replica(info.replica_id,
@@ -868,6 +920,23 @@ class SkyPilotReplicaManager(ReplicaManager):
                else:
                    info.status_property.sky_launch_status = (
                        ProcessStatus.SUCCEEDED)
+                if self._spot_placer is not None and info.is_spot:
+                    # TODO(tian): Currently, we set the location to
+                    # preemptive if the launch process failed. This is
+                    # because if the error is not related to the
+                    # availability of the location, then all locations
+                    # should failed for same reason. So it does not matter
+                    # which location is preemptive or not, instead, all
+                    # locations would fail. We should implement a log parser
+                    # to detect if the error is actually related to the
+                    # availability of the location later.
+                    location = info.get_spot_location()
+                    assert location is not None
+                    if p.exitcode != 0:
+                        self._spot_placer.set_preemptive(location)
+                        info.status_property.failed_spot_availability = True
+                    else:
+                        self._spot_placer.set_active(location)
                serve_state.add_or_update_replica(self._service_name,
                                                  replica_id, info)
                if error_in_sky_launch:
@@ -918,6 +987,8 @@ class SkyPilotReplicaManager(ReplicaManager):
            removal_reason = 'for version outdated'
        elif info.status_property.purged:
            removal_reason = 'for purge'
+        elif info.status_property.failed_spot_availability:
+            removal_reason = 'for spot availability failure'
        else:
            logger.info(f'Termination of replica {replica_id} '
                        'finished. Replica info is kept since some '
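A side note on the `_VERSION = 1` bump and the `if version < 1: self.location = None` backfill above: this is the usual versioned-unpickling pattern, so replica records written by an older controller still load after an upgrade. Below is a generic sketch of that pattern with a hypothetical `Record` class; it is not SkyPilot code.

class Record:
    """Generic version-and-backfill pattern; hypothetical, not SkyPilot code."""
    _VERSION = 1  # bump whenever a new field is added

    def __init__(self) -> None:
        self._version = self._VERSION
        self.location = None  # field introduced at version 1

    def __setstate__(self, state: dict) -> None:
        version = state.get('_version', -1)
        if version < 1:
            # Records pickled before version 1 have no `location`; default it.
            self.location = None
        self.__dict__.update(state)


# Simulate unpickling a record written before the field existed.
old_state = {'_version': 0, 'replica_id': 7}
record = Record.__new__(Record)
record.__setstate__(old_state)
assert record.location is None and record.replica_id == 7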
sky/serve/serve_utils.py
CHANGED
@@ -25,6 +25,7 @@ from sky import global_user_state
 from sky.adaptors import common as adaptors_common
 from sky.serve import constants
 from sky.serve import serve_state
+from sky.serve import spot_placer
 from sky.skylet import constants as skylet_constants
 from sky.skylet import job_lib
 from sky.utils import annotations
@@ -40,6 +41,7 @@ if typing.TYPE_CHECKING:
     import psutil
     import requests
 
+    import sky
     from sky.serve import replica_managers
 else:
     psutil = adaptors_common.LazyImport('psutil')
@@ -210,6 +212,84 @@ class RequestTimestamp(RequestsAggregator):
         return f'RequestTimestamp(timestamps={self.timestamps})'
 
 
+def validate_service_task(task: 'sky.Task') -> None:
+    """Validate the task for Sky Serve.
+
+    Args:
+        task: sky.Task to validate
+
+    Raises:
+        ValueError: if the arguments are invalid.
+        RuntimeError: if the task.serve is not found.
+    """
+    spot_resources: List['sky.Resources'] = [
+        resource for resource in task.resources if resource.use_spot
+    ]
+    # TODO(MaoZiming): Allow mixed on-demand and spot specification in resources
+    # On-demand fallback should go to the resources specified as on-demand.
+    if len(spot_resources) not in [0, len(task.resources)]:
+        with ux_utils.print_exception_no_traceback():
+            raise ValueError(
+                'Resources must either all use spot or none use spot. '
+                'To use on-demand and spot instances together, '
+                'use `dynamic_ondemand_fallback` or set '
+                'base_ondemand_fallback_replicas.')
+
+    if task.service is None:
+        with ux_utils.print_exception_no_traceback():
+            raise RuntimeError('Service section not found.')
+
+    policy_description = ('on-demand'
+                          if task.service.dynamic_ondemand_fallback else 'spot')
+    for resource in list(task.resources):
+        if resource.job_recovery is not None:
+            with ux_utils.print_exception_no_traceback():
+                raise ValueError('job_recovery is disabled for SkyServe. '
+                                 'SkyServe will replenish preempted spot '
+                                 f'with {policy_description} instances.')
+
+    # Try to create a spot placer from the task yaml. Check if the task yaml
+    # is valid for spot placer.
+    spot_placer.SpotPlacer.from_task(task.service, task)
+
+    replica_ingress_port: Optional[int] = int(
+        task.service.ports) if (task.service.ports is not None) else None
+    for requested_resources in task.resources:
+        if (task.service.use_ondemand_fallback and
+                not requested_resources.use_spot):
+            with ux_utils.print_exception_no_traceback():
+                raise ValueError(
+                    '`use_ondemand_fallback` is only supported '
+                    'for spot resources. Please explicitly specify '
+                    '`use_spot: true` in resources for on-demand fallback.')
+        if (task.service.spot_placer is not None and
+                not requested_resources.use_spot):
+            with ux_utils.print_exception_no_traceback():
+                raise ValueError(
+                    '`spot_placer` is only supported for spot resources. '
+                    'Please explicitly specify `use_spot: true` in resources.')
+        if task.service.ports is None:
+            requested_ports = list(
+                resources_utils.port_ranges_to_set(requested_resources.ports))
+            if len(requested_ports) != 1:
+                with ux_utils.print_exception_no_traceback():
+                    raise ValueError(
+                        'To open multiple ports on the replica, please set the '
+                        '`service.ports` field to specify a main service port. '
+                        'Must only specify one port in resources otherwise. '
+                        'Each replica will use the port specified as '
+                        'application ingress port.')
+            service_port = requested_ports[0]
+            if replica_ingress_port is None:
+                replica_ingress_port = service_port
+            elif service_port != replica_ingress_port:
+                with ux_utils.print_exception_no_traceback():
+                    raise ValueError(
+                        f'Got multiple ports: {service_port} and '
+                        f'{replica_ingress_port} in different resources. '
+                        'Please specify the same port instead.')
+
+
 def generate_service_name():
     return f'sky-service-{uuid.uuid4().hex[:4]}'
 
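For context (an illustrative sketch, not part of the diff): `validate_service_task` is re-exported from `sky.serve` and, per the `sky/cli.py` hunks above, runs right after the service spec is printed in `sky serve up` and `sky serve update`. The snippet below shows one kind of misconfiguration it rejects, assuming the public `sky.Task` / `sky.Resources` Python API; the error text comes from the function body above.

import sky
from sky import serve as serve_lib

# A service task whose resources mix spot and on-demand instances.
task = sky.Task(run='python -m http.server 8080')
task.set_resources({
    sky.Resources(cpus='2+', use_spot=True),
    sky.Resources(cpus='2+', use_spot=False),
})

# Raises ValueError: 'Resources must either all use spot or none use spot. ...'
# (this check runs before the service section is inspected).
serve_lib.validate_service_task(task)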