skypilot-nightly 1.0.0.dev20250514__py3-none-any.whl → 1.0.0.dev20250516__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend.py +3 -2
  3. sky/backends/backend_utils.py +19 -17
  4. sky/backends/cloud_vm_ray_backend.py +30 -11
  5. sky/clouds/aws.py +11 -9
  6. sky/clouds/azure.py +16 -13
  7. sky/clouds/cloud.py +4 -3
  8. sky/clouds/cudo.py +3 -2
  9. sky/clouds/do.py +3 -2
  10. sky/clouds/fluidstack.py +3 -3
  11. sky/clouds/gcp.py +1 -1
  12. sky/clouds/ibm.py +12 -10
  13. sky/clouds/kubernetes.py +3 -2
  14. sky/clouds/lambda_cloud.py +6 -6
  15. sky/clouds/nebius.py +6 -5
  16. sky/clouds/oci.py +9 -7
  17. sky/clouds/paperspace.py +3 -2
  18. sky/clouds/runpod.py +9 -9
  19. sky/clouds/scp.py +5 -3
  20. sky/clouds/vast.py +8 -7
  21. sky/clouds/vsphere.py +4 -2
  22. sky/core.py +18 -12
  23. sky/dashboard/out/404.html +1 -1
  24. sky/dashboard/out/_next/static/chunks/pages/index-6b0d9e5031b70c58.js +1 -0
  25. sky/dashboard/out/_next/static/{tdxxQrPV6NW90a983oHXe → y1yf6Xc0zwam5fFluIyUm}/_buildManifest.js +1 -1
  26. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  27. sky/dashboard/out/clusters/[cluster].html +1 -1
  28. sky/dashboard/out/clusters.html +1 -1
  29. sky/dashboard/out/index.html +1 -1
  30. sky/dashboard/out/jobs/[job].html +1 -1
  31. sky/dashboard/out/jobs.html +1 -1
  32. sky/execution.py +33 -0
  33. sky/global_user_state.py +2 -0
  34. sky/jobs/recovery_strategy.py +4 -1
  35. sky/jobs/server/core.py +6 -12
  36. sky/optimizer.py +19 -13
  37. sky/provision/kubernetes/utils.py +26 -1
  38. sky/resources.py +203 -44
  39. sky/serve/server/core.py +0 -5
  40. sky/serve/spot_placer.py +3 -0
  41. sky/server/requests/executor.py +114 -22
  42. sky/server/requests/requests.py +15 -0
  43. sky/server/server.py +63 -20
  44. sky/server/uvicorn.py +12 -2
  45. sky/setup_files/dependencies.py +4 -1
  46. sky/sky_logging.py +40 -2
  47. sky/skylet/log_lib.py +60 -11
  48. sky/skylet/log_lib.pyi +5 -0
  49. sky/task.py +8 -6
  50. sky/utils/cli_utils/status_utils.py +6 -5
  51. sky/utils/command_runner.py +3 -0
  52. sky/utils/context.py +264 -0
  53. sky/utils/context_utils.py +172 -0
  54. sky/utils/controller_utils.py +39 -43
  55. sky/utils/dag_utils.py +4 -2
  56. sky/utils/resources_utils.py +3 -0
  57. sky/utils/rich_utils.py +81 -37
  58. sky/utils/schemas.py +33 -24
  59. sky/utils/subprocess_utils.py +8 -2
  60. {skypilot_nightly-1.0.0.dev20250514.dist-info → skypilot_nightly-1.0.0.dev20250516.dist-info}/METADATA +2 -2
  61. {skypilot_nightly-1.0.0.dev20250514.dist-info → skypilot_nightly-1.0.0.dev20250516.dist-info}/RECORD +66 -64
  62. {skypilot_nightly-1.0.0.dev20250514.dist-info → skypilot_nightly-1.0.0.dev20250516.dist-info}/WHEEL +1 -1
  63. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  64. /sky/dashboard/out/_next/static/{tdxxQrPV6NW90a983oHXe → y1yf6Xc0zwam5fFluIyUm}/_ssgManifest.js +0 -0
  65. {skypilot_nightly-1.0.0.dev20250514.dist-info → skypilot_nightly-1.0.0.dev20250516.dist-info}/entry_points.txt +0 -0
  66. {skypilot_nightly-1.0.0.dev20250514.dist-info → skypilot_nightly-1.0.0.dev20250516.dist-info}/licenses/LICENSE +0 -0
  67. {skypilot_nightly-1.0.0.dev20250514.dist-info → skypilot_nightly-1.0.0.dev20250516.dist-info}/top_level.txt +0 -0
sky/resources.py CHANGED
@@ -1,7 +1,8 @@
1
1
  """Resources: compute requirements of Tasks."""
2
2
  import dataclasses
3
3
  import textwrap
4
- from typing import Any, Dict, List, Optional, Set, Tuple, Union
4
+ import typing
5
+ from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union
5
6
 
6
7
  import colorama
7
8
 
@@ -34,6 +35,50 @@ RESOURCE_CONFIG_ALIASES = {
34
35
  }
35
36
 
36
37
 
38
+ @dataclasses.dataclass
39
+ class AutostopConfig:
40
+ """Configuration for autostop."""
41
+ # enabled isn't present in the yaml config, but it's needed for this class
42
+ # to be complete.
43
+ enabled: bool
44
+ # If enabled is False, these values are ignored.
45
+ idle_minutes: int = 5
46
+ down: bool = False
47
+
48
+ def to_yaml_config(self) -> Union[Literal[False], Dict[str, Any]]:
49
+ if not self.enabled:
50
+ return False
51
+ return {
52
+ 'idle_minutes': self.idle_minutes,
53
+ 'down': self.down,
54
+ }
55
+
56
+ @classmethod
57
+ def from_yaml_config(
58
+ cls, config: Union[bool, int, Dict[str, Any], None]
59
+ ) -> Optional['AutostopConfig']:
60
+ if isinstance(config, bool):
61
+ if config:
62
+ return cls(enabled=True)
63
+ else:
64
+ return cls(enabled=False)
65
+
66
+ if isinstance(config, int):
67
+ return cls(idle_minutes=config, down=False, enabled=True)
68
+
69
+ if isinstance(config, dict):
70
+ # If we have a dict, autostop is enabled. (Only way to disable is
71
+ # with `false`, a bool.)
72
+ autostop_config = cls(enabled=True)
73
+ if 'idle_minutes' in config:
74
+ autostop_config.idle_minutes = config['idle_minutes']
75
+ if 'down' in config:
76
+ autostop_config.down = config['down']
77
+ return autostop_config
78
+
79
+ return None
80
+
81
+
37
82
  class Resources:
38
83
  """Resources: compute requirements of Tasks.
39
84
 
@@ -51,7 +96,7 @@ class Resources:
51
96
  """
52
97
  # If any fields changed, increment the version. For backward compatibility,
53
98
  # modify the __setstate__ method to handle the old version.
54
- _VERSION = 22
99
+ _VERSION = 23
55
100
 
56
101
  def __init__(
57
102
  self,
@@ -59,17 +104,19 @@ class Resources:
59
104
  instance_type: Optional[str] = None,
60
105
  cpus: Union[None, int, float, str] = None,
61
106
  memory: Union[None, int, float, str] = None,
62
- accelerators: Union[None, str, Dict[str, int]] = None,
107
+ accelerators: Union[None, str, Dict[str, Union[int, float]]] = None,
63
108
  accelerator_args: Optional[Dict[str, str]] = None,
64
109
  use_spot: Optional[bool] = None,
65
- job_recovery: Optional[Union[Dict[str, Union[str, int]], str]] = None,
110
+ job_recovery: Optional[Union[Dict[str, Optional[Union[str, int]]],
111
+ str]] = None,
66
112
  region: Optional[str] = None,
67
113
  zone: Optional[str] = None,
68
- image_id: Union[Dict[str, str], str, None] = None,
114
+ image_id: Union[Dict[Optional[str], str], str, None] = None,
69
115
  disk_size: Optional[int] = None,
70
116
  disk_tier: Optional[Union[str, resources_utils.DiskTier]] = None,
71
117
  ports: Optional[Union[int, str, List[str], Tuple[str]]] = None,
72
118
  labels: Optional[Dict[str, str]] = None,
119
+ autostop: Union[bool, int, Dict[str, Any], None] = None,
73
120
  # Internal use only.
74
121
  # pylint: disable=invalid-name
75
122
  _docker_login_config: Optional[docker_utils.DockerLoginConfig] = None,
@@ -152,6 +199,8 @@ class Resources:
152
199
  instance tags. On GCP, labels map to instance labels. On
153
200
  Kubernetes, labels map to pod labels. On other clouds, labels are
154
201
  not supported and will be ignored.
202
+ autostop: the autostop configuration to use. For launched resources,
203
+ may or may not correspond to the actual current autostop config.
155
204
  _docker_login_config: the docker configuration to use. This includes
156
205
  the docker username, password, and registry server. If None, skip
157
206
  docker login.
@@ -177,7 +226,8 @@ class Resources:
177
226
 
178
227
  self._use_spot_specified = use_spot is not None
179
228
  self._use_spot = use_spot if use_spot is not None else False
180
- self._job_recovery: Optional[Dict[str, Union[str, int]]] = None
229
+ self._job_recovery: Optional[Dict[str, Optional[Union[str,
230
+ int]]]] = None
181
231
  if job_recovery is not None:
182
232
  if isinstance(job_recovery, str):
183
233
  job_recovery = {'strategy': job_recovery}
@@ -188,7 +238,7 @@ class Resources:
188
238
  if strategy_name == 'none':
189
239
  self._job_recovery = None
190
240
  else:
191
- if strategy_name is not None:
241
+ if isinstance(strategy_name, str):
192
242
  job_recovery['strategy'] = strategy_name.upper()
193
243
  self._job_recovery = job_recovery
194
244
 
@@ -201,7 +251,7 @@ class Resources:
201
251
  else:
202
252
  self._disk_size = _DEFAULT_DISK_SIZE_GB
203
253
 
204
- self._image_id = image_id
254
+ self._image_id: Optional[Dict[Optional[str], str]] = None
205
255
  if isinstance(image_id, str):
206
256
  self._image_id = {self._region: image_id.strip()}
207
257
  elif isinstance(image_id, dict):
@@ -209,8 +259,11 @@ class Resources:
209
259
  self._image_id = {self._region: image_id[None].strip()}
210
260
  else:
211
261
  self._image_id = {
212
- k.strip(): v.strip() for k, v in image_id.items()
262
+ typing.cast(str, k).strip(): v.strip()
263
+ for k, v in image_id.items()
213
264
  }
265
+ else:
266
+ self._image_id = image_id
214
267
  self._is_image_managed = _is_image_managed
215
268
 
216
269
  if isinstance(disk_tier, str):
@@ -228,7 +281,7 @@ class Resources:
228
281
  if isinstance(ports, tuple):
229
282
  ports = list(ports)
230
283
  if not isinstance(ports, list):
231
- ports = [ports]
284
+ ports = [str(ports)]
232
285
  ports = resources_utils.simplify_ports(
233
286
  [str(port) for port in ports])
234
287
  if not ports:
@@ -250,11 +303,12 @@ class Resources:
250
303
  self._requires_fuse = _requires_fuse
251
304
 
252
305
  self._cluster_config_overrides = _cluster_config_overrides
253
- self._cached_repr = None
306
+ self._cached_repr: Optional[str] = None
254
307
 
255
308
  self._set_cpus(cpus)
256
309
  self._set_memory(memory)
257
310
  self._set_accelerators(accelerators, accelerator_args)
311
+ self._set_autostop_config(autostop)
258
312
 
259
313
  def validate(self):
260
314
  """Validate the resources and infer the missing fields if possible."""
@@ -378,19 +432,19 @@ class Resources:
378
432
  return repr_str
379
433
 
380
434
  @property
381
- def cloud(self):
435
+ def cloud(self) -> Optional[clouds.Cloud]:
382
436
  return self._cloud
383
437
 
384
438
  @property
385
- def region(self):
439
+ def region(self) -> Optional[str]:
386
440
  return self._region
387
441
 
388
442
  @property
389
- def zone(self):
443
+ def zone(self) -> Optional[str]:
390
444
  return self._zone
391
445
 
392
446
  @property
393
- def instance_type(self):
447
+ def instance_type(self) -> Optional[str]:
394
448
  return self._instance_type
395
449
 
396
450
  @property
@@ -444,7 +498,7 @@ class Resources:
444
498
  return None
445
499
 
446
500
  @property
447
- def accelerator_args(self) -> Optional[Dict[str, str]]:
501
+ def accelerator_args(self) -> Optional[Dict[str, Any]]:
448
502
  return self._accelerator_args
449
503
 
450
504
  @property
@@ -456,7 +510,7 @@ class Resources:
456
510
  return self._use_spot_specified
457
511
 
458
512
  @property
459
- def job_recovery(self) -> Optional[Dict[str, Union[str, int]]]:
513
+ def job_recovery(self) -> Optional[Dict[str, Optional[Union[str, int]]]]:
460
514
  return self._job_recovery
461
515
 
462
516
  @property
@@ -464,11 +518,11 @@ class Resources:
464
518
  return self._disk_size
465
519
 
466
520
  @property
467
- def image_id(self) -> Optional[Dict[str, str]]:
521
+ def image_id(self) -> Optional[Dict[Optional[str], str]]:
468
522
  return self._image_id
469
523
 
470
524
  @property
471
- def disk_tier(self) -> resources_utils.DiskTier:
525
+ def disk_tier(self) -> Optional[resources_utils.DiskTier]:
472
526
  return self._disk_tier
473
527
 
474
528
  @property
@@ -479,6 +533,16 @@ class Resources:
479
533
  def labels(self) -> Optional[Dict[str, str]]:
480
534
  return self._labels
481
535
 
536
+ @property
537
+ def autostop_config(self) -> Optional[AutostopConfig]:
538
+ """The requested autostop config.
539
+
540
+ Warning: This is the autostop config that was originally used to
541
+ launch the resources. It may not correspond to the actual current
542
+ autostop config.
543
+ """
544
+ return self._autostop_config
545
+
482
546
  @property
483
547
  def is_image_managed(self) -> Optional[bool]:
484
548
  return self._is_image_managed
@@ -489,16 +553,22 @@ class Resources:
489
553
  return False
490
554
  return self._requires_fuse
491
555
 
556
+ def set_requires_fuse(self, value: bool) -> None:
557
+ """Sets whether this resource requires FUSE mounting support.
558
+
559
+ Args:
560
+ value: Whether the resource requires FUSE mounting support.
561
+ """
562
+ # TODO(zeping): This violates the immutability of Resources.
563
+ # Refactor to use Resources.copy instead.
564
+ self._requires_fuse = value
565
+
492
566
  @property
493
567
  def cluster_config_overrides(self) -> Dict[str, Any]:
494
568
  if self._cluster_config_overrides is None:
495
569
  return {}
496
570
  return self._cluster_config_overrides
497
571
 
498
- @requires_fuse.setter
499
- def requires_fuse(self, value: Optional[bool]) -> None:
500
- self._requires_fuse = value
501
-
502
572
  @property
503
573
  def docker_login_config(self) -> Optional[docker_utils.DockerLoginConfig]:
504
574
  return self._docker_login_config
@@ -572,8 +642,8 @@ class Resources:
572
642
 
573
643
  def _set_accelerators(
574
644
  self,
575
- accelerators: Union[None, str, Dict[str, int]],
576
- accelerator_args: Optional[Dict[str, str]],
645
+ accelerators: Union[None, str, Dict[str, Union[int, float]]],
646
+ accelerator_args: Optional[Dict[str, Any]],
577
647
  ) -> None:
578
648
  """Sets accelerators.
579
649
 
@@ -608,10 +678,11 @@ class Resources:
608
678
  self._cloud = clouds.Kubernetes()
609
679
  else:
610
680
  self._cloud = clouds.GCP()
611
- assert (self.cloud.is_same_cloud(clouds.GCP()) or
612
- self.cloud.is_same_cloud(clouds.Kubernetes())), (
613
- 'Cloud must be GCP or Kubernetes for TPU '
614
- 'accelerators.')
681
+ assert self.cloud is not None and (
682
+ self.cloud.is_same_cloud(clouds.GCP()) or
683
+ self.cloud.is_same_cloud(clouds.Kubernetes())), (
684
+ 'Cloud must be GCP or Kubernetes for TPU '
685
+ 'accelerators.')
615
686
 
616
687
  if accelerator_args is None:
617
688
  accelerator_args = {}
@@ -645,15 +716,34 @@ class Resources:
645
716
  'Cannot specify instance type (got '
646
717
  f'{self.instance_type!r}) for TPU VM.')
647
718
 
648
- self._accelerators = accelerators
649
- self._accelerator_args = accelerator_args
719
+ self._accelerators: Optional[Dict[str, Union[int,
720
+ float]]] = accelerators
721
+ self._accelerator_args: Optional[Dict[str, Any]] = accelerator_args
722
+
723
+ def _set_autostop_config(
724
+ self,
725
+ autostop: Union[bool, int, Dict[str, Any], None],
726
+ ) -> None:
727
+ self._autostop_config = AutostopConfig.from_yaml_config(autostop)
650
728
 
651
729
  def is_launchable(self) -> bool:
730
+ """Returns whether the resource is launchable."""
652
731
  return self.cloud is not None and self._instance_type is not None
653
732
 
733
+ def assert_launchable(self) -> 'LaunchableResources':
734
+ """A workaround to make mypy understand that is_launchable() is true.
735
+
736
+ Note: The `cast` to `LaunchableResources` is only for static type
737
+ checking with MyPy. At runtime, the Python interpreter does not enforce
738
+ types, and the returned object will still be an instance of `Resources`.
739
+ """
740
+ assert self.is_launchable(), self
741
+ return typing.cast(LaunchableResources, self)
742
+
654
743
  def need_cleanup_after_preemption_or_failure(self) -> bool:
655
744
  """Whether a resource needs cleanup after preemption or failure."""
656
745
  assert self.is_launchable(), self
746
+ assert self.cloud is not None, 'Cloud must be specified'
657
747
  return self.cloud.need_cleanup_after_preemption_or_failure(self)
658
748
 
659
749
  def _try_canonicalize_accelerators(self) -> None:
@@ -710,10 +800,10 @@ class Resources:
710
800
  else:
711
801
  table = log_utils.create_table(['Cloud', 'Hint'])
712
802
  table.add_row(['-----', '----'])
713
- for cloud, error in cloud_to_errors.items():
803
+ for cloud_msg, error in cloud_to_errors.items():
714
804
  reason_str = '\n'.join(textwrap.wrap(
715
805
  str(error), 80))
716
- table.add_row([str(cloud), reason_str])
806
+ table.add_row([cloud_msg, reason_str])
717
807
  hint = table.get_string()
718
808
  raise ValueError(
719
809
  f'Invalid (region {self._region!r}, zone '
@@ -745,11 +835,13 @@ class Resources:
745
835
  ssh_proxy_command dict with region names as keys).
746
836
  """
747
837
  assert self.is_launchable(), self
748
-
749
- regions = self._cloud.regions_with_offering(self._instance_type,
750
- self.accelerators,
751
- self._use_spot,
752
- self._region, self._zone)
838
+ assert self.cloud is not None, 'Cloud must be specified'
839
+ assert self._instance_type is not None, (
840
+ 'Instance type must be specified')
841
+ regions = self.cloud.regions_with_offering(self._instance_type,
842
+ self.accelerators,
843
+ self._use_spot, self._region,
844
+ self._zone)
753
845
  if self._image_id is not None and None not in self._image_id:
754
846
  regions = [r for r in regions if r.name in self._image_id]
755
847
 
@@ -849,6 +941,10 @@ class Resources:
849
941
  cpus, mem = self.cloud.get_vcpus_mem_from_instance_type(
850
942
  self._instance_type)
851
943
  if self._cpus is not None:
944
+ assert cpus is not None, (
945
+ f'Can\'t get vCPUs from instance type: '
946
+ f'{self._instance_type}, check catalog or '
947
+ f'specify cpus directly.')
852
948
  if self._cpus.endswith('+'):
853
949
  if cpus < float(self._cpus[:-1]):
854
950
  with ux_utils.print_exception_no_traceback():
@@ -863,6 +959,10 @@ class Resources:
863
959
  f'number of vCPUs. {self.instance_type} has {cpus} '
864
960
  f'vCPUs, but {self._cpus} is requested.')
865
961
  if self.memory is not None:
962
+ assert mem is not None, (
963
+ f'Can\'t get memory from instance type: '
964
+ f'{self._instance_type}, check catalog or '
965
+ f'specify memory directly.')
866
966
  if self.memory.endswith(('+', 'x')):
867
967
  if mem < float(self.memory[:-1]):
868
968
  with ux_utils.print_exception_no_traceback():
@@ -886,6 +986,8 @@ class Resources:
886
986
  if self._job_recovery is None or self._job_recovery['strategy'] is None:
887
987
  return
888
988
  # Validate the job recovery strategy
989
+ assert isinstance(self._job_recovery['strategy'],
990
+ str), 'Job recovery strategy must be a string'
889
991
  registry.JOBS_RECOVERY_STRATEGY_REGISTRY.from_str(
890
992
  self._job_recovery['strategy'])
891
993
 
@@ -920,7 +1022,7 @@ class Resources:
920
1022
  'Cloud must be specified when image_id is provided.')
921
1023
 
922
1024
  try:
923
- self._cloud.check_features_are_supported(
1025
+ self.cloud.check_features_are_supported(
924
1026
  self,
925
1027
  requested_features={
926
1028
  clouds.CloudImplementationFeatures.IMAGE_ID
@@ -943,14 +1045,14 @@ class Resources:
943
1045
  # Check the image_id's are valid.
944
1046
  for region, image_id in self._image_id.items():
945
1047
  if (image_id.startswith('skypilot:') and
946
- not self._cloud.is_image_tag_valid(image_id, region)):
1048
+ not self.cloud.is_image_tag_valid(image_id, region)):
947
1049
  region_str = f' ({region})' if region else ''
948
1050
  with ux_utils.print_exception_no_traceback():
949
1051
  raise ValueError(
950
1052
  f'Image tag {image_id!r} is not valid, please make sure'
951
1053
  f' the tag exists in {self._cloud}{region_str}.')
952
1054
 
953
- if (self._cloud.is_same_cloud(clouds.AWS()) and
1055
+ if (self.cloud.is_same_cloud(clouds.AWS()) and
954
1056
  not image_id.startswith('skypilot:') and region is None):
955
1057
  with ux_utils.print_exception_no_traceback():
956
1058
  raise ValueError(
@@ -1055,6 +1157,9 @@ class Resources:
1055
1157
  """Returns cost in USD for the runtime in seconds."""
1056
1158
  hours = seconds / 3600
1057
1159
  # Instance.
1160
+ assert self.cloud is not None, 'Cloud must be specified'
1161
+ assert self._instance_type is not None, (
1162
+ 'Instance type must be specified')
1058
1163
  hourly_cost = self.cloud.instance_type_to_hourly_cost(
1059
1164
  self._instance_type, self.use_spot, self._region, self._zone)
1060
1165
  # Accelerators (if any).
@@ -1099,6 +1204,7 @@ class Resources:
1099
1204
  docker_image = self.extract_docker_image()
1100
1205
 
1101
1206
  # Cloud specific variables
1207
+ assert self.cloud is not None, 'Cloud must be specified'
1102
1208
  cloud_specific_variables = self.cloud.make_deploy_resources_variables(
1103
1209
  self, cluster_name, region, zones, num_nodes, dryrun)
1104
1210
 
@@ -1153,9 +1259,12 @@ class Resources:
1153
1259
  specific_reservations = set(
1154
1260
  skypilot_config.get_nested(
1155
1261
  (str(self.cloud).lower(), 'specific_reservations'), set()))
1262
+
1263
+ assert (self.cloud is not None and self.instance_type is not None and
1264
+ self.region
1265
+ is not None), ('Cloud, instance type, region must be specified')
1156
1266
  return self.cloud.get_reservations_available_resources(
1157
- self._instance_type, self._region, self._zone,
1158
- specific_reservations)
1267
+ self.instance_type, self.region, self.zone, specific_reservations)
1159
1268
 
1160
1269
  def less_demanding_than(
1161
1270
  self,
@@ -1175,6 +1284,9 @@ class Resources:
1175
1284
  if isinstance(other, list):
1176
1285
  resources_list = [self.less_demanding_than(o) for o in other]
1177
1286
  return requested_num_nodes <= sum(resources_list)
1287
+
1288
+ assert other.cloud is not None, 'Other cloud must be specified'
1289
+
1178
1290
  if self.cloud is not None and not self.cloud.is_same_cloud(other.cloud):
1179
1291
  return False
1180
1292
  # self.cloud <= other.cloud
@@ -1263,6 +1375,7 @@ class Resources:
1263
1375
  If a field in `blocked` is None, it should be considered as a wildcard
1264
1376
  for that field.
1265
1377
  """
1378
+ assert self.cloud is not None, 'Cloud must be specified'
1266
1379
  is_matched = True
1267
1380
  if (blocked.cloud is not None and
1268
1381
  not self.cloud.is_same_cloud(blocked.cloud)):
@@ -1301,7 +1414,7 @@ class Resources:
1301
1414
  use_spot = self.use_spot if self._use_spot_specified else None
1302
1415
 
1303
1416
  current_override_configs = self._cluster_config_overrides
1304
- if self._cluster_config_overrides is None:
1417
+ if current_override_configs is None:
1305
1418
  current_override_configs = {}
1306
1419
  new_override_configs = override.pop('_cluster_config_overrides', {})
1307
1420
  overlaid_configs = skypilot_config.overlay_skypilot_config(
@@ -1314,6 +1427,10 @@ class Resources:
1314
1427
  if elem is not None:
1315
1428
  override_configs.set_nested(key, elem)
1316
1429
 
1430
+ current_autostop_config = None
1431
+ if self.autostop_config is not None:
1432
+ current_autostop_config = self.autostop_config.to_yaml_config()
1433
+
1317
1434
  override_configs = dict(override_configs) if override_configs else None
1318
1435
  resources = Resources(
1319
1436
  cloud=override.pop('cloud', self.cloud),
@@ -1332,6 +1449,7 @@ class Resources:
1332
1449
  disk_tier=override.pop('disk_tier', self.disk_tier),
1333
1450
  ports=override.pop('ports', self.ports),
1334
1451
  labels=override.pop('labels', self.labels),
1452
+ autostop=override.pop('autostop', current_autostop_config),
1335
1453
  _docker_login_config=override.pop('_docker_login_config',
1336
1454
  self._docker_login_config),
1337
1455
  _docker_username_for_runpod=override.pop(
@@ -1529,6 +1647,7 @@ class Resources:
1529
1647
  resources_fields['disk_tier'] = config.pop('disk_tier', None)
1530
1648
  resources_fields['ports'] = config.pop('ports', None)
1531
1649
  resources_fields['labels'] = config.pop('labels', None)
1650
+ resources_fields['autostop'] = config.pop('autostop', None)
1532
1651
  resources_fields['_docker_login_config'] = config.pop(
1533
1652
  '_docker_login_config', None)
1534
1653
  resources_fields['_docker_username_for_runpod'] = config.pop(
@@ -1578,6 +1697,8 @@ class Resources:
1578
1697
  config['disk_tier'] = self.disk_tier.value
1579
1698
  add_if_not_none('ports', self.ports)
1580
1699
  add_if_not_none('labels', self.labels)
1700
+ if self._autostop_config is not None:
1701
+ config['autostop'] = self._autostop_config.to_yaml_config()
1581
1702
  if self._docker_login_config is not None:
1582
1703
  config['_docker_login_config'] = dataclasses.asdict(
1583
1704
  self._docker_login_config)
@@ -1733,4 +1854,42 @@ class Resources:
1733
1854
  self._docker_username_for_runpod = state.pop(
1734
1855
  '_docker_username_for_runpod', None)
1735
1856
 
1857
+ if version < 23:
1858
+ self._autostop_config = None
1859
+
1736
1860
  self.__dict__.update(state)
1861
+
1862
+
1863
+ class LaunchableResources(Resources):
1864
+ """A class representing resources that can be launched on a cloud provider.
1865
+
1866
+ This class is primarily a type hint for MyPy to indicate that an instance
1867
+ of `Resources` is launchable (i.e., `cloud` and `instance_type` are not
1868
+ None). It should not be instantiated directly.
1869
+ """
1870
+
1871
+ def __init__(self, *args, **kwargs) -> None: # pylint: disable=super-init-not-called,unused-argument
1872
+ assert False, (
1873
+ 'LaunchableResources should not be instantiated directly. '
1874
+ 'It is only used for type checking by MyPy.')
1875
+
1876
+ @property
1877
+ def cloud(self) -> clouds.Cloud:
1878
+ assert self._cloud is not None, 'Cloud must be specified'
1879
+ return self._cloud
1880
+
1881
+ @property
1882
+ def instance_type(self) -> str:
1883
+ assert self._instance_type is not None, (
1884
+ 'Instance type must be specified')
1885
+ return self._instance_type
1886
+
1887
+ def copy(self, **override) -> 'LaunchableResources':
1888
+ """Ensure MyPy understands the return type is LaunchableResources.
1889
+
1890
+ This method is not expected to be called at runtime, as
1891
+ LaunchableResources should not be directly instantiated. It primarily
1892
+ serves as a type hint for static analysis.
1893
+ """
1894
+ self.assert_launchable()
1895
+ return typing.cast(LaunchableResources, super().copy(**override))
sky/serve/server/core.py CHANGED
@@ -219,17 +219,12 @@ def up(
219
219
  # whether the service is already running. If the id is the same
220
220
  # with the current job id, we know the service is up and running
221
221
  # for the first time; otherwise it is a name conflict.
222
- controller_idle_minutes_to_autostop, controller_down = (
223
- controller_utils.get_controller_autostop_config(
224
- controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER))
225
222
  # Since the controller may be shared among multiple users, launch the
226
223
  # controller with the API server's user hash.
227
224
  with common.with_server_user_hash():
228
225
  controller_job_id, controller_handle = execution.launch(
229
226
  task=controller_task,
230
227
  cluster_name=controller_name,
231
- idle_minutes_to_autostop=controller_idle_minutes_to_autostop,
232
- down=controller_down,
233
228
  retry_until_up=True,
234
229
  _disable_controller_check=True,
235
230
  )
sky/serve/spot_placer.py CHANGED
@@ -46,6 +46,8 @@ class Location:
46
46
 
47
47
  @classmethod
48
48
  def from_resources(cls, resources: 'resources_lib.Resources') -> 'Location':
49
+ assert resources.cloud is not None, 'Cloud must be specified'
50
+ assert resources.region is not None, 'Region must be specified'
49
51
  return cls(resources.cloud, resources.region, resources.zone)
50
52
 
51
53
  def to_dict(self) -> Dict[str, Any]:
@@ -147,6 +149,7 @@ def _get_possible_location_from_task(task: 'task_lib.Task') -> List[Location]:
147
149
  cloud_str = str(launchable.cloud)
148
150
  region = launchable.region
149
151
  zone = launchable.zone
152
+ assert region is not None, 'Region must be specified'
150
153
  if (cloud_str not in location_requirements and
151
154
  location_requirements):
152
155
  continue