skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. sky/__init__.py +64 -32
  2. sky/adaptors/aws.py +23 -6
  3. sky/adaptors/azure.py +432 -15
  4. sky/adaptors/cloudflare.py +5 -5
  5. sky/adaptors/common.py +19 -9
  6. sky/adaptors/do.py +20 -0
  7. sky/adaptors/gcp.py +3 -2
  8. sky/adaptors/kubernetes.py +122 -88
  9. sky/adaptors/nebius.py +100 -0
  10. sky/adaptors/oci.py +39 -1
  11. sky/adaptors/vast.py +29 -0
  12. sky/admin_policy.py +101 -0
  13. sky/authentication.py +117 -98
  14. sky/backends/backend.py +52 -20
  15. sky/backends/backend_utils.py +669 -557
  16. sky/backends/cloud_vm_ray_backend.py +1099 -808
  17. sky/backends/local_docker_backend.py +14 -8
  18. sky/backends/wheel_utils.py +38 -20
  19. sky/benchmark/benchmark_utils.py +22 -23
  20. sky/check.py +76 -27
  21. sky/cli.py +1586 -1139
  22. sky/client/__init__.py +1 -0
  23. sky/client/cli.py +5683 -0
  24. sky/client/common.py +345 -0
  25. sky/client/sdk.py +1765 -0
  26. sky/cloud_stores.py +283 -19
  27. sky/clouds/__init__.py +7 -2
  28. sky/clouds/aws.py +303 -112
  29. sky/clouds/azure.py +185 -179
  30. sky/clouds/cloud.py +115 -37
  31. sky/clouds/cudo.py +29 -22
  32. sky/clouds/do.py +313 -0
  33. sky/clouds/fluidstack.py +44 -54
  34. sky/clouds/gcp.py +206 -65
  35. sky/clouds/ibm.py +26 -21
  36. sky/clouds/kubernetes.py +345 -91
  37. sky/clouds/lambda_cloud.py +40 -29
  38. sky/clouds/nebius.py +297 -0
  39. sky/clouds/oci.py +129 -90
  40. sky/clouds/paperspace.py +22 -18
  41. sky/clouds/runpod.py +53 -34
  42. sky/clouds/scp.py +28 -24
  43. sky/clouds/service_catalog/__init__.py +19 -13
  44. sky/clouds/service_catalog/aws_catalog.py +29 -12
  45. sky/clouds/service_catalog/azure_catalog.py +33 -6
  46. sky/clouds/service_catalog/common.py +95 -75
  47. sky/clouds/service_catalog/constants.py +3 -3
  48. sky/clouds/service_catalog/cudo_catalog.py +13 -3
  49. sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
  50. sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
  51. sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
  52. sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
  53. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
  54. sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
  55. sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
  56. sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
  57. sky/clouds/service_catalog/do_catalog.py +111 -0
  58. sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
  59. sky/clouds/service_catalog/gcp_catalog.py +16 -2
  60. sky/clouds/service_catalog/ibm_catalog.py +2 -2
  61. sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
  62. sky/clouds/service_catalog/lambda_catalog.py +8 -3
  63. sky/clouds/service_catalog/nebius_catalog.py +116 -0
  64. sky/clouds/service_catalog/oci_catalog.py +31 -4
  65. sky/clouds/service_catalog/paperspace_catalog.py +2 -2
  66. sky/clouds/service_catalog/runpod_catalog.py +2 -2
  67. sky/clouds/service_catalog/scp_catalog.py +2 -2
  68. sky/clouds/service_catalog/vast_catalog.py +104 -0
  69. sky/clouds/service_catalog/vsphere_catalog.py +2 -2
  70. sky/clouds/utils/aws_utils.py +65 -0
  71. sky/clouds/utils/azure_utils.py +91 -0
  72. sky/clouds/utils/gcp_utils.py +5 -9
  73. sky/clouds/utils/oci_utils.py +47 -5
  74. sky/clouds/utils/scp_utils.py +4 -3
  75. sky/clouds/vast.py +280 -0
  76. sky/clouds/vsphere.py +22 -18
  77. sky/core.py +361 -107
  78. sky/dag.py +41 -28
  79. sky/data/data_transfer.py +37 -0
  80. sky/data/data_utils.py +211 -32
  81. sky/data/mounting_utils.py +182 -30
  82. sky/data/storage.py +2118 -270
  83. sky/data/storage_utils.py +126 -5
  84. sky/exceptions.py +179 -8
  85. sky/execution.py +158 -85
  86. sky/global_user_state.py +150 -34
  87. sky/jobs/__init__.py +12 -10
  88. sky/jobs/client/__init__.py +0 -0
  89. sky/jobs/client/sdk.py +302 -0
  90. sky/jobs/constants.py +49 -11
  91. sky/jobs/controller.py +161 -99
  92. sky/jobs/dashboard/dashboard.py +171 -25
  93. sky/jobs/dashboard/templates/index.html +572 -60
  94. sky/jobs/recovery_strategy.py +157 -156
  95. sky/jobs/scheduler.py +307 -0
  96. sky/jobs/server/__init__.py +1 -0
  97. sky/jobs/server/core.py +598 -0
  98. sky/jobs/server/dashboard_utils.py +69 -0
  99. sky/jobs/server/server.py +190 -0
  100. sky/jobs/state.py +627 -122
  101. sky/jobs/utils.py +615 -206
  102. sky/models.py +27 -0
  103. sky/optimizer.py +142 -83
  104. sky/provision/__init__.py +20 -5
  105. sky/provision/aws/config.py +124 -42
  106. sky/provision/aws/instance.py +130 -53
  107. sky/provision/azure/__init__.py +7 -0
  108. sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
  109. sky/provision/azure/config.py +220 -0
  110. sky/provision/azure/instance.py +1012 -37
  111. sky/provision/common.py +31 -3
  112. sky/provision/constants.py +25 -0
  113. sky/provision/cudo/__init__.py +2 -1
  114. sky/provision/cudo/cudo_utils.py +112 -0
  115. sky/provision/cudo/cudo_wrapper.py +37 -16
  116. sky/provision/cudo/instance.py +28 -12
  117. sky/provision/do/__init__.py +11 -0
  118. sky/provision/do/config.py +14 -0
  119. sky/provision/do/constants.py +10 -0
  120. sky/provision/do/instance.py +287 -0
  121. sky/provision/do/utils.py +301 -0
  122. sky/provision/docker_utils.py +82 -46
  123. sky/provision/fluidstack/fluidstack_utils.py +57 -125
  124. sky/provision/fluidstack/instance.py +15 -43
  125. sky/provision/gcp/config.py +19 -9
  126. sky/provision/gcp/constants.py +7 -1
  127. sky/provision/gcp/instance.py +55 -34
  128. sky/provision/gcp/instance_utils.py +339 -80
  129. sky/provision/gcp/mig_utils.py +210 -0
  130. sky/provision/instance_setup.py +172 -133
  131. sky/provision/kubernetes/__init__.py +1 -0
  132. sky/provision/kubernetes/config.py +104 -90
  133. sky/provision/kubernetes/constants.py +8 -0
  134. sky/provision/kubernetes/instance.py +680 -325
  135. sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
  136. sky/provision/kubernetes/network.py +54 -20
  137. sky/provision/kubernetes/network_utils.py +70 -21
  138. sky/provision/kubernetes/utils.py +1370 -251
  139. sky/provision/lambda_cloud/__init__.py +11 -0
  140. sky/provision/lambda_cloud/config.py +10 -0
  141. sky/provision/lambda_cloud/instance.py +265 -0
  142. sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
  143. sky/provision/logging.py +1 -1
  144. sky/provision/nebius/__init__.py +11 -0
  145. sky/provision/nebius/config.py +11 -0
  146. sky/provision/nebius/instance.py +285 -0
  147. sky/provision/nebius/utils.py +318 -0
  148. sky/provision/oci/__init__.py +15 -0
  149. sky/provision/oci/config.py +51 -0
  150. sky/provision/oci/instance.py +436 -0
  151. sky/provision/oci/query_utils.py +681 -0
  152. sky/provision/paperspace/constants.py +6 -0
  153. sky/provision/paperspace/instance.py +4 -3
  154. sky/provision/paperspace/utils.py +2 -0
  155. sky/provision/provisioner.py +207 -130
  156. sky/provision/runpod/__init__.py +1 -0
  157. sky/provision/runpod/api/__init__.py +3 -0
  158. sky/provision/runpod/api/commands.py +119 -0
  159. sky/provision/runpod/api/pods.py +142 -0
  160. sky/provision/runpod/instance.py +64 -8
  161. sky/provision/runpod/utils.py +239 -23
  162. sky/provision/vast/__init__.py +10 -0
  163. sky/provision/vast/config.py +11 -0
  164. sky/provision/vast/instance.py +247 -0
  165. sky/provision/vast/utils.py +162 -0
  166. sky/provision/vsphere/common/vim_utils.py +1 -1
  167. sky/provision/vsphere/instance.py +8 -18
  168. sky/provision/vsphere/vsphere_utils.py +1 -1
  169. sky/resources.py +247 -102
  170. sky/serve/__init__.py +9 -9
  171. sky/serve/autoscalers.py +361 -299
  172. sky/serve/client/__init__.py +0 -0
  173. sky/serve/client/sdk.py +366 -0
  174. sky/serve/constants.py +12 -3
  175. sky/serve/controller.py +106 -36
  176. sky/serve/load_balancer.py +63 -12
  177. sky/serve/load_balancing_policies.py +84 -2
  178. sky/serve/replica_managers.py +42 -34
  179. sky/serve/serve_state.py +62 -32
  180. sky/serve/serve_utils.py +271 -160
  181. sky/serve/server/__init__.py +0 -0
  182. sky/serve/{core.py → server/core.py} +271 -90
  183. sky/serve/server/server.py +112 -0
  184. sky/serve/service.py +52 -16
  185. sky/serve/service_spec.py +95 -32
  186. sky/server/__init__.py +1 -0
  187. sky/server/common.py +430 -0
  188. sky/server/constants.py +21 -0
  189. sky/server/html/log.html +174 -0
  190. sky/server/requests/__init__.py +0 -0
  191. sky/server/requests/executor.py +472 -0
  192. sky/server/requests/payloads.py +487 -0
  193. sky/server/requests/queues/__init__.py +0 -0
  194. sky/server/requests/queues/mp_queue.py +76 -0
  195. sky/server/requests/requests.py +567 -0
  196. sky/server/requests/serializers/__init__.py +0 -0
  197. sky/server/requests/serializers/decoders.py +192 -0
  198. sky/server/requests/serializers/encoders.py +166 -0
  199. sky/server/server.py +1106 -0
  200. sky/server/stream_utils.py +141 -0
  201. sky/setup_files/MANIFEST.in +2 -5
  202. sky/setup_files/dependencies.py +159 -0
  203. sky/setup_files/setup.py +14 -125
  204. sky/sky_logging.py +59 -14
  205. sky/skylet/autostop_lib.py +2 -2
  206. sky/skylet/constants.py +183 -50
  207. sky/skylet/events.py +22 -10
  208. sky/skylet/job_lib.py +403 -258
  209. sky/skylet/log_lib.py +111 -71
  210. sky/skylet/log_lib.pyi +6 -0
  211. sky/skylet/providers/command_runner.py +6 -8
  212. sky/skylet/providers/ibm/node_provider.py +2 -2
  213. sky/skylet/providers/scp/config.py +11 -3
  214. sky/skylet/providers/scp/node_provider.py +8 -8
  215. sky/skylet/skylet.py +3 -1
  216. sky/skylet/subprocess_daemon.py +69 -17
  217. sky/skypilot_config.py +119 -57
  218. sky/task.py +205 -64
  219. sky/templates/aws-ray.yml.j2 +37 -7
  220. sky/templates/azure-ray.yml.j2 +27 -82
  221. sky/templates/cudo-ray.yml.j2 +7 -3
  222. sky/templates/do-ray.yml.j2 +98 -0
  223. sky/templates/fluidstack-ray.yml.j2 +7 -4
  224. sky/templates/gcp-ray.yml.j2 +26 -6
  225. sky/templates/ibm-ray.yml.j2 +3 -2
  226. sky/templates/jobs-controller.yaml.j2 +46 -11
  227. sky/templates/kubernetes-ingress.yml.j2 +7 -0
  228. sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
  229. sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
  230. sky/templates/kubernetes-ray.yml.j2 +292 -25
  231. sky/templates/lambda-ray.yml.j2 +30 -40
  232. sky/templates/nebius-ray.yml.j2 +79 -0
  233. sky/templates/oci-ray.yml.j2 +18 -57
  234. sky/templates/paperspace-ray.yml.j2 +10 -6
  235. sky/templates/runpod-ray.yml.j2 +26 -4
  236. sky/templates/scp-ray.yml.j2 +3 -2
  237. sky/templates/sky-serve-controller.yaml.j2 +12 -1
  238. sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
  239. sky/templates/vast-ray.yml.j2 +70 -0
  240. sky/templates/vsphere-ray.yml.j2 +8 -3
  241. sky/templates/websocket_proxy.py +64 -0
  242. sky/usage/constants.py +10 -1
  243. sky/usage/usage_lib.py +130 -37
  244. sky/utils/accelerator_registry.py +35 -51
  245. sky/utils/admin_policy_utils.py +147 -0
  246. sky/utils/annotations.py +51 -0
  247. sky/utils/cli_utils/status_utils.py +81 -23
  248. sky/utils/cluster_utils.py +356 -0
  249. sky/utils/command_runner.py +452 -89
  250. sky/utils/command_runner.pyi +77 -3
  251. sky/utils/common.py +54 -0
  252. sky/utils/common_utils.py +319 -108
  253. sky/utils/config_utils.py +204 -0
  254. sky/utils/control_master_utils.py +48 -0
  255. sky/utils/controller_utils.py +548 -266
  256. sky/utils/dag_utils.py +93 -32
  257. sky/utils/db_utils.py +18 -4
  258. sky/utils/env_options.py +29 -7
  259. sky/utils/kubernetes/create_cluster.sh +8 -60
  260. sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
  261. sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
  262. sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
  263. sky/utils/kubernetes/gpu_labeler.py +4 -4
  264. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
  265. sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
  266. sky/utils/kubernetes/rsync_helper.sh +24 -0
  267. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
  268. sky/utils/log_utils.py +240 -33
  269. sky/utils/message_utils.py +81 -0
  270. sky/utils/registry.py +127 -0
  271. sky/utils/resources_utils.py +94 -22
  272. sky/utils/rich_utils.py +247 -18
  273. sky/utils/schemas.py +284 -64
  274. sky/{status_lib.py → utils/status_lib.py} +12 -7
  275. sky/utils/subprocess_utils.py +212 -46
  276. sky/utils/timeline.py +12 -7
  277. sky/utils/ux_utils.py +168 -15
  278. skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
  279. skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
  280. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
  281. sky/clouds/cloud_registry.py +0 -31
  282. sky/jobs/core.py +0 -330
  283. sky/skylet/providers/azure/__init__.py +0 -2
  284. sky/skylet/providers/azure/azure-vm-template.json +0 -301
  285. sky/skylet/providers/azure/config.py +0 -170
  286. sky/skylet/providers/azure/node_provider.py +0 -466
  287. sky/skylet/providers/lambda_cloud/__init__.py +0 -2
  288. sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
  289. sky/skylet/providers/oci/__init__.py +0 -2
  290. sky/skylet/providers/oci/node_provider.py +0 -488
  291. sky/skylet/providers/oci/query_helper.py +0 -383
  292. sky/skylet/providers/oci/utils.py +0 -21
  293. sky/utils/cluster_yaml_utils.py +0 -24
  294. sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
  295. skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
  296. skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
  297. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
  298. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
  299. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
sky/serve/autoscalers.py CHANGED
@@ -42,11 +42,10 @@ class AutoscalerDecision:
     # TODO(MaoZiming): Add a doc to elaborate on autoscaling policies.
     def __init__(self, operator: AutoscalerDecisionOperator,
                  target: Union[Optional[Dict[str, Any]], int]):
-
-        assert (operator == AutoscalerDecisionOperator.SCALE_UP and
-                (target is None or isinstance(target, dict))) or (
-                    operator == AutoscalerDecisionOperator.SCALE_DOWN and
-                    isinstance(target, int))
+        if operator == AutoscalerDecisionOperator.SCALE_UP:
+            assert (target is None or isinstance(target, dict))
+        else:
+            assert isinstance(target, int)
         self.operator = operator
         self.target = target

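Annotation: with the refactored validation above, SCALE_UP decisions carry an optional resources-override dict and SCALE_DOWN decisions carry a replica id. A minimal sketch of both cases (the override value is illustrative, not prescribed by this diff):

    scale_up = AutoscalerDecision(AutoscalerDecisionOperator.SCALE_UP,
                                  target={'use_spot': True})  # dict or None
    scale_down = AutoscalerDecision(AutoscalerDecisionOperator.SCALE_DOWN,
                                    target=7)  # replica id to terminate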
@@ -54,9 +53,70 @@ class AutoscalerDecision:
         return f'AutoscalerDecision({self.operator}, {self.target})'


+def _generate_scale_up_decisions(
+        num: int, target: Optional[Dict[str, Any]]) -> List[AutoscalerDecision]:
+    return [
+        AutoscalerDecision(AutoscalerDecisionOperator.SCALE_UP, target)
+        for _ in range(num)
+    ]
+
+
+def _generate_scale_down_decisions(
+        replica_ids: List[int]) -> List[AutoscalerDecision]:
+    return [
+        AutoscalerDecision(AutoscalerDecisionOperator.SCALE_DOWN, replica_id)
+        for replica_id in replica_ids
+    ]
+
+
+def _select_nonterminal_replicas_to_scale_down(
+    num_replica_to_scale_down: int,
+    replica_infos: Iterable['replica_managers.ReplicaInfo'],
+) -> List[int]:
+    """Select nonterminal replicas to scale down.
+
+    We sort the replicas based on the following order:
+        1. Based on the `scale_down_decision_order` of the status. We terminate
+            the replicas that is in earlier stage first, as the replicas in
+            later stage may become ready soon.
+        2. Based on the version in ascending order, so we scale down the older
+            versions first.
+        3. Based on the replica_id in descending order, which is also the order
+            of the replicas being launched. We scale down the replicas that are
+            launched earlier first, as the replicas that are launched later may
+            become ready soon.
+
+    Args:
+        num_replica_to_scale_down: The number of replicas to scale down.
+        replica_infos: The list of replica informations to select from.
+
+    Returns:
+        The list of replica ids to scale down.
+    """
+    replicas = list(replica_infos)
+    status_order = serve_state.ReplicaStatus.scale_down_decision_order()
+    assert all(info.status in status_order for info in replicas), (
+        'All replicas to scale down should be in provisioning or launched '
+        'status.', replicas)
+    replicas = sorted(
+        replicas,
+        key=lambda info: (
+            status_order.index(info.status),
+            # version in ascending order
+            info.version,
+            # replica_id in descending order, i.e. launched order
+            -info.replica_id))
+    assert len(replicas) >= num_replica_to_scale_down, (
+        'Not enough replicas to scale down. Available replicas: ',
+        f'{replicas}, num_replica_to_scale_down: {num_replica_to_scale_down}.')
+    return [info.replica_id for info in replicas][:num_replica_to_scale_down]
+
+
 class Autoscaler:
     """Abstract class for autoscalers."""

+    # --------------- APIs to implement for custom autoscaler ---------------
+
     def __init__(self, service_name: str,
                  spec: 'service_spec.SkyServiceSpec') -> None:
         """Initialize the autoscaler.
@@ -67,6 +127,8 @@ class Autoscaler:
                 number of replicas, i.e. min_replicas == max_replicas.
             target_num_replicas: Target number of replicas output by autoscaler.
             latest_version: latest version of the service.
+            latest_version_ever_ready: The latest version that is ever ready.
+            update_mode: Update mode for the service.
         """
         self._service_name: str = service_name
         self.min_replicas: int = spec.min_replicas
@@ -81,6 +143,10 @@ class Autoscaler:
         self.latest_version_ever_ready: int = self.latest_version - 1
         self.update_mode = serve_utils.DEFAULT_UPDATE_MODE

+    def _calculate_target_num_replicas(self) -> int:
+        """Calculate target number of replicas."""
+        raise NotImplementedError
+
     def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
                        update_mode: serve_utils.UpdateMode) -> None:
         if version <= self.latest_version:
@@ -91,9 +157,9 @@ class Autoscaler:
         self.min_replicas = spec.min_replicas
         self.max_replicas = (spec.max_replicas if spec.max_replicas is not None
                              else spec.min_replicas)
-        # Reclip self.target_num_replicas with new min and max replicas.
-        self.target_num_replicas = max(
-            self.min_replicas, min(self.max_replicas, self.target_num_replicas))
+        # Re-clip self.target_num_replicas with new min and max replicas.
+        self.target_num_replicas = self._clip_target_num_replicas(
+            self.target_num_replicas)
         self.update_mode = update_mode

     def collect_request_information(
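Annotation: the re-clipping introduced above delegates to the new _clip_target_num_replicas helper, which is the max/min composition shown further down in this diff. A worked sketch with illustrative bounds:

    min_replicas, max_replicas = 1, 5  # illustrative bounds
    clip = lambda target: max(min_replicas, min(max_replicas, target))
    assert clip(8) == 5 and clip(0) == 1 and clip(3) == 3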
@@ -101,223 +167,61 @@ class Autoscaler:
         """Collect request information from aggregator for autoscaling."""
         raise NotImplementedError

-    def evaluate_scaling(
+    def _generate_scaling_decisions(
         self,
         replica_infos: List['replica_managers.ReplicaInfo'],
     ) -> List[AutoscalerDecision]:
-        """Evaluate autoscale options based on replica information."""
+        """Generate Autoscaling decisions based on replica information."""
         raise NotImplementedError

-    @classmethod
-    def from_spec(cls, service_name: str,
-                  spec: 'service_spec.SkyServiceSpec') -> 'Autoscaler':
-        # TODO(MaoZiming): use NAME to get the class.
-        if spec.use_ondemand_fallback:
-            return FallbackRequestRateAutoscaler(service_name, spec)
-        else:
-            return RequestRateAutoscaler(service_name, spec)
-
     def _dump_dynamic_states(self) -> Dict[str, Any]:
         """Dump dynamic states from autoscaler."""
         raise NotImplementedError

-    def dump_dynamic_states(self) -> Dict[str, Any]:
-        """Dump dynamic states from autoscaler."""
-        states = {'latest_version_ever_ready': self.latest_version_ever_ready}
-        states.update(self._dump_dynamic_states())
-        return states
-
     def _load_dynamic_states(self, dynamic_states: Dict[str, Any]) -> None:
         """Load dynamic states to autoscaler."""
         raise NotImplementedError

-    def load_dynamic_states(self, dynamic_states: Dict[str, Any]) -> None:
-        """Load dynamic states to autoscaler."""
-        self.latest_version_ever_ready = dynamic_states.pop(
-            'latest_version_ever_ready', constants.INITIAL_VERSION)
-        self._load_dynamic_states(dynamic_states)
-
-
-class RequestRateAutoscaler(Autoscaler):
-    """RequestRateAutoscaler: Autoscale according to request rate.
-
-    Scales when the number of requests per replica in the given interval
-    is above or below the target qps per replica. The instance can be
-    either spot or on-demand, but not both.
-    """
-
-    def __init__(self, service_name: str,
-                 spec: 'service_spec.SkyServiceSpec') -> None:
-        """Initialize the request rate autoscaler.
+    # --------------- Utility Functions ---------------

-        Variables:
-            target_qps_per_replica: Target qps per replica for autoscaling.
-            qps_window_size: Window size for qps calculating.
-            request_timestamps: All request timestamps within the window.
-            upscale_counter: counter for upscale number of replicas.
-            downscale_counter: counter for downscale number of replicas.
-            scale_up_consecutive_periods: period for scaling up.
-            scale_down_consecutive_periods: period for scaling down.
+    def _clip_target_num_replicas(self, target_num_replicas: int) -> int:
+        """Clip target number of replicas with current minimal and maximum
+        number of replicas.
         """
-        super().__init__(service_name, spec)
-        self.target_qps_per_replica: Optional[
-            float] = spec.target_qps_per_replica
-        self.qps_window_size: int = constants.AUTOSCALER_QPS_WINDOW_SIZE_SECONDS
-        self.request_timestamps: List[float] = []
-        self.upscale_counter: int = 0
-        self.downscale_counter: int = 0
-        upscale_delay_seconds = (
-            spec.upscale_delay_seconds if spec.upscale_delay_seconds is not None
-            else constants.AUTOSCALER_DEFAULT_UPSCALE_DELAY_SECONDS)
-        self.scale_up_consecutive_periods: int = int(
-            upscale_delay_seconds /
-            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
-        downscale_delay_seconds = (
-            spec.downscale_delay_seconds
-            if spec.downscale_delay_seconds is not None else
-            constants.AUTOSCALER_DEFAULT_DOWNSCALE_DELAY_SECONDS)
-        self.scale_down_consecutive_periods: int = int(
-            downscale_delay_seconds /
-            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
-
-    def _cal_target_num_replicas_based_on_qps(self) -> int:
-        # Recalculate target_num_replicas based on QPS.
-        # Reclip self.target_num_replicas with new min and max replicas.
-        if self.target_qps_per_replica is None:
-            return self.min_replicas
-        target_num_replicas = math.ceil(
-            len(self.request_timestamps) / self.qps_window_size /
-            self.target_qps_per_replica)
         return max(self.min_replicas, min(self.max_replicas,
                                           target_num_replicas))

-    def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
-                       update_mode: serve_utils.UpdateMode) -> None:
-        super().update_version(version, spec, update_mode)
-        self.target_qps_per_replica = spec.target_qps_per_replica
-        upscale_delay_seconds = (
-            spec.upscale_delay_seconds if spec.upscale_delay_seconds is not None
-            else constants.AUTOSCALER_DEFAULT_UPSCALE_DELAY_SECONDS)
-        self.scale_up_consecutive_periods = int(
-            upscale_delay_seconds /
-            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
-        downscale_delay_seconds = (
-            spec.downscale_delay_seconds
-            if spec.downscale_delay_seconds is not None else
-            constants.AUTOSCALER_DEFAULT_DOWNSCALE_DELAY_SECONDS)
-        self.scale_down_consecutive_periods = int(
-            downscale_delay_seconds /
-            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
-
-        # We directly set the target_num_replicas here instead of
-        # calling `_set_target_num_replica_with_hysteresis` to have the replicas
-        # quickly scale after each update.
-        self.target_num_replicas = self._cal_target_num_replicas_based_on_qps()
-        # Cleanup hysteretic counters.
-        self.upscale_counter = 0
-        self.downscale_counter = 0
-
-    def collect_request_information(
-            self, request_aggregator_info: Dict[str, Any]) -> None:
-        """Collect request information from aggregator for autoscaling.
-
-        request_aggregator_info should be a dict with the following format:
-
-        {
-            'timestamps': [timestamp1 (float), timestamp2 (float), ...]
-        }
-        """
-        self.request_timestamps.extend(
-            request_aggregator_info.get('timestamps', []))
-        current_time = time.time()
-        index = bisect.bisect_left(self.request_timestamps,
-                                   current_time - self.qps_window_size)
-        self.request_timestamps = self.request_timestamps[index:]
-        logger.info(f'Num of requests in the last {self.qps_window_size} '
-                    f'seconds: {len(self.request_timestamps)}')
-
-    def _set_target_num_replica_with_hysteresis(self) -> None:
-        """Set target_num_replicas based on request rate with hysteresis."""
-        # Keep self.target_num_replicas unchange when autoscaling
-        # is not enabled, i.e. self.target_qps_per_replica is None.
-        # In this case, self.target_num_replicas will be min_replicas.
-        if self.target_qps_per_replica is None:
-            return
-
-        # Convert to requests per second.
-        target_num_replicas = self._cal_target_num_replicas_based_on_qps()
-        old_target_num_replicas = self.target_num_replicas
-
-        # Faster scale up when there is no replica.
-        if self.target_num_replicas == 0:
-            self.target_num_replicas = target_num_replicas
-        elif target_num_replicas > self.target_num_replicas:
-            self.upscale_counter += 1
-            self.downscale_counter = 0
-            if self.upscale_counter >= self.scale_up_consecutive_periods:
-                self.upscale_counter = 0
-                self.target_num_replicas = target_num_replicas
-        elif target_num_replicas < self.target_num_replicas:
-            self.downscale_counter += 1
-            self.upscale_counter = 0
-            if self.downscale_counter >= self.scale_down_consecutive_periods:
-                self.downscale_counter = 0
-                self.target_num_replicas = target_num_replicas
-        else:
-            self.upscale_counter = self.downscale_counter = 0
-
-        num_requests_per_second = len(
-            self.request_timestamps) / self.qps_window_size
-        logger.info(
-            f'Requests per second: {num_requests_per_second}. '
-            f'Current target number of replicas: {old_target_num_replicas}. '
-            f'Final target number of replicas: {self.target_num_replicas}. '
-            f'Upscale counter: {self.upscale_counter}/'
-            f'{self.scale_up_consecutive_periods}. '
-            f'Downscale counter: {self.downscale_counter}/'
-            f'{self.scale_down_consecutive_periods}')
-
     @classmethod
-    def _select_nonterminal_replicas_to_scale_down(
-            cls, num_limit: int,
-            replica_infos: Iterable['replica_managers.ReplicaInfo']
-    ) -> List[int]:
-        status_order = serve_state.ReplicaStatus.scale_down_decision_order()
-        replicas = list(replica_infos)
-        assert all(info.status in status_order for info in replicas), (
-            'All replicas to scale down should be in provisioning or launched '
-            'status.', replicas)
-        replicas = sorted(
-            replicas,
-            key=lambda info: (
-                status_order.index(info.status),
-                # Sort by version in ascending order, so we scale down the older
-                # versions first.
-                info.version,
-                # Sort `info.replica_id` in descending order so that the
-                # replicas in the same version starts to provisioning later are
-                # scaled down first.
-                -info.replica_id))
-        assert len(replicas) >= num_limit, (
-            'Not enough replicas to scale down.', replicas, num_limit)
-        return [info.replica_id for info in replicas][:num_limit]
+    def from_spec(cls, service_name: str,
+                  spec: 'service_spec.SkyServiceSpec') -> 'Autoscaler':
+        # TODO(MaoZiming): use NAME to get the class.
+        if spec.use_ondemand_fallback:
+            return FallbackRequestRateAutoscaler(service_name, spec)
+        else:
+            return RequestRateAutoscaler(service_name, spec)

     def get_decision_interval(self) -> int:
-        # Reduce autoscaler interval when target_num_replicas = 0.
-        # This will happen when min_replicas = 0 and no traffic.
+        """Get the decision interval for the autoscaler.
+
+        We reduce the decision interval when the desired number of replicas is
+        0, to make the service scale faster when the service is not running.
+        This will happen when min_replicas = 0 and no traffic.
+        """
         if self.target_num_replicas == 0:
             return constants.AUTOSCALER_NO_REPLICA_DECISION_INTERVAL_SECONDS
         else:
             return constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS

-    def select_outdated_replicas_to_scale_down(
-            self,
-            replica_infos: List['replica_managers.ReplicaInfo']) -> List[int]:
+    def _select_outdated_replicas_to_scale_down(
+        self,
+        replica_infos: List['replica_managers.ReplicaInfo'],
+        active_versions: List[int],
+    ) -> List[int]:
         """Select outdated replicas to scale down."""

         if self.update_mode == serve_utils.UpdateMode.ROLLING:
-            latest_ready_replicas = []
-            old_nonterminal_replicas = []
+            latest_ready_replicas: List['replica_managers.ReplicaInfo'] = []
+            old_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = []
             for info in replica_infos:
                 if info.version == self.latest_version:
                     if info.is_ready:
@@ -346,19 +250,12 @@ class RequestRateAutoscaler(Autoscaler):
             # `_select_replicas_to_scale_down` will make sure we scale the
             # replicas in initializing statuses first before scaling down the
             # READY old replicas.
-            return self._select_nonterminal_replicas_to_scale_down(
+            return _select_nonterminal_replicas_to_scale_down(
                 max(0,
                     len(old_nonterminal_replicas) - num_old_replicas_to_keep),
                 old_nonterminal_replicas,
             )

-        # Use the active versions set by replica manager to make sure we only
-        # scale down the outdated replicas that are not used by the load
-        # balancer.
-        record = serve_state.get_service_from_name(self._service_name)
-        assert record is not None, (f'No service record found for '
-                                    f'{self._service_name}')
-        active_versions = record['active_versions']
         if not active_versions:
             # active_versions can be empty when none of the replicas are ready
             # when the load balancer sync with the controller.
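Annotation: the selection helper referenced above orders scale-down candidates by lifecycle status first, then ascending version, then descending replica id. A standalone sketch of that three-level sort key over hypothetical (status_rank, version, replica_id) tuples:

    candidates = [(1, 2, 7), (0, 1, 3), (0, 2, 9), (0, 2, 8)]
    ordered = sorted(candidates, key=lambda r: (r[0], r[1], -r[2]))
    print([r[2] for r in ordered][:2])  # replica ids picked first: [3, 9]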
@@ -372,36 +269,35 @@ class RequestRateAutoscaler(Autoscaler):
         # number of ready new replicas is greater than or equal to the min
         # replicas instead of the target, to ensure the service being updated
         # to the latest version faster.
-        all_replica_ids_to_scale_down: List[int] = []
-        for info in replica_infos:
-            if info.version < latest_version_with_min_replicas:
-                all_replica_ids_to_scale_down.append(info.replica_id)
-
-        return all_replica_ids_to_scale_down
+        return [
+            info.replica_id
+            for info in replica_infos
+            if info.version < latest_version_with_min_replicas
+        ]

-    def evaluate_scaling(
+    def generate_scaling_decisions(
         self,
         replica_infos: List['replica_managers.ReplicaInfo'],
+        active_versions: List[int],
     ) -> List[AutoscalerDecision]:
-        """Evaluate Autoscaling decisions based on replica information.
-        If the number of launched replicas is less than the target,
-        Trigger a scale up. Else, trigger a scale down.
+        """Generate Autoscaling decisions based on replica information.
+        If the number of launched replicas is less than the target, trigger a
+        scale up. Else, trigger a scale down. This function also handles the
+        version control of the replicas.

         For future compatibility, we return a list of AutoscalerDecision.
         Scale-up could include both spot and on-demand, each with a resource
         override dict. Active migration could require returning both SCALE_UP
         and SCALE_DOWN.
         """
-        latest_replicas: List['replica_managers.ReplicaInfo'] = []
-        latest_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = []

+        # Handle latest version unrecoverable failure first.
+        latest_replicas: List['replica_managers.ReplicaInfo'] = []
         for info in replica_infos:
             if info.version == self.latest_version:
                 latest_replicas.append(info)
-                if not info.is_terminal:
-                    latest_nonterminal_replicas.append(info)
-                if info.is_ready:
-                    self.latest_version_ever_ready = self.latest_version
+                if info.is_ready:
+                    self.latest_version_ever_ready = self.latest_version
         if self.latest_version_ever_ready < self.latest_version:
             for info in latest_replicas:
                 if info.status_property.unrecoverable_failure():
@@ -411,55 +307,229 @@ class RequestRateAutoscaler(Autoscaler):
                     # and restart.
                     return []

-        self._set_target_num_replica_with_hysteresis()
+        scaling_decisions = []

-        scaling_options: List[AutoscalerDecision] = []
-        all_replica_ids_to_scale_down: List[int] = []
+        # If rolling update is in progress, we scale down old replicas based on
+        # the number of ready new replicas and the traffic is directed to both
+        # old and new replicas. Or, for blue_green update, once there is
+        # min_replicas number of ready new replicas, we will direct all traffic
+        # to them, we can scale down all old replicas.
+        # TODO(MaoZiming,zhwu): corner case: We should make sure the fallback
+        # replicas are ready before scaling down the old replicas to avoid the
+        # situation that all the ready new replicas are preempted together.
+        scaling_decisions.extend(
+            _generate_scale_down_decisions(
+                self._select_outdated_replicas_to_scale_down(
+                    replica_infos, active_versions)))
+
+        # If the latest version is ever ready, we can proceed to generate
+        # decisions from the implementations in subclasses.
+        scaling_decisions.extend(
+            self._generate_scaling_decisions(replica_infos))

-        # Case 1. If rolling update is in progress, we scale down old replicas
-        # based on the number of ready new replicas and the traffic is directed
-        # to both old and new replicas.
-        # Or, for blue_green update, once there is min_replicas number of ready
-        # new replicas, we will direct all traffic to them, we can scale down
-        # all old replicas.
-        all_replica_ids_to_scale_down.extend(
-            self.select_outdated_replicas_to_scale_down(replica_infos))
+        if not scaling_decisions:
+            logger.info('No scaling needed.')

-        # Case 2. when latest_nonterminal_replicas is less
+        return scaling_decisions
+
+    def dump_dynamic_states(self) -> Dict[str, Any]:
+        """Dump dynamic states from autoscaler."""
+        states = {'latest_version_ever_ready': self.latest_version_ever_ready}
+        states.update(self._dump_dynamic_states())
+        return states
+
+    def load_dynamic_states(self, dynamic_states: Dict[str, Any]) -> None:
+        """Load dynamic states to autoscaler."""
+        self.latest_version_ever_ready = dynamic_states.pop(
+            'latest_version_ever_ready', constants.INITIAL_VERSION)
+        self._load_dynamic_states(dynamic_states)
+
+
+class _AutoscalerWithHysteresis(Autoscaler):
+    """_AutoscalerWithHysteresis: Autoscale with hysteresis.
+
+    This is an internal class for developing autoscalers with hysteresis. It
+    only scales when the number of replicas is above or below the target number
+    of replicas for a certain number of consecutive periods.
+    """
+
+    def _setup_thresholds(self, spec: 'service_spec.SkyServiceSpec') -> None:
+        upscale_delay_seconds = (
+            spec.upscale_delay_seconds if spec.upscale_delay_seconds is not None
+            else constants.AUTOSCALER_DEFAULT_UPSCALE_DELAY_SECONDS)
+        self.scale_up_threshold: int = int(
+            upscale_delay_seconds /
+            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
+        downscale_delay_seconds = (
+            spec.downscale_delay_seconds
+            if spec.downscale_delay_seconds is not None else
+            constants.AUTOSCALER_DEFAULT_DOWNSCALE_DELAY_SECONDS)
+        self.scale_down_threshold: int = int(
+            downscale_delay_seconds /
+            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
+
+    def __init__(self, service_name: str,
+                 spec: 'service_spec.SkyServiceSpec') -> None:
+        """Initialize the hysteresis autoscaler.
+
+        Variables:
+            upscale_counter: Counter for upscale decisions of replicas.
+            downscale_counter: Counter for downscale decisions of replicas.
+            scale_up_threshold: The threshold to trigger a scale up.
+            scale_down_threshold: The threshold to trigger a scale down.
+        """
+        super().__init__(service_name, spec)
+        self.upscale_counter: int = 0
+        self.downscale_counter: int = 0
+        self._setup_thresholds(spec)
+
+    def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
+                       update_mode: serve_utils.UpdateMode) -> None:
+        super().update_version(version, spec, update_mode)
+        # We directly set the target_num_replicas here instead of calling
+        # `_set_target_num_replicas_with_hysteresis` to have the replicas
+        # quickly scale after each update.
+        self.target_num_replicas = self._calculate_target_num_replicas()
+        # Cleanup hysteresis counters.
+        self.upscale_counter = 0
+        self.downscale_counter = 0
+        self._setup_thresholds(spec)
+
+    def _set_target_num_replicas_with_hysteresis(self) -> None:
+        """Set target_num_replicas based on request rate with hysteresis."""
+        target_num_replicas = self._calculate_target_num_replicas()
+        old_target_num_replicas = self.target_num_replicas
+
+        # Faster scale up when there is no replica.
+        if self.target_num_replicas == 0:
+            self.target_num_replicas = target_num_replicas
+        elif target_num_replicas > self.target_num_replicas:
+            self.upscale_counter += 1
+            self.downscale_counter = 0
+            if self.upscale_counter >= self.scale_up_threshold:
+                self.upscale_counter = 0
+                self.target_num_replicas = target_num_replicas
+        elif target_num_replicas < self.target_num_replicas:
+            self.downscale_counter += 1
+            self.upscale_counter = 0
+            if self.downscale_counter >= self.scale_down_threshold:
+                self.downscale_counter = 0
+                self.target_num_replicas = target_num_replicas
+        else:
+            self.upscale_counter = self.downscale_counter = 0
+
+        logger.info(
+            f'Old target number of replicas: {old_target_num_replicas}. '
+            f'Current target number of replicas: {target_num_replicas}. '
+            f'Final target number of replicas: {self.target_num_replicas}. '
+            f'Upscale counter: {self.upscale_counter}/'
+            f'{self.scale_up_threshold}. '
+            f'Downscale counter: {self.downscale_counter}/'
+            f'{self.scale_down_threshold}. ')
+
+
+class RequestRateAutoscaler(_AutoscalerWithHysteresis):
+    """RequestRateAutoscaler: Autoscale according to request rate.
+
+    Scales when the number of requests per replica in the given interval
+    is above or below the target qps per replica. The instance can be
+    either spot or on-demand, but not both.
+    """
+
+    def __init__(self, service_name: str,
+                 spec: 'service_spec.SkyServiceSpec') -> None:
+        """Initialize the request rate autoscaler.
+
+        Variables:
+            target_qps_per_replica: Target qps per replica for autoscaling.
+            qps_window_size: Window size for qps calculating.
+            request_timestamps: All request timestamps within the window.
+        """
+        super().__init__(service_name, spec)
+        self.target_qps_per_replica: Optional[
+            float] = spec.target_qps_per_replica
+        self.qps_window_size: int = constants.AUTOSCALER_QPS_WINDOW_SIZE_SECONDS
+        self.request_timestamps: List[float] = []
+
+    def _calculate_target_num_replicas(self) -> int:
+        if self.target_qps_per_replica is None:
+            return self.min_replicas
+        num_requests_per_second = len(
+            self.request_timestamps) / self.qps_window_size
+        target_num_replicas = math.ceil(num_requests_per_second /
+                                        self.target_qps_per_replica)
+        logger.info(f'Requests per second: {num_requests_per_second}. '
+                    f'Target number of replicas: {target_num_replicas}.')
+        return self._clip_target_num_replicas(target_num_replicas)
+
+    def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
+                       update_mode: serve_utils.UpdateMode) -> None:
+        super().update_version(version, spec, update_mode)
+        self.target_qps_per_replica = spec.target_qps_per_replica
+
+    def collect_request_information(
+            self, request_aggregator_info: Dict[str, Any]) -> None:
+        """Collect request information from aggregator for autoscaling.
+
+        request_aggregator_info should be a dict with the following format:
+
+        {
+            'timestamps': [timestamp1 (float), timestamp2 (float), ...]
+        }
+        """
+        self.request_timestamps.extend(
+            request_aggregator_info.get('timestamps', []))
+        current_time = time.time()
+        index = bisect.bisect_left(self.request_timestamps,
+                                   current_time - self.qps_window_size)
+        self.request_timestamps = self.request_timestamps[index:]
+        logger.info(f'Num of requests in the last {self.qps_window_size} '
+                    f'seconds: {len(self.request_timestamps)}')
+
+    def _generate_scaling_decisions(
+        self,
+        replica_infos: List['replica_managers.ReplicaInfo'],
+    ) -> List[AutoscalerDecision]:
+        """Generate Autoscaling decisions based on request rate."""
+
+        self._set_target_num_replicas_with_hysteresis()
+
+        latest_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = []
+
+        for info in replica_infos:
+            if info.version == self.latest_version:
+                if not info.is_terminal:
+                    latest_nonterminal_replicas.append(info)
+
+        scaling_decisions: List[AutoscalerDecision] = []
+
+        # Case 1. when latest_nonterminal_replicas is less
         # than num_to_provision, we always scale up new replicas.
         if len(latest_nonterminal_replicas) < self.target_num_replicas:
             num_replicas_to_scale_up = (self.target_num_replicas -
                                         len(latest_nonterminal_replicas))
             logger.info('Number of replicas to scale up: '
                         f'{num_replicas_to_scale_up}')
-            for _ in range(num_replicas_to_scale_up):
-                scaling_options.append(
-                    AutoscalerDecision(AutoscalerDecisionOperator.SCALE_UP,
-                                       target=None))
+            scaling_decisions.extend(
+                _generate_scale_up_decisions(num_replicas_to_scale_up, None))

-        # Case 3: when latest_nonterminal_replicas is more
+        # Case 2: when latest_nonterminal_replicas is more
         # than self.target_num_replicas, we scale down new replicas.
+        replicas_to_scale_down = []
         if len(latest_nonterminal_replicas) > self.target_num_replicas:
             num_replicas_to_scale_down = (len(latest_nonterminal_replicas) -
                                           self.target_num_replicas)
             replicas_to_scale_down = (
-                RequestRateAutoscaler.
                 _select_nonterminal_replicas_to_scale_down(
-                    num_limit=num_replicas_to_scale_down,
-                    replica_infos=latest_nonterminal_replicas))
+                    num_replicas_to_scale_down, latest_nonterminal_replicas))
             logger.info(
                 'Number of replicas to scale down: '
                 f'{num_replicas_to_scale_down} {replicas_to_scale_down}')
-            all_replica_ids_to_scale_down.extend(replicas_to_scale_down)

-        for replica_id in all_replica_ids_to_scale_down:
-            scaling_options.append(
-                AutoscalerDecision(AutoscalerDecisionOperator.SCALE_DOWN,
-                                   target=replica_id))
+        scaling_decisions.extend(
+            _generate_scale_down_decisions(replicas_to_scale_down))

-        if not scaling_options:
-            logger.info('No scaling needed.')
-        return scaling_options
+        return scaling_decisions

     def _dump_dynamic_states(self) -> Dict[str, Any]:
         return {
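Annotation: the hysteresis thresholds and the request-rate target added above reduce to simple arithmetic. A sketch with illustrative numbers (not necessarily the shipped constants):

    import math

    decision_interval = 20       # seconds between autoscaler evaluations
    upscale_delay = 300          # seconds of sustained load before scaling up
    qps_window = 60              # seconds of request timestamps retained
    target_qps_per_replica = 2.5

    scale_up_threshold = int(upscale_delay / decision_interval)  # 15 periods
    requests_in_window = 450
    qps = requests_in_window / qps_window                        # 7.5 req/s
    target_replicas = math.ceil(qps / target_qps_per_replica)    # 3 replicas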
@@ -481,16 +551,19 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):

     When spec.base_ondemand_fallback_replicas is set, we make sure
     there are at least spec.base_ondemand_fallback_replicas on-demands
-    to be always there to provide basic gurantee for the availability.
+    to be always there to provide basic guarantee for the availability.

     When spec.dynamic_ondemand_fallback is set, on-demand instances
     will be scheduled to provision for any preempted spot instance, i.e.,
     on-demand instance are used as dynamic fallback of spot.
     """

-    def __init__(self, service_name: str,
-                 spec: 'service_spec.SkyServiceSpec') -> None:
-        super().__init__(service_name, spec)
+    # job_recovery field is checked earlier in core
+    SPOT_OVERRIDE = {'use_spot': True}
+    ONDEMAND_OVERRIDE = {'use_spot': False}
+
+    def _setup_fallback_options(self,
+                                spec: 'service_spec.SkyServiceSpec') -> None:
         self.base_ondemand_fallback_replicas: int = (
             spec.base_ondemand_fallback_replicas
             if spec.base_ondemand_fallback_replicas is not None else 0)
@@ -501,37 +574,42 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
             spec.dynamic_ondemand_fallback
             if spec.dynamic_ondemand_fallback is not None else False)

+    def __init__(self, service_name: str,
+                 spec: 'service_spec.SkyServiceSpec') -> None:
+        """Initialize the fallback request rate autoscaler.
+
+        Variables:
+            base_ondemand_fallback_replicas: Minimum number of on-demand
+                replicas to be always there.
+            dynamic_ondemand_fallback: Whether to dynamically provision
+                on-demand instances for preempted spot instances.
+        """
+        super().__init__(service_name, spec)
+        self._setup_fallback_options(spec)
+
     def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
                        update_mode: serve_utils.UpdateMode) -> None:
         super().update_version(version, spec, update_mode=update_mode)
-        self.base_ondemand_fallback_replicas = (
-            spec.base_ondemand_fallback_replicas
-            if spec.base_ondemand_fallback_replicas is not None else 0)
-        # Assert: Either dynamic_ondemand_fallback is set
-        # or base_ondemand_fallback_replicas is greater than 0.
-        assert spec.use_ondemand_fallback
-        self.dynamic_ondemand_fallback = (spec.dynamic_ondemand_fallback
-                                          if spec.dynamic_ondemand_fallback
-                                          is not None else False)
+        self._setup_fallback_options(spec)

-    # job_recovery field is checked earlier in core
-    def _get_spot_resources_override_dict(self) -> Dict[str, Any]:
-        return {'use_spot': True}
-
-    def _get_ondemand_resources_override_dict(self) -> Dict[str, Any]:
-        return {'use_spot': False}
-
-    def evaluate_scaling(
+    def _generate_scaling_decisions(
         self,
         replica_infos: List['replica_managers.ReplicaInfo'],
     ) -> List[AutoscalerDecision]:
+        """Generate Autoscaling decisions based on request rate, with on-demand
+        fallback.
+
+        The autoscaler will make sure there are at least
+        `base_ondemand_fallback_replicas` on-demand replicas to be always there,
+        so the service can provide basic guarantee for the availability.
+        """
+
+        self._set_target_num_replicas_with_hysteresis()

         latest_nonterminal_replicas = list(
             filter(
                 lambda info: not info.is_terminal and info.version == self.
                 latest_version, replica_infos))
-
-        self._set_target_num_replica_with_hysteresis()
         num_nonterminal_spot, num_ready_spot = 0, 0
         num_nonterminal_ondemand, num_ready_ondemand = 0, 0

@@ -546,22 +624,14 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
                 num_nonterminal_ondemand += 1

         logger.info(
-            'Number of alive spot instances: '
-            f'{num_nonterminal_spot}, '
+            f'Number of alive spot instances: {num_nonterminal_spot}, '
             f'Number of ready spot instances: {num_ready_spot}, '
-            'Number of alive on-demand instances: '
-            f' {num_nonterminal_ondemand}, '
+            f'Number of alive on-demand instances: {num_nonterminal_ondemand}, '
             f'Number of ready on-demand instances: {num_ready_ondemand}')

-        scaling_options: List[AutoscalerDecision] = []
+        scaling_decisions: List[AutoscalerDecision] = []
         all_replica_ids_to_scale_down: List[int] = []

-        # TODO(MaoZiming,zhwu): coner case: We should make sure the fallback
-        # replicas are ready before scaling down the old replicas to avoid the
-        # situation that all the ready new replicas are preempted together.
-        all_replica_ids_to_scale_down.extend(
-            self.select_outdated_replicas_to_scale_down(replica_infos))
-
         # Decide how many spot instances to launch.
         num_spot_to_provision = (self.target_num_replicas -
                                  self.base_ondemand_fallback_replicas)
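Annotation: with a base on-demand fallback, the spot/on-demand split above is plain subtraction. Illustrative numbers:

    target_num_replicas = 5
    base_ondemand_fallback_replicas = 2
    num_spot_to_provision = target_num_replicas - base_ondemand_fallback_replicas  # 3
    # 3 spot replicas plus 2 always-on on-demand replicas cover the target of 5;
    # per the class docstring, dynamic_ondemand_fallback additionally provisions
    # on-demand replicas to stand in for preempted spot replicas.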
@@ -571,18 +641,15 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
                                      num_nonterminal_spot)
             logger.info('Number of spot instances to scale up: '
                         f'{num_spot_to_scale_up}')
-            for _ in range(num_spot_to_scale_up):
-                scaling_options.append(
-                    AutoscalerDecision(
-                        AutoscalerDecisionOperator.SCALE_UP,
-                        target=self._get_spot_resources_override_dict()))
+            scaling_decisions.extend(
+                _generate_scale_up_decisions(num_spot_to_scale_up,
+                                             self.SPOT_OVERRIDE))
         elif num_nonterminal_spot > num_spot_to_provision:
             # Too many spot instances, scale down.
             # Get the replica to scale down with _select_replicas_to_scale_down
             num_spot_to_scale_down = (num_nonterminal_spot -
                                       num_spot_to_provision)
             replicas_to_scale_down = (
-                RequestRateAutoscaler.
                 _select_nonterminal_replicas_to_scale_down(
                     num_spot_to_scale_down,
                     filter(lambda info: info.is_spot,
606
673
  num_nonterminal_ondemand)
607
674
  logger.info('Number of on-demand instances to scale up: '
608
675
  f'{num_ondemand_to_scale_up}')
609
- for _ in range(num_ondemand_to_scale_up):
610
- scaling_options.append(
611
- AutoscalerDecision(
612
- AutoscalerDecisionOperator.SCALE_UP,
613
- target=self._get_ondemand_resources_override_dict()))
676
+ scaling_decisions.extend(
677
+ _generate_scale_up_decisions(num_ondemand_to_scale_up,
678
+ self.ONDEMAND_OVERRIDE))
614
679
  else:
615
680
  num_ondemand_to_scale_down = (num_nonterminal_ondemand -
616
681
  num_ondemand_to_provision)
617
682
  replicas_to_scale_down = (
618
- RequestRateAutoscaler.
619
683
  _select_nonterminal_replicas_to_scale_down(
620
684
  num_ondemand_to_scale_down,
621
685
  filter(lambda info: not info.is_spot,
@@ -626,9 +690,7 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
626
690
 
627
691
  all_replica_ids_to_scale_down.extend(replicas_to_scale_down)
628
692
 
629
- for replica_id in all_replica_ids_to_scale_down:
630
- scaling_options.append(
631
- AutoscalerDecision(AutoscalerDecisionOperator.SCALE_DOWN,
632
- target=replica_id))
693
+ scaling_decisions.extend(
694
+ _generate_scale_down_decisions(all_replica_ids_to_scale_down))
633
695
 
634
- return scaling_options
696
+ return scaling_decisions
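Annotation: taken together, the refactor moves version cleanup and state handling into the Autoscaler base class and leaves rate-based decisions to the subclasses. A minimal sketch of how a caller might drive the new interface; service_name/service_spec are stand-ins, and launch_replica/terminate_replica are hypothetical helpers, not controller code:

    autoscaler = Autoscaler.from_spec(service_name, service_spec)
    decisions = autoscaler.generate_scaling_decisions(
        replica_infos=current_replicas,    # replica_managers.ReplicaInfo objects
        active_versions=active_versions)   # versions served by the load balancer
    for decision in decisions:
        if decision.operator == AutoscalerDecisionOperator.SCALE_UP:
            launch_replica(resources_override=decision.target)  # hypothetical
        else:
            terminate_replica(replica_id=decision.target)  # hypothetical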