skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. sky/__init__.py +64 -32
  2. sky/adaptors/aws.py +23 -6
  3. sky/adaptors/azure.py +432 -15
  4. sky/adaptors/cloudflare.py +5 -5
  5. sky/adaptors/common.py +19 -9
  6. sky/adaptors/do.py +20 -0
  7. sky/adaptors/gcp.py +3 -2
  8. sky/adaptors/kubernetes.py +122 -88
  9. sky/adaptors/nebius.py +100 -0
  10. sky/adaptors/oci.py +39 -1
  11. sky/adaptors/vast.py +29 -0
  12. sky/admin_policy.py +101 -0
  13. sky/authentication.py +117 -98
  14. sky/backends/backend.py +52 -20
  15. sky/backends/backend_utils.py +669 -557
  16. sky/backends/cloud_vm_ray_backend.py +1099 -808
  17. sky/backends/local_docker_backend.py +14 -8
  18. sky/backends/wheel_utils.py +38 -20
  19. sky/benchmark/benchmark_utils.py +22 -23
  20. sky/check.py +76 -27
  21. sky/cli.py +1586 -1139
  22. sky/client/__init__.py +1 -0
  23. sky/client/cli.py +5683 -0
  24. sky/client/common.py +345 -0
  25. sky/client/sdk.py +1765 -0
  26. sky/cloud_stores.py +283 -19
  27. sky/clouds/__init__.py +7 -2
  28. sky/clouds/aws.py +303 -112
  29. sky/clouds/azure.py +185 -179
  30. sky/clouds/cloud.py +115 -37
  31. sky/clouds/cudo.py +29 -22
  32. sky/clouds/do.py +313 -0
  33. sky/clouds/fluidstack.py +44 -54
  34. sky/clouds/gcp.py +206 -65
  35. sky/clouds/ibm.py +26 -21
  36. sky/clouds/kubernetes.py +345 -91
  37. sky/clouds/lambda_cloud.py +40 -29
  38. sky/clouds/nebius.py +297 -0
  39. sky/clouds/oci.py +129 -90
  40. sky/clouds/paperspace.py +22 -18
  41. sky/clouds/runpod.py +53 -34
  42. sky/clouds/scp.py +28 -24
  43. sky/clouds/service_catalog/__init__.py +19 -13
  44. sky/clouds/service_catalog/aws_catalog.py +29 -12
  45. sky/clouds/service_catalog/azure_catalog.py +33 -6
  46. sky/clouds/service_catalog/common.py +95 -75
  47. sky/clouds/service_catalog/constants.py +3 -3
  48. sky/clouds/service_catalog/cudo_catalog.py +13 -3
  49. sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
  50. sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
  51. sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
  52. sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
  53. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
  54. sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
  55. sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
  56. sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
  57. sky/clouds/service_catalog/do_catalog.py +111 -0
  58. sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
  59. sky/clouds/service_catalog/gcp_catalog.py +16 -2
  60. sky/clouds/service_catalog/ibm_catalog.py +2 -2
  61. sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
  62. sky/clouds/service_catalog/lambda_catalog.py +8 -3
  63. sky/clouds/service_catalog/nebius_catalog.py +116 -0
  64. sky/clouds/service_catalog/oci_catalog.py +31 -4
  65. sky/clouds/service_catalog/paperspace_catalog.py +2 -2
  66. sky/clouds/service_catalog/runpod_catalog.py +2 -2
  67. sky/clouds/service_catalog/scp_catalog.py +2 -2
  68. sky/clouds/service_catalog/vast_catalog.py +104 -0
  69. sky/clouds/service_catalog/vsphere_catalog.py +2 -2
  70. sky/clouds/utils/aws_utils.py +65 -0
  71. sky/clouds/utils/azure_utils.py +91 -0
  72. sky/clouds/utils/gcp_utils.py +5 -9
  73. sky/clouds/utils/oci_utils.py +47 -5
  74. sky/clouds/utils/scp_utils.py +4 -3
  75. sky/clouds/vast.py +280 -0
  76. sky/clouds/vsphere.py +22 -18
  77. sky/core.py +361 -107
  78. sky/dag.py +41 -28
  79. sky/data/data_transfer.py +37 -0
  80. sky/data/data_utils.py +211 -32
  81. sky/data/mounting_utils.py +182 -30
  82. sky/data/storage.py +2118 -270
  83. sky/data/storage_utils.py +126 -5
  84. sky/exceptions.py +179 -8
  85. sky/execution.py +158 -85
  86. sky/global_user_state.py +150 -34
  87. sky/jobs/__init__.py +12 -10
  88. sky/jobs/client/__init__.py +0 -0
  89. sky/jobs/client/sdk.py +302 -0
  90. sky/jobs/constants.py +49 -11
  91. sky/jobs/controller.py +161 -99
  92. sky/jobs/dashboard/dashboard.py +171 -25
  93. sky/jobs/dashboard/templates/index.html +572 -60
  94. sky/jobs/recovery_strategy.py +157 -156
  95. sky/jobs/scheduler.py +307 -0
  96. sky/jobs/server/__init__.py +1 -0
  97. sky/jobs/server/core.py +598 -0
  98. sky/jobs/server/dashboard_utils.py +69 -0
  99. sky/jobs/server/server.py +190 -0
  100. sky/jobs/state.py +627 -122
  101. sky/jobs/utils.py +615 -206
  102. sky/models.py +27 -0
  103. sky/optimizer.py +142 -83
  104. sky/provision/__init__.py +20 -5
  105. sky/provision/aws/config.py +124 -42
  106. sky/provision/aws/instance.py +130 -53
  107. sky/provision/azure/__init__.py +7 -0
  108. sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
  109. sky/provision/azure/config.py +220 -0
  110. sky/provision/azure/instance.py +1012 -37
  111. sky/provision/common.py +31 -3
  112. sky/provision/constants.py +25 -0
  113. sky/provision/cudo/__init__.py +2 -1
  114. sky/provision/cudo/cudo_utils.py +112 -0
  115. sky/provision/cudo/cudo_wrapper.py +37 -16
  116. sky/provision/cudo/instance.py +28 -12
  117. sky/provision/do/__init__.py +11 -0
  118. sky/provision/do/config.py +14 -0
  119. sky/provision/do/constants.py +10 -0
  120. sky/provision/do/instance.py +287 -0
  121. sky/provision/do/utils.py +301 -0
  122. sky/provision/docker_utils.py +82 -46
  123. sky/provision/fluidstack/fluidstack_utils.py +57 -125
  124. sky/provision/fluidstack/instance.py +15 -43
  125. sky/provision/gcp/config.py +19 -9
  126. sky/provision/gcp/constants.py +7 -1
  127. sky/provision/gcp/instance.py +55 -34
  128. sky/provision/gcp/instance_utils.py +339 -80
  129. sky/provision/gcp/mig_utils.py +210 -0
  130. sky/provision/instance_setup.py +172 -133
  131. sky/provision/kubernetes/__init__.py +1 -0
  132. sky/provision/kubernetes/config.py +104 -90
  133. sky/provision/kubernetes/constants.py +8 -0
  134. sky/provision/kubernetes/instance.py +680 -325
  135. sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
  136. sky/provision/kubernetes/network.py +54 -20
  137. sky/provision/kubernetes/network_utils.py +70 -21
  138. sky/provision/kubernetes/utils.py +1370 -251
  139. sky/provision/lambda_cloud/__init__.py +11 -0
  140. sky/provision/lambda_cloud/config.py +10 -0
  141. sky/provision/lambda_cloud/instance.py +265 -0
  142. sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
  143. sky/provision/logging.py +1 -1
  144. sky/provision/nebius/__init__.py +11 -0
  145. sky/provision/nebius/config.py +11 -0
  146. sky/provision/nebius/instance.py +285 -0
  147. sky/provision/nebius/utils.py +318 -0
  148. sky/provision/oci/__init__.py +15 -0
  149. sky/provision/oci/config.py +51 -0
  150. sky/provision/oci/instance.py +436 -0
  151. sky/provision/oci/query_utils.py +681 -0
  152. sky/provision/paperspace/constants.py +6 -0
  153. sky/provision/paperspace/instance.py +4 -3
  154. sky/provision/paperspace/utils.py +2 -0
  155. sky/provision/provisioner.py +207 -130
  156. sky/provision/runpod/__init__.py +1 -0
  157. sky/provision/runpod/api/__init__.py +3 -0
  158. sky/provision/runpod/api/commands.py +119 -0
  159. sky/provision/runpod/api/pods.py +142 -0
  160. sky/provision/runpod/instance.py +64 -8
  161. sky/provision/runpod/utils.py +239 -23
  162. sky/provision/vast/__init__.py +10 -0
  163. sky/provision/vast/config.py +11 -0
  164. sky/provision/vast/instance.py +247 -0
  165. sky/provision/vast/utils.py +162 -0
  166. sky/provision/vsphere/common/vim_utils.py +1 -1
  167. sky/provision/vsphere/instance.py +8 -18
  168. sky/provision/vsphere/vsphere_utils.py +1 -1
  169. sky/resources.py +247 -102
  170. sky/serve/__init__.py +9 -9
  171. sky/serve/autoscalers.py +361 -299
  172. sky/serve/client/__init__.py +0 -0
  173. sky/serve/client/sdk.py +366 -0
  174. sky/serve/constants.py +12 -3
  175. sky/serve/controller.py +106 -36
  176. sky/serve/load_balancer.py +63 -12
  177. sky/serve/load_balancing_policies.py +84 -2
  178. sky/serve/replica_managers.py +42 -34
  179. sky/serve/serve_state.py +62 -32
  180. sky/serve/serve_utils.py +271 -160
  181. sky/serve/server/__init__.py +0 -0
  182. sky/serve/{core.py → server/core.py} +271 -90
  183. sky/serve/server/server.py +112 -0
  184. sky/serve/service.py +52 -16
  185. sky/serve/service_spec.py +95 -32
  186. sky/server/__init__.py +1 -0
  187. sky/server/common.py +430 -0
  188. sky/server/constants.py +21 -0
  189. sky/server/html/log.html +174 -0
  190. sky/server/requests/__init__.py +0 -0
  191. sky/server/requests/executor.py +472 -0
  192. sky/server/requests/payloads.py +487 -0
  193. sky/server/requests/queues/__init__.py +0 -0
  194. sky/server/requests/queues/mp_queue.py +76 -0
  195. sky/server/requests/requests.py +567 -0
  196. sky/server/requests/serializers/__init__.py +0 -0
  197. sky/server/requests/serializers/decoders.py +192 -0
  198. sky/server/requests/serializers/encoders.py +166 -0
  199. sky/server/server.py +1106 -0
  200. sky/server/stream_utils.py +141 -0
  201. sky/setup_files/MANIFEST.in +2 -5
  202. sky/setup_files/dependencies.py +159 -0
  203. sky/setup_files/setup.py +14 -125
  204. sky/sky_logging.py +59 -14
  205. sky/skylet/autostop_lib.py +2 -2
  206. sky/skylet/constants.py +183 -50
  207. sky/skylet/events.py +22 -10
  208. sky/skylet/job_lib.py +403 -258
  209. sky/skylet/log_lib.py +111 -71
  210. sky/skylet/log_lib.pyi +6 -0
  211. sky/skylet/providers/command_runner.py +6 -8
  212. sky/skylet/providers/ibm/node_provider.py +2 -2
  213. sky/skylet/providers/scp/config.py +11 -3
  214. sky/skylet/providers/scp/node_provider.py +8 -8
  215. sky/skylet/skylet.py +3 -1
  216. sky/skylet/subprocess_daemon.py +69 -17
  217. sky/skypilot_config.py +119 -57
  218. sky/task.py +205 -64
  219. sky/templates/aws-ray.yml.j2 +37 -7
  220. sky/templates/azure-ray.yml.j2 +27 -82
  221. sky/templates/cudo-ray.yml.j2 +7 -3
  222. sky/templates/do-ray.yml.j2 +98 -0
  223. sky/templates/fluidstack-ray.yml.j2 +7 -4
  224. sky/templates/gcp-ray.yml.j2 +26 -6
  225. sky/templates/ibm-ray.yml.j2 +3 -2
  226. sky/templates/jobs-controller.yaml.j2 +46 -11
  227. sky/templates/kubernetes-ingress.yml.j2 +7 -0
  228. sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
  229. sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
  230. sky/templates/kubernetes-ray.yml.j2 +292 -25
  231. sky/templates/lambda-ray.yml.j2 +30 -40
  232. sky/templates/nebius-ray.yml.j2 +79 -0
  233. sky/templates/oci-ray.yml.j2 +18 -57
  234. sky/templates/paperspace-ray.yml.j2 +10 -6
  235. sky/templates/runpod-ray.yml.j2 +26 -4
  236. sky/templates/scp-ray.yml.j2 +3 -2
  237. sky/templates/sky-serve-controller.yaml.j2 +12 -1
  238. sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
  239. sky/templates/vast-ray.yml.j2 +70 -0
  240. sky/templates/vsphere-ray.yml.j2 +8 -3
  241. sky/templates/websocket_proxy.py +64 -0
  242. sky/usage/constants.py +10 -1
  243. sky/usage/usage_lib.py +130 -37
  244. sky/utils/accelerator_registry.py +35 -51
  245. sky/utils/admin_policy_utils.py +147 -0
  246. sky/utils/annotations.py +51 -0
  247. sky/utils/cli_utils/status_utils.py +81 -23
  248. sky/utils/cluster_utils.py +356 -0
  249. sky/utils/command_runner.py +452 -89
  250. sky/utils/command_runner.pyi +77 -3
  251. sky/utils/common.py +54 -0
  252. sky/utils/common_utils.py +319 -108
  253. sky/utils/config_utils.py +204 -0
  254. sky/utils/control_master_utils.py +48 -0
  255. sky/utils/controller_utils.py +548 -266
  256. sky/utils/dag_utils.py +93 -32
  257. sky/utils/db_utils.py +18 -4
  258. sky/utils/env_options.py +29 -7
  259. sky/utils/kubernetes/create_cluster.sh +8 -60
  260. sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
  261. sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
  262. sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
  263. sky/utils/kubernetes/gpu_labeler.py +4 -4
  264. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
  265. sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
  266. sky/utils/kubernetes/rsync_helper.sh +24 -0
  267. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
  268. sky/utils/log_utils.py +240 -33
  269. sky/utils/message_utils.py +81 -0
  270. sky/utils/registry.py +127 -0
  271. sky/utils/resources_utils.py +94 -22
  272. sky/utils/rich_utils.py +247 -18
  273. sky/utils/schemas.py +284 -64
  274. sky/{status_lib.py → utils/status_lib.py} +12 -7
  275. sky/utils/subprocess_utils.py +212 -46
  276. sky/utils/timeline.py +12 -7
  277. sky/utils/ux_utils.py +168 -15
  278. skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
  279. skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
  280. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
  281. sky/clouds/cloud_registry.py +0 -31
  282. sky/jobs/core.py +0 -330
  283. sky/skylet/providers/azure/__init__.py +0 -2
  284. sky/skylet/providers/azure/azure-vm-template.json +0 -301
  285. sky/skylet/providers/azure/config.py +0 -170
  286. sky/skylet/providers/azure/node_provider.py +0 -466
  287. sky/skylet/providers/lambda_cloud/__init__.py +0 -2
  288. sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
  289. sky/skylet/providers/oci/__init__.py +0 -2
  290. sky/skylet/providers/oci/node_provider.py +0 -488
  291. sky/skylet/providers/oci/query_helper.py +0 -383
  292. sky/skylet/providers/oci/utils.py +0 -21
  293. sky/utils/cluster_yaml_utils.py +0 -24
  294. sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
  295. skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
  296. skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
  297. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
  298. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
  299. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
@@ -1,14 +1,20 @@
1
1
  """Utility functions for resources."""
2
+ import dataclasses
2
3
  import enum
3
4
  import itertools
5
+ import json
6
+ import math
4
7
  import re
5
8
  import typing
6
- from typing import List, Optional, Set
9
+ from typing import Dict, List, Optional, Set, Union
7
10
 
11
+ from sky import skypilot_config
12
+ from sky.utils import registry
8
13
  from sky.utils import ux_utils
9
14
 
10
15
  if typing.TYPE_CHECKING:
11
16
  from sky import backends
17
+ from sky import resources as resources_lib
12
18
 
13
19
  _PORT_RANGE_HINT_MSG = ('Invalid port range {}. Please use the format '
14
20
  '"from-to", in which from <= to. e.g. "1-3".')
@@ -22,6 +28,7 @@ class DiskTier(enum.Enum):
22
28
  LOW = 'low'
23
29
  MEDIUM = 'medium'
24
30
  HIGH = 'high'
31
+ ULTRA = 'ultra'
25
32
  BEST = 'best'
26
33
 
27
34
  @classmethod
@@ -43,6 +50,18 @@ class DiskTier(enum.Enum):
43
50
  return types.index(self) <= types.index(other)
44
51
 
45
52
 
53
+ @dataclasses.dataclass
54
+ class ClusterName:
55
+ display_name: str
56
+ name_on_cloud: str
57
+
58
+ def __repr__(self) -> str:
59
+ return repr(self.display_name)
60
+
61
+ def __str__(self) -> str:
62
+ return self.display_name
63
+
64
+
46
65
  def check_port_str(port: str) -> None:
47
66
  if not port.isdigit():
48
67
  with ux_utils.print_exception_no_traceback():
@@ -118,29 +137,82 @@ def simplify_ports(ports: List[str]) -> List[str]:
118
137
  return port_set_to_ranges(port_ranges_to_set(ports))
119
138
 
120
139
 
140
+ def format_resource(resource: 'resources_lib.Resources',
141
+ simplify: bool = False) -> str:
142
+ if simplify:
143
+ cloud = resource.cloud
144
+ if resource.accelerators is None:
145
+ vcpu, _ = cloud.get_vcpus_mem_from_instance_type(
146
+ resource.instance_type)
147
+ hardware = f'vCPU={int(vcpu)}'
148
+ else:
149
+ hardware = f'{resource.accelerators}'
150
+ spot = '[Spot]' if resource.use_spot else ''
151
+ return f'{cloud}({spot}{hardware})'
152
+ else:
153
+ # accelerator_args is way too long.
154
+ # Convert from:
155
+ # GCP(n1-highmem-8, {'tpu-v2-8': 1}, accelerator_args={'runtime_version': '2.12.0'} # pylint: disable=line-too-long
156
+ # to:
157
+ # GCP(n1-highmem-8, {'tpu-v2-8': 1}...)
158
+ pattern = ', accelerator_args={.*}'
159
+ launched_resource_str = re.sub(pattern, '...', str(resource))
160
+ return launched_resource_str
161
+
162
+
121
163
  def get_readable_resources_repr(handle: 'backends.CloudVmRayResourceHandle',
122
164
  simplify: bool = False) -> str:
123
165
  if (handle.launched_nodes is not None and
124
166
  handle.launched_resources is not None):
125
- if simplify:
126
- cloud = handle.launched_resources.cloud
127
- if handle.launched_resources.accelerators is None:
128
- vcpu, _ = cloud.get_vcpus_mem_from_instance_type(
129
- handle.launched_resources.instance_type)
130
- hardware = f'vCPU={int(vcpu)}'
131
- else:
132
- hardware = f'{handle.launched_resources.accelerators}'
133
- spot = '[Spot]' if handle.launched_resources.use_spot else ''
134
- return f'{handle.launched_nodes}x {cloud}({spot}{hardware})'
135
- else:
136
- launched_resource_str = str(handle.launched_resources)
137
- # accelerator_args is way too long.
138
- # Convert from:
139
- # GCP(n1-highmem-8, {'tpu-v2-8': 1}, accelerator_args={'runtime_version': '2.12.0'} # pylint: disable=line-too-long
140
- # to:
141
- # GCP(n1-highmem-8, {'tpu-v2-8': 1}...)
142
- pattern = ', accelerator_args={.*}'
143
- launched_resource_str = re.sub(pattern, '...',
144
- launched_resource_str)
145
- return f'{handle.launched_nodes}x {launched_resource_str}'
167
+ return (f'{handle.launched_nodes}x '
168
+ f'{format_resource(handle.launched_resources, simplify)}')
146
169
  return _DEFAULT_MESSAGE_HANDLE_INITIALIZING
170
+
171
+
172
+ def make_ray_custom_resources_str(
173
+ resource_dict: Optional[Dict[str, Union[int, float]]]) -> Optional[str]:
174
+ """Convert resources to Ray custom resources format."""
175
+ if resource_dict is None:
176
+ return None
177
+ # Ray does not allow fractional resources, so we need to ceil the values.
178
+ ceiled_dict = {k: math.ceil(v) for k, v in resource_dict.items()}
179
+ return json.dumps(ceiled_dict, separators=(',', ':'))
180
+
181
+
182
+ @dataclasses.dataclass
183
+ class FeasibleResources:
184
+ """Feasible resources returned by cloud.
185
+
186
+ Used to represent a collection of feasible resources returned by cloud,
187
+ any fuzzy candidates, and optionally a string hint if no feasible resources
188
+ are found.
189
+
190
+ Fuzzy candidates example: when the requested GPU is A100:1 but is not
191
+ available in a cloud/region, the fuzzy candidates are results of a fuzzy
192
+ search in the catalog that are offered in the location. E.g.,
193
+ ['A100-80GB:1', 'A100-80GB:2', 'A100-80GB:4', 'A100:8']
194
+ """
195
+ resources_list: List['resources_lib.Resources']
196
+ fuzzy_candidate_list: List[str]
197
+ hint: Optional[str]
198
+
199
+
200
+ def need_to_query_reservations() -> bool:
201
+ """Checks if we need to query reservations from cloud APIs.
202
+
203
+ We need to query reservations if:
204
+ - The cloud has specific reservations.
205
+ - The cloud prioritizes reservations over on-demand instances.
206
+
207
+ This is useful to skip the potentially expensive reservation query for
208
+ clouds that do not use reservations.
209
+ """
210
+ for cloud_str in registry.CLOUD_REGISTRY.keys():
211
+ cloud_specific_reservations = skypilot_config.get_nested(
212
+ (cloud_str, 'specific_reservations'), None)
213
+ cloud_prioritize_reservations = skypilot_config.get_nested(
214
+ (cloud_str, 'prioritize_reservations'), False)
215
+ if (cloud_specific_reservations is not None or
216
+ cloud_prioritize_reservations):
217
+ return True
218
+ return False
sky/utils/rich_utils.py CHANGED
@@ -1,16 +1,107 @@
1
1
  """Rich status spinner utils."""
2
2
  import contextlib
3
+ import enum
4
+ import logging
3
5
  import threading
4
- from typing import Union
6
+ import typing
7
+ from typing import Dict, Iterator, Optional, Tuple, Union
5
8
 
6
9
  import rich.console as rich_console
7
10
 
8
- console = rich_console.Console()
9
- _status = None
11
+ from sky.utils import annotations
12
+ from sky.utils import message_utils
13
+
14
+ if typing.TYPE_CHECKING:
15
+ import requests
16
+
17
+ console = rich_console.Console(soft_wrap=True)
18
+ _statuses: Dict[str, Optional[Union['EncodedStatus',
19
+ 'rich_console.Status']]] = {
20
+ 'server': None,
21
+ 'client': None,
22
+ }
23
+ _status_nesting_level = 0
10
24
 
11
25
  _logging_lock = threading.RLock()
12
26
 
13
27
 
28
+ class Control(enum.Enum):
29
+ """Control codes for the status spinner."""
30
+ INIT = 'rich_init'
31
+ START = 'rich_start'
32
+ STOP = 'rich_stop'
33
+ EXIT = 'rich_exit'
34
+ UPDATE = 'rich_update'
35
+
36
+ def encode(self, msg: str) -> str:
37
+ return f'<{self.value}>{msg}</{self.value}>'
38
+
39
+ @classmethod
40
+ def decode(cls, encoded_msg: str) -> Tuple[Optional['Control'], str]:
41
+ # Find the control code
42
+ control_str = None
43
+ for control in cls:
44
+ if f'<{control.value}>' in encoded_msg:
45
+ control_str = control.value
46
+ encoded_msg = encoded_msg.replace(f'<{control.value}>', '')
47
+ encoded_msg = encoded_msg.replace(f'</{control.value}>', '')
48
+ break
49
+ else:
50
+ return None, encoded_msg
51
+ return cls(control_str), encoded_msg
52
+
53
+
54
+ class EncodedStatusMessage:
55
+ """A class to encode status messages."""
56
+
57
+ def __init__(self, msg: str):
58
+ self.msg = msg
59
+
60
+ def init(self) -> str:
61
+ return message_utils.encode_payload(Control.INIT.encode(self.msg))
62
+
63
+ def enter(self) -> str:
64
+ return message_utils.encode_payload(Control.START.encode(self.msg))
65
+
66
+ def exit(self) -> str:
67
+ return message_utils.encode_payload(Control.EXIT.encode(''))
68
+
69
+ def update(self, msg: str) -> str:
70
+ return message_utils.encode_payload(Control.UPDATE.encode(msg))
71
+
72
+ def stop(self) -> str:
73
+ return message_utils.encode_payload(Control.STOP.encode(''))
74
+
75
+ def start(self) -> str:
76
+ return message_utils.encode_payload(Control.START.encode(self.msg))
77
+
78
+
79
+ class EncodedStatus:
80
+ """A class to encode status messages."""
81
+
82
+ def __init__(self, msg: str):
83
+ self.status = msg
84
+ self.encoded_msg = EncodedStatusMessage(msg)
85
+ print(self.encoded_msg.init(), end='', flush=True)
86
+
87
+ def __enter__(self):
88
+ print(self.encoded_msg.enter(), end='', flush=True)
89
+ return self
90
+
91
+ def __exit__(self, exc_type, exc_val, exc_tb):
92
+ print(self.encoded_msg.exit(), end='', flush=True)
93
+
94
+ def update(self, msg: str):
95
+ self.status = msg
96
+ print(self.encoded_msg.update(msg), end='', flush=True)
97
+
98
+ def stop(self):
99
+ print(self.encoded_msg.stop(), end='', flush=True)
100
+
101
+ def start(self):
102
+ print(self.encoded_msg.start(), end='', flush=True)
103
+
104
+
14
105
  class _NoOpConsoleStatus:
15
106
  """An empty class for multi-threaded console.status."""
16
107
 
@@ -30,34 +121,172 @@ class _NoOpConsoleStatus:
30
121
  pass
31
122
 
32
123
 
124
+ # TODO(SKY-1216): we need a wrapper for the rich.progress in our code as well.
125
+ class _RevertibleStatus:
126
+ """A wrapper for status that can revert to previous message after exit."""
127
+
128
+ def __init__(self, message: str, status_type: str):
129
+ self.previous_message = None
130
+ self.status_type = status_type
131
+ status = _statuses[status_type]
132
+ if status is not None:
133
+ self.previous_message = status.status
134
+ self.message = message
135
+
136
+ def __enter__(self):
137
+ global _status_nesting_level
138
+ _statuses[self.status_type].update(self.message)
139
+ _status_nesting_level += 1
140
+ _statuses[self.status_type].__enter__()
141
+ return _statuses[self.status_type]
142
+
143
+ def __exit__(self, exc_type, exc_val, exc_tb):
144
+ global _status_nesting_level
145
+ _status_nesting_level -= 1
146
+ if _status_nesting_level <= 0:
147
+ _status_nesting_level = 0
148
+ if _statuses[self.status_type] is not None:
149
+ _statuses[self.status_type].__exit__(exc_type, exc_val, exc_tb)
150
+ _statuses[self.status_type] = None
151
+ else:
152
+ _statuses[self.status_type].update(self.previous_message)
153
+
154
+ def update(self, *args, **kwargs):
155
+ _statuses[self.status_type].update(*args, **kwargs)
156
+
157
+ def stop(self):
158
+ _statuses[self.status_type].stop()
159
+
160
+ def start(self):
161
+ _statuses[self.status_type].start()
162
+
163
+
33
164
  def safe_status(msg: str) -> Union['rich_console.Status', _NoOpConsoleStatus]:
34
165
  """A wrapper for multi-threaded console.status."""
35
166
  from sky import sky_logging # pylint: disable=import-outside-toplevel
36
- if (threading.current_thread() is threading.main_thread() and
167
+ if (annotations.is_on_api_server and
168
+ threading.current_thread() is threading.main_thread() and
37
169
  not sky_logging.is_silent()):
38
- global _status
39
- if _status is None:
40
- _status = console.status(msg)
41
- _status.update(msg)
42
- return _status
170
+ if _statuses['server'] is None:
171
+ _statuses['server'] = EncodedStatus(msg)
172
+ return _RevertibleStatus(msg, 'server')
43
173
  return _NoOpConsoleStatus()
44
174
 
45
175
 
176
+ def stop_safe_status():
177
+ """Stops all nested statuses.
178
+
179
+ This is useful when we need to stop all statuses, e.g., when we are going to
180
+ stream logs from user program and do not want it to interfere with the
181
+ spinner display.
182
+ """
183
+ if (threading.current_thread() is threading.main_thread() and
184
+ _statuses['server'] is not None):
185
+ _statuses['server'].stop()
186
+
187
+
46
188
  def force_update_status(msg: str):
47
189
  """Update the status message even if sky_logging.is_silent() is true."""
48
190
  if (threading.current_thread() is threading.main_thread() and
49
- _status is not None):
50
- _status.update(msg)
191
+ _statuses['server'] is not None):
192
+ _statuses['server'].update(msg)
51
193
 
52
194
 
53
195
  @contextlib.contextmanager
54
196
  def safe_logger():
55
- logged = False
56
197
  with _logging_lock:
57
- if _status is not None and _status._live.is_started: # pylint: disable=protected-access
58
- _status.stop()
59
- yield
60
- logged = True
61
- _status.start()
62
- if not logged:
198
+ client_status_obj = _statuses['client']
199
+
200
+ client_status_live = (client_status_obj is not None and
201
+ client_status_obj._live.is_started) # pylint: disable=protected-access
202
+ if client_status_live:
203
+ client_status_obj.stop()
63
204
  yield
205
+ if client_status_live:
206
+ client_status_obj.start()
207
+
208
+
209
+ class RichSafeStreamHandler(logging.StreamHandler):
210
+
211
+ def emit(self, record: logging.LogRecord) -> None:
212
+ with safe_logger():
213
+ return super().emit(record)
214
+
215
+
216
+ def client_status(msg: str) -> Union['rich_console.Status', _NoOpConsoleStatus]:
217
+ """A wrapper for multi-threaded console.status."""
218
+ from sky import sky_logging # pylint: disable=import-outside-toplevel
219
+ if (threading.current_thread() is threading.main_thread() and
220
+ not sky_logging.is_silent()):
221
+ if _statuses['client'] is None:
222
+ _statuses['client'] = console.status(msg)
223
+ return _RevertibleStatus(msg, 'client')
224
+ return _NoOpConsoleStatus()
225
+
226
+
227
+ def decode_rich_status(
228
+ response: 'requests.Response') -> Iterator[Optional[str]]:
229
+ """Decode the rich status message from the response."""
230
+ decoding_status = None
231
+ try:
232
+ last_line = ''
233
+ # Iterate over the response content in chunks. We do not use iter_lines
234
+ # because it will strip the trailing newline characters, causing the
235
+ # progress bar ending with `\r` becomes a pyramid.
236
+ for encoded_msg in response.iter_content(chunk_size=None):
237
+ if encoded_msg is None:
238
+ return
239
+ encoded_msg = encoded_msg.decode('utf-8')
240
+ lines = encoded_msg.splitlines(keepends=True)
241
+
242
+ lines[0] = last_line + lines[0]
243
+ last_line = lines[-1]
244
+ # If the last line is not ended with `\r` or `\n` (with ending
245
+ # spaces stripped), it means the last line is not a complete line.
246
+ # We keep the last line in the buffer and continue.
247
+ if (not last_line.strip(' ').endswith('\r') and
248
+ not last_line.strip(' ').endswith('\n')):
249
+ lines = lines[:-1]
250
+ else:
251
+ # Reset the buffer for the next line, as the last line is a
252
+ # complete line.
253
+ last_line = ''
254
+
255
+ for line in lines:
256
+ if line.endswith('\r\n'):
257
+ # Replace `\r\n` with `\n`, as printing a line ends with
258
+ # `\r\n` in linux will cause the line to be empty.
259
+ line = line[:-2] + '\n'
260
+ is_payload, line = message_utils.decode_payload(
261
+ line, raise_for_mismatch=False)
262
+ control = None
263
+ if is_payload:
264
+ control, encoded_status = Control.decode(line)
265
+ if control is None:
266
+ yield line
267
+ continue
268
+
269
+ # control is not None, i.e. it is a rich status control message.
270
+ if threading.current_thread() is not threading.main_thread():
271
+ yield None
272
+ continue
273
+ if control == Control.INIT:
274
+ decoding_status = client_status(encoded_status)
275
+ else:
276
+ if decoding_status is None:
277
+ # status may not be initialized if a user use --tail for
278
+ # sky api logs.
279
+ continue
280
+ assert decoding_status is not None, (
281
+ f'Rich status not initialized: {line}')
282
+ if control == Control.UPDATE:
283
+ decoding_status.update(encoded_status)
284
+ elif control == Control.STOP:
285
+ decoding_status.stop()
286
+ elif control == Control.EXIT:
287
+ decoding_status.__exit__(None, None, None)
288
+ elif control == Control.START:
289
+ decoding_status.start()
290
+ finally:
291
+ if decoding_status is not None:
292
+ decoding_status.__exit__(None, None, None)