skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. sky/__init__.py +64 -32
  2. sky/adaptors/aws.py +23 -6
  3. sky/adaptors/azure.py +432 -15
  4. sky/adaptors/cloudflare.py +5 -5
  5. sky/adaptors/common.py +19 -9
  6. sky/adaptors/do.py +20 -0
  7. sky/adaptors/gcp.py +3 -2
  8. sky/adaptors/kubernetes.py +122 -88
  9. sky/adaptors/nebius.py +100 -0
  10. sky/adaptors/oci.py +39 -1
  11. sky/adaptors/vast.py +29 -0
  12. sky/admin_policy.py +101 -0
  13. sky/authentication.py +117 -98
  14. sky/backends/backend.py +52 -20
  15. sky/backends/backend_utils.py +669 -557
  16. sky/backends/cloud_vm_ray_backend.py +1099 -808
  17. sky/backends/local_docker_backend.py +14 -8
  18. sky/backends/wheel_utils.py +38 -20
  19. sky/benchmark/benchmark_utils.py +22 -23
  20. sky/check.py +76 -27
  21. sky/cli.py +1586 -1139
  22. sky/client/__init__.py +1 -0
  23. sky/client/cli.py +5683 -0
  24. sky/client/common.py +345 -0
  25. sky/client/sdk.py +1765 -0
  26. sky/cloud_stores.py +283 -19
  27. sky/clouds/__init__.py +7 -2
  28. sky/clouds/aws.py +303 -112
  29. sky/clouds/azure.py +185 -179
  30. sky/clouds/cloud.py +115 -37
  31. sky/clouds/cudo.py +29 -22
  32. sky/clouds/do.py +313 -0
  33. sky/clouds/fluidstack.py +44 -54
  34. sky/clouds/gcp.py +206 -65
  35. sky/clouds/ibm.py +26 -21
  36. sky/clouds/kubernetes.py +345 -91
  37. sky/clouds/lambda_cloud.py +40 -29
  38. sky/clouds/nebius.py +297 -0
  39. sky/clouds/oci.py +129 -90
  40. sky/clouds/paperspace.py +22 -18
  41. sky/clouds/runpod.py +53 -34
  42. sky/clouds/scp.py +28 -24
  43. sky/clouds/service_catalog/__init__.py +19 -13
  44. sky/clouds/service_catalog/aws_catalog.py +29 -12
  45. sky/clouds/service_catalog/azure_catalog.py +33 -6
  46. sky/clouds/service_catalog/common.py +95 -75
  47. sky/clouds/service_catalog/constants.py +3 -3
  48. sky/clouds/service_catalog/cudo_catalog.py +13 -3
  49. sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
  50. sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
  51. sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
  52. sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
  53. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
  54. sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
  55. sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
  56. sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
  57. sky/clouds/service_catalog/do_catalog.py +111 -0
  58. sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
  59. sky/clouds/service_catalog/gcp_catalog.py +16 -2
  60. sky/clouds/service_catalog/ibm_catalog.py +2 -2
  61. sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
  62. sky/clouds/service_catalog/lambda_catalog.py +8 -3
  63. sky/clouds/service_catalog/nebius_catalog.py +116 -0
  64. sky/clouds/service_catalog/oci_catalog.py +31 -4
  65. sky/clouds/service_catalog/paperspace_catalog.py +2 -2
  66. sky/clouds/service_catalog/runpod_catalog.py +2 -2
  67. sky/clouds/service_catalog/scp_catalog.py +2 -2
  68. sky/clouds/service_catalog/vast_catalog.py +104 -0
  69. sky/clouds/service_catalog/vsphere_catalog.py +2 -2
  70. sky/clouds/utils/aws_utils.py +65 -0
  71. sky/clouds/utils/azure_utils.py +91 -0
  72. sky/clouds/utils/gcp_utils.py +5 -9
  73. sky/clouds/utils/oci_utils.py +47 -5
  74. sky/clouds/utils/scp_utils.py +4 -3
  75. sky/clouds/vast.py +280 -0
  76. sky/clouds/vsphere.py +22 -18
  77. sky/core.py +361 -107
  78. sky/dag.py +41 -28
  79. sky/data/data_transfer.py +37 -0
  80. sky/data/data_utils.py +211 -32
  81. sky/data/mounting_utils.py +182 -30
  82. sky/data/storage.py +2118 -270
  83. sky/data/storage_utils.py +126 -5
  84. sky/exceptions.py +179 -8
  85. sky/execution.py +158 -85
  86. sky/global_user_state.py +150 -34
  87. sky/jobs/__init__.py +12 -10
  88. sky/jobs/client/__init__.py +0 -0
  89. sky/jobs/client/sdk.py +302 -0
  90. sky/jobs/constants.py +49 -11
  91. sky/jobs/controller.py +161 -99
  92. sky/jobs/dashboard/dashboard.py +171 -25
  93. sky/jobs/dashboard/templates/index.html +572 -60
  94. sky/jobs/recovery_strategy.py +157 -156
  95. sky/jobs/scheduler.py +307 -0
  96. sky/jobs/server/__init__.py +1 -0
  97. sky/jobs/server/core.py +598 -0
  98. sky/jobs/server/dashboard_utils.py +69 -0
  99. sky/jobs/server/server.py +190 -0
  100. sky/jobs/state.py +627 -122
  101. sky/jobs/utils.py +615 -206
  102. sky/models.py +27 -0
  103. sky/optimizer.py +142 -83
  104. sky/provision/__init__.py +20 -5
  105. sky/provision/aws/config.py +124 -42
  106. sky/provision/aws/instance.py +130 -53
  107. sky/provision/azure/__init__.py +7 -0
  108. sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
  109. sky/provision/azure/config.py +220 -0
  110. sky/provision/azure/instance.py +1012 -37
  111. sky/provision/common.py +31 -3
  112. sky/provision/constants.py +25 -0
  113. sky/provision/cudo/__init__.py +2 -1
  114. sky/provision/cudo/cudo_utils.py +112 -0
  115. sky/provision/cudo/cudo_wrapper.py +37 -16
  116. sky/provision/cudo/instance.py +28 -12
  117. sky/provision/do/__init__.py +11 -0
  118. sky/provision/do/config.py +14 -0
  119. sky/provision/do/constants.py +10 -0
  120. sky/provision/do/instance.py +287 -0
  121. sky/provision/do/utils.py +301 -0
  122. sky/provision/docker_utils.py +82 -46
  123. sky/provision/fluidstack/fluidstack_utils.py +57 -125
  124. sky/provision/fluidstack/instance.py +15 -43
  125. sky/provision/gcp/config.py +19 -9
  126. sky/provision/gcp/constants.py +7 -1
  127. sky/provision/gcp/instance.py +55 -34
  128. sky/provision/gcp/instance_utils.py +339 -80
  129. sky/provision/gcp/mig_utils.py +210 -0
  130. sky/provision/instance_setup.py +172 -133
  131. sky/provision/kubernetes/__init__.py +1 -0
  132. sky/provision/kubernetes/config.py +104 -90
  133. sky/provision/kubernetes/constants.py +8 -0
  134. sky/provision/kubernetes/instance.py +680 -325
  135. sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
  136. sky/provision/kubernetes/network.py +54 -20
  137. sky/provision/kubernetes/network_utils.py +70 -21
  138. sky/provision/kubernetes/utils.py +1370 -251
  139. sky/provision/lambda_cloud/__init__.py +11 -0
  140. sky/provision/lambda_cloud/config.py +10 -0
  141. sky/provision/lambda_cloud/instance.py +265 -0
  142. sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
  143. sky/provision/logging.py +1 -1
  144. sky/provision/nebius/__init__.py +11 -0
  145. sky/provision/nebius/config.py +11 -0
  146. sky/provision/nebius/instance.py +285 -0
  147. sky/provision/nebius/utils.py +318 -0
  148. sky/provision/oci/__init__.py +15 -0
  149. sky/provision/oci/config.py +51 -0
  150. sky/provision/oci/instance.py +436 -0
  151. sky/provision/oci/query_utils.py +681 -0
  152. sky/provision/paperspace/constants.py +6 -0
  153. sky/provision/paperspace/instance.py +4 -3
  154. sky/provision/paperspace/utils.py +2 -0
  155. sky/provision/provisioner.py +207 -130
  156. sky/provision/runpod/__init__.py +1 -0
  157. sky/provision/runpod/api/__init__.py +3 -0
  158. sky/provision/runpod/api/commands.py +119 -0
  159. sky/provision/runpod/api/pods.py +142 -0
  160. sky/provision/runpod/instance.py +64 -8
  161. sky/provision/runpod/utils.py +239 -23
  162. sky/provision/vast/__init__.py +10 -0
  163. sky/provision/vast/config.py +11 -0
  164. sky/provision/vast/instance.py +247 -0
  165. sky/provision/vast/utils.py +162 -0
  166. sky/provision/vsphere/common/vim_utils.py +1 -1
  167. sky/provision/vsphere/instance.py +8 -18
  168. sky/provision/vsphere/vsphere_utils.py +1 -1
  169. sky/resources.py +247 -102
  170. sky/serve/__init__.py +9 -9
  171. sky/serve/autoscalers.py +361 -299
  172. sky/serve/client/__init__.py +0 -0
  173. sky/serve/client/sdk.py +366 -0
  174. sky/serve/constants.py +12 -3
  175. sky/serve/controller.py +106 -36
  176. sky/serve/load_balancer.py +63 -12
  177. sky/serve/load_balancing_policies.py +84 -2
  178. sky/serve/replica_managers.py +42 -34
  179. sky/serve/serve_state.py +62 -32
  180. sky/serve/serve_utils.py +271 -160
  181. sky/serve/server/__init__.py +0 -0
  182. sky/serve/{core.py → server/core.py} +271 -90
  183. sky/serve/server/server.py +112 -0
  184. sky/serve/service.py +52 -16
  185. sky/serve/service_spec.py +95 -32
  186. sky/server/__init__.py +1 -0
  187. sky/server/common.py +430 -0
  188. sky/server/constants.py +21 -0
  189. sky/server/html/log.html +174 -0
  190. sky/server/requests/__init__.py +0 -0
  191. sky/server/requests/executor.py +472 -0
  192. sky/server/requests/payloads.py +487 -0
  193. sky/server/requests/queues/__init__.py +0 -0
  194. sky/server/requests/queues/mp_queue.py +76 -0
  195. sky/server/requests/requests.py +567 -0
  196. sky/server/requests/serializers/__init__.py +0 -0
  197. sky/server/requests/serializers/decoders.py +192 -0
  198. sky/server/requests/serializers/encoders.py +166 -0
  199. sky/server/server.py +1106 -0
  200. sky/server/stream_utils.py +141 -0
  201. sky/setup_files/MANIFEST.in +2 -5
  202. sky/setup_files/dependencies.py +159 -0
  203. sky/setup_files/setup.py +14 -125
  204. sky/sky_logging.py +59 -14
  205. sky/skylet/autostop_lib.py +2 -2
  206. sky/skylet/constants.py +183 -50
  207. sky/skylet/events.py +22 -10
  208. sky/skylet/job_lib.py +403 -258
  209. sky/skylet/log_lib.py +111 -71
  210. sky/skylet/log_lib.pyi +6 -0
  211. sky/skylet/providers/command_runner.py +6 -8
  212. sky/skylet/providers/ibm/node_provider.py +2 -2
  213. sky/skylet/providers/scp/config.py +11 -3
  214. sky/skylet/providers/scp/node_provider.py +8 -8
  215. sky/skylet/skylet.py +3 -1
  216. sky/skylet/subprocess_daemon.py +69 -17
  217. sky/skypilot_config.py +119 -57
  218. sky/task.py +205 -64
  219. sky/templates/aws-ray.yml.j2 +37 -7
  220. sky/templates/azure-ray.yml.j2 +27 -82
  221. sky/templates/cudo-ray.yml.j2 +7 -3
  222. sky/templates/do-ray.yml.j2 +98 -0
  223. sky/templates/fluidstack-ray.yml.j2 +7 -4
  224. sky/templates/gcp-ray.yml.j2 +26 -6
  225. sky/templates/ibm-ray.yml.j2 +3 -2
  226. sky/templates/jobs-controller.yaml.j2 +46 -11
  227. sky/templates/kubernetes-ingress.yml.j2 +7 -0
  228. sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
  229. sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
  230. sky/templates/kubernetes-ray.yml.j2 +292 -25
  231. sky/templates/lambda-ray.yml.j2 +30 -40
  232. sky/templates/nebius-ray.yml.j2 +79 -0
  233. sky/templates/oci-ray.yml.j2 +18 -57
  234. sky/templates/paperspace-ray.yml.j2 +10 -6
  235. sky/templates/runpod-ray.yml.j2 +26 -4
  236. sky/templates/scp-ray.yml.j2 +3 -2
  237. sky/templates/sky-serve-controller.yaml.j2 +12 -1
  238. sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
  239. sky/templates/vast-ray.yml.j2 +70 -0
  240. sky/templates/vsphere-ray.yml.j2 +8 -3
  241. sky/templates/websocket_proxy.py +64 -0
  242. sky/usage/constants.py +10 -1
  243. sky/usage/usage_lib.py +130 -37
  244. sky/utils/accelerator_registry.py +35 -51
  245. sky/utils/admin_policy_utils.py +147 -0
  246. sky/utils/annotations.py +51 -0
  247. sky/utils/cli_utils/status_utils.py +81 -23
  248. sky/utils/cluster_utils.py +356 -0
  249. sky/utils/command_runner.py +452 -89
  250. sky/utils/command_runner.pyi +77 -3
  251. sky/utils/common.py +54 -0
  252. sky/utils/common_utils.py +319 -108
  253. sky/utils/config_utils.py +204 -0
  254. sky/utils/control_master_utils.py +48 -0
  255. sky/utils/controller_utils.py +548 -266
  256. sky/utils/dag_utils.py +93 -32
  257. sky/utils/db_utils.py +18 -4
  258. sky/utils/env_options.py +29 -7
  259. sky/utils/kubernetes/create_cluster.sh +8 -60
  260. sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
  261. sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
  262. sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
  263. sky/utils/kubernetes/gpu_labeler.py +4 -4
  264. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
  265. sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
  266. sky/utils/kubernetes/rsync_helper.sh +24 -0
  267. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
  268. sky/utils/log_utils.py +240 -33
  269. sky/utils/message_utils.py +81 -0
  270. sky/utils/registry.py +127 -0
  271. sky/utils/resources_utils.py +94 -22
  272. sky/utils/rich_utils.py +247 -18
  273. sky/utils/schemas.py +284 -64
  274. sky/{status_lib.py → utils/status_lib.py} +12 -7
  275. sky/utils/subprocess_utils.py +212 -46
  276. sky/utils/timeline.py +12 -7
  277. sky/utils/ux_utils.py +168 -15
  278. skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
  279. skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
  280. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
  281. sky/clouds/cloud_registry.py +0 -31
  282. sky/jobs/core.py +0 -330
  283. sky/skylet/providers/azure/__init__.py +0 -2
  284. sky/skylet/providers/azure/azure-vm-template.json +0 -301
  285. sky/skylet/providers/azure/config.py +0 -170
  286. sky/skylet/providers/azure/node_provider.py +0 -466
  287. sky/skylet/providers/lambda_cloud/__init__.py +0 -2
  288. sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
  289. sky/skylet/providers/oci/__init__.py +0 -2
  290. sky/skylet/providers/oci/node_provider.py +0 -488
  291. sky/skylet/providers/oci/query_helper.py +0 -383
  292. sky/skylet/providers/oci/utils.py +0 -21
  293. sky/utils/cluster_yaml_utils.py +0 -24
  294. sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
  295. skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
  296. skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
  297. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
  298. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
  299. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
sky/resources.py CHANGED
@@ -1,6 +1,5 @@
1
1
  """Resources: compute requirements of Tasks."""
2
2
  import dataclasses
3
- import functools
4
3
  import textwrap
5
4
  from typing import Any, Dict, List, Optional, Set, Tuple, Union
6
5
 
@@ -9,15 +8,17 @@ import colorama
9
8
  from sky import check as sky_check
10
9
  from sky import clouds
11
10
  from sky import exceptions
12
- from sky import jobs as managed_jobs
13
11
  from sky import sky_logging
14
12
  from sky import skypilot_config
15
13
  from sky.clouds import service_catalog
16
14
  from sky.provision import docker_utils
15
+ from sky.provision.kubernetes import utils as kubernetes_utils
17
16
  from sky.skylet import constants
18
17
  from sky.utils import accelerator_registry
18
+ from sky.utils import annotations
19
19
  from sky.utils import common_utils
20
20
  from sky.utils import log_utils
21
+ from sky.utils import registry
21
22
  from sky.utils import resources_utils
22
23
  from sky.utils import schemas
23
24
  from sky.utils import ux_utils
@@ -32,7 +33,7 @@ class Resources:
32
33
 
33
34
  This class is immutable once created (to ensure some validations are done
34
35
  whenever properties change). To update the property of an instance of
35
- Resources, use `resources.copy(**new_properties)`.
36
+ Resources, use ``resources.copy(**new_properties)``.
36
37
 
37
38
  Used:
38
39
 
@@ -44,7 +45,7 @@ class Resources:
44
45
  """
45
46
  # If any fields changed, increment the version. For backward compatibility,
46
47
  # modify the __setstate__ method to handle the old version.
47
- _VERSION = 18
48
+ _VERSION = 22
48
49
 
49
50
  def __init__(
50
51
  self,
@@ -55,7 +56,7 @@ class Resources:
55
56
  accelerators: Union[None, str, Dict[str, int]] = None,
56
57
  accelerator_args: Optional[Dict[str, str]] = None,
57
58
  use_spot: Optional[bool] = None,
58
- job_recovery: Optional[str] = None,
59
+ job_recovery: Optional[Union[Dict[str, Union[str, int]], str]] = None,
59
60
  region: Optional[str] = None,
60
61
  zone: Optional[str] = None,
61
62
  image_id: Union[Dict[str, str], str, None] = None,
@@ -66,8 +67,10 @@ class Resources:
66
67
  # Internal use only.
67
68
  # pylint: disable=invalid-name
68
69
  _docker_login_config: Optional[docker_utils.DockerLoginConfig] = None,
70
+ _docker_username_for_runpod: Optional[str] = None,
69
71
  _is_image_managed: Optional[bool] = None,
70
72
  _requires_fuse: Optional[bool] = None,
73
+ _cluster_config_overrides: Optional[Dict[str, Any]] = None,
71
74
  ):
72
75
  """Initialize a Resources object.
73
76
 
@@ -110,6 +113,12 @@ class Resources:
110
113
  job to recover the cluster from preemption. Refer to
111
114
  `recovery_strategy module <https://github.com/skypilot-org/skypilot/blob/master/sky/jobs/recovery_strategy.py>`__ # pylint: disable=line-too-long
112
115
  for more details.
116
+ When a dict is provided, it can have the following fields:
117
+
118
+ - strategy: the recovery strategy to use.
119
+ - max_restarts_on_errors: the max number of restarts on user code
120
+ errors.
121
+
113
122
  region: the region to use.
114
123
  zone: the zone to use.
115
124
  image_id: the image ID to use. If a str, must be a string
@@ -140,6 +149,9 @@ class Resources:
140
149
  _docker_login_config: the docker configuration to use. This includes
141
150
  the docker username, password, and registry server. If None, skip
142
151
  docker login.
152
+ _docker_username_for_runpod: the login username for the docker
153
+ containers. This is used by RunPod to set the ssh user for the
154
+ docker containers.
143
155
  _requires_fuse: whether the task requires FUSE mounting support. This
144
156
  is used internally by certain cloud implementations to do additional
145
157
  setup for FUSE mounting. This flag also safeguards against using
@@ -152,18 +164,27 @@ class Resources:
152
164
  """
153
165
  self._version = self._VERSION
154
166
  self._cloud = cloud
155
- self._region: Optional[str] = None
156
- self._zone: Optional[str] = None
157
- self._validate_and_set_region_zone(region, zone)
167
+ self._region: Optional[str] = region
168
+ self._zone: Optional[str] = zone
158
169
 
159
170
  self._instance_type = instance_type
160
171
 
161
172
  self._use_spot_specified = use_spot is not None
162
173
  self._use_spot = use_spot if use_spot is not None else False
163
- self._job_recovery = None
174
+ self._job_recovery: Optional[Dict[str, Union[str, int]]] = None
164
175
  if job_recovery is not None:
165
- if job_recovery.strip().lower() != 'none':
166
- self._job_recovery = job_recovery.upper()
176
+ if isinstance(job_recovery, str):
177
+ job_recovery = {'strategy': job_recovery}
178
+ if 'strategy' not in job_recovery:
179
+ job_recovery['strategy'] = None
180
+
181
+ strategy_name = job_recovery['strategy']
182
+ if strategy_name == 'none':
183
+ self._job_recovery = None
184
+ else:
185
+ if strategy_name is not None:
186
+ job_recovery['strategy'] = strategy_name.upper()
187
+ self._job_recovery = job_recovery
167
188
 
168
189
  if disk_size is not None:
169
190
  if round(disk_size) != disk_size:
@@ -174,8 +195,6 @@ class Resources:
174
195
  else:
175
196
  self._disk_size = _DEFAULT_DISK_SIZE_GB
176
197
 
177
- # self._image_id is a dict of {region: image_id}.
178
- # The key is None if the same image_id applies for all regions.
179
198
  self._image_id = image_id
180
199
  if isinstance(image_id, str):
181
200
  self._image_id = {self._region: image_id.strip()}
@@ -216,12 +235,25 @@ class Resources:
216
235
 
217
236
  self._docker_login_config = _docker_login_config
218
237
 
238
+ # TODO(andyl): This ctor param seems to be unused.
239
+ # We always use `Task.set_resources` and `Resources.copy` to set the
240
+ # `docker_username_for_runpod`. But to keep the consistency with
241
+ # `_docker_login_config`, we keep it here.
242
+ self._docker_username_for_runpod = _docker_username_for_runpod
243
+
219
244
  self._requires_fuse = _requires_fuse
220
245
 
246
+ self._cluster_config_overrides = _cluster_config_overrides
247
+ self._cached_repr = None
248
+
221
249
  self._set_cpus(cpus)
222
250
  self._set_memory(memory)
223
251
  self._set_accelerators(accelerators, accelerator_args)
224
252
 
253
+ def validate(self):
254
+ """Validate the resources and infer the missing fields if possible."""
255
+ self._try_canonicalize_accelerators()
256
+ self._try_validate_and_set_region_zone()
225
257
  self._try_validate_instance_type()
226
258
  self._try_validate_cpus_mem()
227
259
  self._try_validate_managed_job_attributes()
@@ -260,6 +292,8 @@ class Resources:
260
292
  >>> sky.Resources(disk_size=100)
261
293
  <Cloud>(disk_size=100)
262
294
  """
295
+ if self._cached_repr is not None:
296
+ return self._cached_repr
263
297
  accelerators = ''
264
298
  accelerator_args = ''
265
299
  if self.accelerators is not None:
@@ -319,7 +353,8 @@ class Resources:
319
353
  if self.cloud is not None:
320
354
  cloud_str = f'{self.cloud}'
321
355
 
322
- return f'{cloud_str}({hardware_str})'
356
+ self._cached_repr = f'{cloud_str}({hardware_str})'
357
+ return self._cached_repr
323
358
 
324
359
  @property
325
360
  def repr_with_region_zone(self) -> str:
@@ -353,7 +388,7 @@ class Resources:
353
388
  return self._instance_type
354
389
 
355
390
  @property
356
- @functools.lru_cache(maxsize=1)
391
+ @annotations.lru_cache(scope='global', maxsize=1)
357
392
  def cpus(self) -> Optional[str]:
358
393
  """Returns the number of vCPUs that each instance must have.
359
394
 
@@ -387,8 +422,8 @@ class Resources:
387
422
  return self._memory
388
423
 
389
424
  @property
390
- @functools.lru_cache(maxsize=1)
391
- def accelerators(self) -> Optional[Dict[str, int]]:
425
+ @annotations.lru_cache(scope='global', maxsize=1)
426
+ def accelerators(self) -> Optional[Dict[str, Union[int, float]]]:
392
427
  """Returns the accelerators field directly or by inferring.
393
428
 
394
429
  For example, Resources(AWS, 'p3.2xlarge') has its accelerators field
@@ -415,7 +450,7 @@ class Resources:
415
450
  return self._use_spot_specified
416
451
 
417
452
  @property
418
- def job_recovery(self) -> Optional[str]:
453
+ def job_recovery(self) -> Optional[Dict[str, Union[str, int]]]:
419
454
  return self._job_recovery
420
455
 
421
456
  @property
@@ -448,10 +483,20 @@ class Resources:
448
483
  return False
449
484
  return self._requires_fuse
450
485
 
486
+ @property
487
+ def cluster_config_overrides(self) -> Dict[str, Any]:
488
+ if self._cluster_config_overrides is None:
489
+ return {}
490
+ return self._cluster_config_overrides
491
+
451
492
  @requires_fuse.setter
452
493
  def requires_fuse(self, value: Optional[bool]) -> None:
453
494
  self._requires_fuse = value
454
495
 
496
+ @property
497
+ def docker_username_for_runpod(self) -> Optional[str]:
498
+ return self._docker_username_for_runpod
499
+
455
500
  def _set_cpus(
456
501
  self,
457
502
  cpus: Union[None, int, float, str],
@@ -513,7 +558,7 @@ class Resources:
513
558
  if memory_gb <= 0:
514
559
  with ux_utils.print_exception_no_traceback():
515
560
  raise ValueError(
516
- f'The "cpus" field should be positive. Found: {memory!r}')
561
+ f'The "memory" field should be positive. Found: {memory!r}')
517
562
 
518
563
  def _set_accelerators(
519
564
  self,
@@ -546,36 +591,49 @@ class Resources:
546
591
  with ux_utils.print_exception_no_traceback():
547
592
  raise ValueError(parse_error) from None
548
593
 
549
- # Canonicalize the accelerator names.
550
- accelerators = {
551
- accelerator_registry.canonicalize_accelerator_name(
552
- acc, self._cloud): acc_count
553
- for acc, acc_count in accelerators.items()
554
- }
555
-
556
594
  acc, _ = list(accelerators.items())[0]
557
595
  if 'tpu' in acc.lower():
558
596
  if self.cloud is None:
559
- self._cloud = clouds.GCP()
560
- assert self.cloud.is_same_cloud(
561
- clouds.GCP()), 'Cloud must be GCP.'
597
+ if kubernetes_utils.is_tpu_on_gke(acc):
598
+ self._cloud = clouds.Kubernetes()
599
+ else:
600
+ self._cloud = clouds.GCP()
601
+ assert (self.cloud.is_same_cloud(clouds.GCP()) or
602
+ self.cloud.is_same_cloud(clouds.Kubernetes())), (
603
+ 'Cloud must be GCP or Kubernetes for TPU '
604
+ 'accelerators.')
605
+
562
606
  if accelerator_args is None:
563
607
  accelerator_args = {}
608
+
564
609
  use_tpu_vm = accelerator_args.get('tpu_vm', True)
565
- if self.instance_type is not None and use_tpu_vm:
566
- if self.instance_type != 'TPU-VM':
567
- with ux_utils.print_exception_no_traceback():
568
- raise ValueError(
569
- 'Cannot specify instance type'
570
- f' (got "{self.instance_type}") for TPU VM.')
571
- if 'runtime_version' not in accelerator_args:
572
- if use_tpu_vm:
573
- accelerator_args['runtime_version'] = 'tpu-vm-base'
574
- else:
575
- accelerator_args['runtime_version'] = '2.12.0'
576
- logger.info(
577
- 'Missing runtime_version in accelerator_args, using'
578
- f' default ({accelerator_args["runtime_version"]})')
610
+ if (self.cloud.is_same_cloud(clouds.GCP()) and
611
+ not kubernetes_utils.is_tpu_on_gke(acc)):
612
+ if 'runtime_version' not in accelerator_args:
613
+
614
+ def _get_default_runtime_version() -> str:
615
+ if not use_tpu_vm:
616
+ return '2.12.0'
617
+ # TPU V5 requires a newer runtime version.
618
+ if acc.startswith('tpu-v5'):
619
+ return 'v2-alpha-tpuv5'
620
+ # TPU V6e requires a newer runtime version.
621
+ elif acc.startswith('tpu-v6e'):
622
+ return 'v2-alpha-tpuv6e'
623
+ return 'tpu-vm-base'
624
+
625
+ accelerator_args['runtime_version'] = (
626
+ _get_default_runtime_version())
627
+ logger.info(
628
+ 'Missing runtime_version in accelerator_args, using'
629
+ f' default ({accelerator_args["runtime_version"]})')
630
+
631
+ if self.instance_type is not None and use_tpu_vm:
632
+ if self.instance_type != 'TPU-VM':
633
+ with ux_utils.print_exception_no_traceback():
634
+ raise ValueError(
635
+ 'Cannot specify instance type (got '
636
+ f'{self.instance_type!r}) for TPU VM.')
579
637
 
580
638
  self._accelerators = accelerators
581
639
  self._accelerator_args = accelerator_args
@@ -588,15 +646,30 @@ class Resources:
588
646
  assert self.is_launchable(), self
589
647
  return self.cloud.need_cleanup_after_preemption_or_failure(self)
590
648
 
591
- def _validate_and_set_region_zone(self, region: Optional[str],
592
- zone: Optional[str]) -> None:
649
+ def _try_canonicalize_accelerators(self) -> None:
650
+ """Try to canonicalize the accelerators attribute.
651
+
652
+ We don't canonicalize accelerators during creation of Resources object
653
+ because it may check Kubernetes accelerators online. It requires
654
+ Kubernetes credentials which may not be available locally when a remote
655
+ API server is used.
656
+ """
657
+ if self._accelerators is None:
658
+ return
659
+ self._accelerators = {
660
+ accelerator_registry.canonicalize_accelerator_name(
661
+ acc, self._cloud): acc_count
662
+ for acc, acc_count in self._accelerators.items()
663
+ }
664
+
665
+ def _try_validate_and_set_region_zone(self) -> None:
593
666
  """Try to validate and set the region and zone attribute.
594
667
 
595
668
  Raises:
596
669
  ValueError: if the attributes are invalid.
597
670
  exceptions.NoCloudAccessError: if no public cloud is enabled.
598
671
  """
599
- if region is None and zone is None:
672
+ if self._region is None and self._zone is None:
600
673
  return
601
674
 
602
675
  if self._cloud is None:
@@ -608,13 +681,13 @@ class Resources:
608
681
  cloud_to_errors = {}
609
682
  for cloud in enabled_clouds:
610
683
  try:
611
- cloud.validate_region_zone(region, zone)
684
+ cloud.validate_region_zone(self._region, self._zone)
612
685
  except ValueError as e:
613
686
  cloud_to_errors[repr(cloud)] = e
614
687
  continue
615
688
  valid_clouds.append(cloud)
616
689
 
617
- if len(valid_clouds) == 0:
690
+ if not valid_clouds:
618
691
  if len(enabled_clouds) == 1:
619
692
  cloud_str = f'for cloud {enabled_clouds[0]}'
620
693
  else:
@@ -632,23 +705,24 @@ class Resources:
632
705
  table.add_row([str(cloud), reason_str])
633
706
  hint = table.get_string()
634
707
  raise ValueError(
635
- f'Invalid (region {region!r}, zone {zone!r}) '
636
- f'{cloud_str}. Details:\n{hint}')
708
+ f'Invalid (region {self._region!r}, zone '
709
+ f'{self._zone!r}) {cloud_str}. Details:\n{hint}')
637
710
  elif len(valid_clouds) > 1:
638
711
  with ux_utils.print_exception_no_traceback():
639
712
  raise ValueError(
640
- f'Cannot infer cloud from (region {region!r}, zone '
641
- f'{zone!r}). Multiple enabled clouds have region/zone '
642
- f'of the same names: {valid_clouds}. '
713
+ f'Cannot infer cloud from (region {self._region!r}, '
714
+ f'zone {self._zone!r}). Multiple enabled clouds '
715
+ f'have region/zone of the same names: {valid_clouds}. '
643
716
  f'To fix: explicitly specify `cloud`.')
644
717
  logger.debug(f'Cloud is not specified, using {valid_clouds[0]} '
645
- f'inferred from region {region!r} and zone {zone!r}')
718
+ f'inferred from region {self._region!r} and zone '
719
+ f'{self._zone!r}')
646
720
  self._cloud = valid_clouds[0]
647
721
 
648
722
  # Validate if region and zone exist in the catalog, and set the region
649
723
  # if zone is specified.
650
724
  self._region, self._zone = self._cloud.validate_region_zone(
651
- region, zone)
725
+ self._region, self._zone)
652
726
 
653
727
  def get_valid_regions_for_launchable(self) -> List[clouds.Region]:
654
728
  """Returns a set of `Region`s that can provision this Resources.
@@ -726,7 +800,7 @@ class Resources:
726
800
  for cloud in enabled_clouds:
727
801
  if cloud.instance_type_exists(self._instance_type):
728
802
  valid_clouds.append(cloud)
729
- if len(valid_clouds) == 0:
803
+ if not valid_clouds:
730
804
  if len(enabled_clouds) == 1:
731
805
  cloud_str = f'for cloud {enabled_clouds[0]}'
732
806
  else:
@@ -797,14 +871,11 @@ class Resources:
797
871
  Raises:
798
872
  ValueError: if the attributes are invalid.
799
873
  """
800
- if self._job_recovery is None:
874
+ if self._job_recovery is None or self._job_recovery['strategy'] is None:
801
875
  return
802
- if self._job_recovery not in managed_jobs.RECOVERY_STRATEGIES:
803
- with ux_utils.print_exception_no_traceback():
804
- raise ValueError(
805
- f'Spot recovery strategy {self._job_recovery} '
806
- 'is not supported. The strategy should be among '
807
- f'{list(managed_jobs.RECOVERY_STRATEGIES.keys())}')
876
+ # Validate the job recovery strategy
877
+ registry.JOBS_RECOVERY_STRATEGY_REGISTRY.from_str(
878
+ self._job_recovery['strategy'])
808
879
 
809
880
  def extract_docker_image(self) -> Optional[str]:
810
881
  if self.image_id is None:
@@ -826,12 +897,6 @@ class Resources:
826
897
 
827
898
  if self.extract_docker_image() is not None:
828
899
  # TODO(tian): validate the docker image exists / of reasonable size
829
- if self.accelerators is not None:
830
- for acc in self.accelerators.keys():
831
- if acc.lower().startswith('tpu'):
832
- with ux_utils.print_exception_no_traceback():
833
- raise ValueError(
834
- 'Docker image is not supported for TPU VM.')
835
900
  if self.cloud is not None:
836
901
  self.cloud.check_features_are_supported(
837
902
  self, {clouds.CloudImplementationFeatures.DOCKER_IMAGE})
@@ -920,12 +985,6 @@ class Resources:
920
985
  """
921
986
  if self.ports is None:
922
987
  return
923
- if skypilot_config.get_nested(('aws', 'security_group_name'),
924
- None) is not None:
925
- with ux_utils.print_exception_no_traceback():
926
- raise ValueError(
927
- 'Cannot specify ports when AWS security group name is '
928
- 'specified.')
929
988
  if self.cloud is not None:
930
989
  self.cloud.check_features_are_supported(
931
990
  self, {clouds.CloudImplementationFeatures.OPEN_PORTS})
@@ -956,21 +1015,23 @@ class Resources:
956
1015
  """
957
1016
  if not self._labels:
958
1017
  return
959
-
960
- if self.cloud is None:
961
- # Because each cloud has its own label format, we cannot validate
962
- # the labels without knowing the cloud.
963
- with ux_utils.print_exception_no_traceback():
964
- raise ValueError(
965
- 'Cloud must be specified when labels are provided.')
966
-
967
- # Check if the label key value pairs are valid.
1018
+ if self.cloud is not None:
1019
+ validated_clouds = [self.cloud]
1020
+ else:
1021
+ # If no specific cloud is set, validate label against ALL clouds.
1022
+ # The label will be dropped if invalid for any one of the cloud
1023
+ validated_clouds = sky_check.get_cached_enabled_clouds_or_refresh()
968
1024
  invalid_table = log_utils.create_table(['Label', 'Reason'])
969
1025
  for key, value in self._labels.items():
970
- valid, err_msg = self.cloud.is_label_valid(key, value)
971
- if not valid:
972
- invalid_table.add_row([f'{key}: {value}', err_msg])
973
- if len(invalid_table.rows) > 0:
1026
+ for cloud in validated_clouds:
1027
+ valid, err_msg = cloud.is_label_valid(key, value)
1028
+ if not valid:
1029
+ invalid_table.add_row([
1030
+ f'{key}: {value}',
1031
+ f'Label rejected due to {cloud}: {err_msg}'
1032
+ ])
1033
+ break
1034
+ if invalid_table.rows:
974
1035
  with ux_utils.print_exception_no_traceback():
975
1036
  raise ValueError(
976
1037
  'The following labels are invalid:'
@@ -1000,9 +1061,10 @@ class Resources:
1000
1061
  def get_spot_str(self) -> str:
1001
1062
  return '[Spot]' if self.use_spot else ''
1002
1063
 
1003
- def make_deploy_variables(self, cluster_name_on_cloud: str,
1064
+ def make_deploy_variables(self, cluster_name: resources_utils.ClusterName,
1004
1065
  region: clouds.Region,
1005
1066
  zones: Optional[List[clouds.Zone]],
1067
+ num_nodes: int,
1006
1068
  dryrun: bool) -> Dict[str, Optional[str]]:
1007
1069
  """Converts planned sky.Resources to resource variables.
1008
1070
 
@@ -1011,13 +1073,48 @@ class Resources:
1011
1073
  cloud.make_deploy_resources_variables() method, and the cloud-agnostic
1012
1074
  variables are generated by this method.
1013
1075
  """
1014
- cloud_specific_variables = self.cloud.make_deploy_resources_variables(
1015
- self, cluster_name_on_cloud, region, zones, dryrun)
1076
+ # Initial setup commands
1077
+ initial_setup_commands = []
1078
+ if (skypilot_config.get_nested(
1079
+ ('nvidia_gpus', 'disable_ecc'),
1080
+ False,
1081
+ override_configs=self.cluster_config_overrides) and
1082
+ self.accelerators is not None):
1083
+ initial_setup_commands = [constants.DISABLE_GPU_ECC_COMMAND]
1084
+
1016
1085
  docker_image = self.extract_docker_image()
1086
+
1087
+ # Cloud specific variables
1088
+ cloud_specific_variables = self.cloud.make_deploy_resources_variables(
1089
+ self, cluster_name, region, zones, num_nodes, dryrun)
1090
+
1091
+ # TODO(andyl): Should we print some warnings if users' envs share
1092
+ # same names with the cloud specific variables, but not enabled
1093
+ # since it's not on the particular cloud?
1094
+
1095
+ # Docker run options
1096
+ docker_run_options = skypilot_config.get_nested(
1097
+ ('docker', 'run_options'),
1098
+ default_value=[],
1099
+ override_configs=self.cluster_config_overrides)
1100
+ if isinstance(docker_run_options, str):
1101
+ docker_run_options = [docker_run_options]
1102
+ # Special accelerator runtime might require additional docker run
1103
+ # options. e.g., for TPU, we need --privileged.
1104
+ if 'docker_run_options' in cloud_specific_variables:
1105
+ docker_run_options.extend(
1106
+ cloud_specific_variables['docker_run_options'])
1107
+ if docker_run_options and isinstance(self.cloud, clouds.Kubernetes):
1108
+ logger.warning(
1109
+ f'{colorama.Style.DIM}Docker run options are specified, '
1110
+ 'but ignored for Kubernetes: '
1111
+ f'{" ".join(docker_run_options)}'
1112
+ f'{colorama.Style.RESET_ALL}')
1017
1113
  return dict(
1018
1114
  cloud_specific_variables,
1019
1115
  **{
1020
1116
  # Docker config
1117
+ 'docker_run_options': docker_run_options,
1021
1118
  # Docker image. The image name used to pull the image, e.g.
1022
1119
  # ubuntu:latest.
1023
1120
  'docker_image': docker_image,
@@ -1027,7 +1124,9 @@ class Resources:
1027
1124
  constants.DEFAULT_DOCKER_CONTAINER_NAME,
1028
1125
  # Docker login config (if any). This helps pull the image from
1029
1126
  # private registries.
1030
- 'docker_login_config': self._docker_login_config
1127
+ 'docker_login_config': self._docker_login_config,
1128
+ # Initial setup commands.
1129
+ 'initial_setup_commands': initial_setup_commands,
1031
1130
  })
1032
1131
 
1033
1132
  def get_reservations_available_resources(self) -> Dict[str, int]:
@@ -1169,17 +1268,17 @@ class Resources:
1169
1268
  def is_empty(self) -> bool:
1170
1269
  """Is this Resources an empty request (all fields None)?"""
1171
1270
  return all([
1172
- self.cloud is None,
1271
+ self._cloud is None,
1173
1272
  self._instance_type is None,
1174
1273
  self._cpus is None,
1175
- self.memory is None,
1176
- self.accelerators is None,
1177
- self.accelerator_args is None,
1274
+ self._memory is None,
1275
+ self._accelerators is None,
1276
+ self._accelerator_args is None,
1178
1277
  not self._use_spot_specified,
1179
- self.disk_size == _DEFAULT_DISK_SIZE_GB,
1180
- self.disk_tier is None,
1278
+ self._disk_size == _DEFAULT_DISK_SIZE_GB,
1279
+ self._disk_tier is None,
1181
1280
  self._image_id is None,
1182
- self.ports is None,
1281
+ self._ports is None,
1183
1282
  self._docker_login_config is None,
1184
1283
  ])
1185
1284
 
@@ -1205,11 +1304,16 @@ class Resources:
1205
1304
  labels=override.pop('labels', self.labels),
1206
1305
  _docker_login_config=override.pop('_docker_login_config',
1207
1306
  self._docker_login_config),
1307
+ _docker_username_for_runpod=override.pop(
1308
+ '_docker_username_for_runpod',
1309
+ self._docker_username_for_runpod),
1208
1310
  _is_image_managed=override.pop('_is_image_managed',
1209
1311
  self._is_image_managed),
1210
1312
  _requires_fuse=override.pop('_requires_fuse', self._requires_fuse),
1313
+ _cluster_config_overrides=override.pop(
1314
+ '_cluster_config_overrides', self._cluster_config_overrides),
1211
1315
  )
1212
- assert len(override) == 0
1316
+ assert not override
1213
1317
  return resources
1214
1318
 
1215
1319
  def valid_on_region_zones(self, region: str, zones: List[str]) -> bool:
@@ -1337,7 +1441,7 @@ class Resources:
1337
1441
  def _from_yaml_config_single(cls, config: Dict[str, str]) -> 'Resources':
1338
1442
 
1339
1443
  resources_fields = {}
1340
- resources_fields['cloud'] = clouds.CLOUD_REGISTRY.from_str(
1444
+ resources_fields['cloud'] = registry.CLOUD_REGISTRY.from_str(
1341
1445
  config.pop('cloud', None))
1342
1446
  resources_fields['instance_type'] = config.pop('instance_type', None)
1343
1447
  resources_fields['cpus'] = config.pop('cpus', None)
@@ -1364,9 +1468,13 @@ class Resources:
1364
1468
  resources_fields['labels'] = config.pop('labels', None)
1365
1469
  resources_fields['_docker_login_config'] = config.pop(
1366
1470
  '_docker_login_config', None)
1471
+ resources_fields['_docker_username_for_runpod'] = config.pop(
1472
+ '_docker_username_for_runpod', None)
1367
1473
  resources_fields['_is_image_managed'] = config.pop(
1368
1474
  '_is_image_managed', None)
1369
1475
  resources_fields['_requires_fuse'] = config.pop('_requires_fuse', None)
1476
+ resources_fields['_cluster_config_overrides'] = config.pop(
1477
+ '_cluster_config_overrides', None)
1370
1478
 
1371
1479
  if resources_fields['cpus'] is not None:
1372
1480
  resources_fields['cpus'] = str(resources_fields['cpus'])
@@ -1393,7 +1501,7 @@ class Resources:
1393
1501
  add_if_not_none('instance_type', self.instance_type)
1394
1502
  add_if_not_none('cpus', self._cpus)
1395
1503
  add_if_not_none('memory', self.memory)
1396
- add_if_not_none('accelerators', self.accelerators)
1504
+ add_if_not_none('accelerators', self._accelerators)
1397
1505
  add_if_not_none('accelerator_args', self.accelerator_args)
1398
1506
 
1399
1507
  if self._use_spot_specified:
@@ -1410,6 +1518,11 @@ class Resources:
1410
1518
  if self._docker_login_config is not None:
1411
1519
  config['_docker_login_config'] = dataclasses.asdict(
1412
1520
  self._docker_login_config)
1521
+ if self._docker_username_for_runpod is not None:
1522
+ config['_docker_username_for_runpod'] = (
1523
+ self._docker_username_for_runpod)
1524
+ add_if_not_none('_cluster_config_overrides',
1525
+ self._cluster_config_overrides)
1413
1526
  if self._is_image_managed is not None:
1414
1527
  config['_is_image_managed'] = self._is_image_managed
1415
1528
  if self._requires_fuse is not None:
@@ -1525,4 +1638,36 @@ class Resources:
1525
1638
  if version < 18:
1526
1639
  self._job_recovery = state.pop('_spot_recovery', None)
1527
1640
 
1641
+ if version < 19:
1642
+ self._cluster_config_overrides = state.pop(
1643
+ '_cluster_config_overrides', None)
1644
+
1645
+ if version < 20:
1646
+ # Pre-0.7.0, we used 'kubernetes' as the default region for
1647
+ # Kubernetes clusters. With the introduction of support for
1648
+ # multiple contexts, we now set the region to the context name.
1649
+ # Since we do not have information on which context the cluster
1650
+ # was run in, we default it to the current active context.
1651
+ legacy_region = clouds.Kubernetes().LEGACY_SINGLETON_REGION
1652
+ original_cloud = state.get('_cloud', None)
1653
+ original_region = state.get('_region', None)
1654
+ if (isinstance(original_cloud, clouds.Kubernetes) and
1655
+ original_region == legacy_region):
1656
+ current_context = (
1657
+ kubernetes_utils.get_current_kube_config_context_name())
1658
+ state['_region'] = current_context
1659
+ # Also update the image_id dict if it contains the old region
1660
+ if isinstance(state['_image_id'], dict):
1661
+ if legacy_region in state['_image_id']:
1662
+ state['_image_id'][current_context] = (
1663
+ state['_image_id'][legacy_region])
1664
+ del state['_image_id'][legacy_region]
1665
+
1666
+ if version < 21:
1667
+ self._cached_repr = None
1668
+
1669
+ if version < 22:
1670
+ self._docker_username_for_runpod = state.pop(
1671
+ '_docker_username_for_runpod', None)
1672
+
1528
1673
  self.__dict__.update(state)