skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. sky/__init__.py +64 -32
  2. sky/adaptors/aws.py +23 -6
  3. sky/adaptors/azure.py +432 -15
  4. sky/adaptors/cloudflare.py +5 -5
  5. sky/adaptors/common.py +19 -9
  6. sky/adaptors/do.py +20 -0
  7. sky/adaptors/gcp.py +3 -2
  8. sky/adaptors/kubernetes.py +122 -88
  9. sky/adaptors/nebius.py +100 -0
  10. sky/adaptors/oci.py +39 -1
  11. sky/adaptors/vast.py +29 -0
  12. sky/admin_policy.py +101 -0
  13. sky/authentication.py +117 -98
  14. sky/backends/backend.py +52 -20
  15. sky/backends/backend_utils.py +669 -557
  16. sky/backends/cloud_vm_ray_backend.py +1099 -808
  17. sky/backends/local_docker_backend.py +14 -8
  18. sky/backends/wheel_utils.py +38 -20
  19. sky/benchmark/benchmark_utils.py +22 -23
  20. sky/check.py +76 -27
  21. sky/cli.py +1586 -1139
  22. sky/client/__init__.py +1 -0
  23. sky/client/cli.py +5683 -0
  24. sky/client/common.py +345 -0
  25. sky/client/sdk.py +1765 -0
  26. sky/cloud_stores.py +283 -19
  27. sky/clouds/__init__.py +7 -2
  28. sky/clouds/aws.py +303 -112
  29. sky/clouds/azure.py +185 -179
  30. sky/clouds/cloud.py +115 -37
  31. sky/clouds/cudo.py +29 -22
  32. sky/clouds/do.py +313 -0
  33. sky/clouds/fluidstack.py +44 -54
  34. sky/clouds/gcp.py +206 -65
  35. sky/clouds/ibm.py +26 -21
  36. sky/clouds/kubernetes.py +345 -91
  37. sky/clouds/lambda_cloud.py +40 -29
  38. sky/clouds/nebius.py +297 -0
  39. sky/clouds/oci.py +129 -90
  40. sky/clouds/paperspace.py +22 -18
  41. sky/clouds/runpod.py +53 -34
  42. sky/clouds/scp.py +28 -24
  43. sky/clouds/service_catalog/__init__.py +19 -13
  44. sky/clouds/service_catalog/aws_catalog.py +29 -12
  45. sky/clouds/service_catalog/azure_catalog.py +33 -6
  46. sky/clouds/service_catalog/common.py +95 -75
  47. sky/clouds/service_catalog/constants.py +3 -3
  48. sky/clouds/service_catalog/cudo_catalog.py +13 -3
  49. sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
  50. sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
  51. sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
  52. sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
  53. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
  54. sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
  55. sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
  56. sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
  57. sky/clouds/service_catalog/do_catalog.py +111 -0
  58. sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
  59. sky/clouds/service_catalog/gcp_catalog.py +16 -2
  60. sky/clouds/service_catalog/ibm_catalog.py +2 -2
  61. sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
  62. sky/clouds/service_catalog/lambda_catalog.py +8 -3
  63. sky/clouds/service_catalog/nebius_catalog.py +116 -0
  64. sky/clouds/service_catalog/oci_catalog.py +31 -4
  65. sky/clouds/service_catalog/paperspace_catalog.py +2 -2
  66. sky/clouds/service_catalog/runpod_catalog.py +2 -2
  67. sky/clouds/service_catalog/scp_catalog.py +2 -2
  68. sky/clouds/service_catalog/vast_catalog.py +104 -0
  69. sky/clouds/service_catalog/vsphere_catalog.py +2 -2
  70. sky/clouds/utils/aws_utils.py +65 -0
  71. sky/clouds/utils/azure_utils.py +91 -0
  72. sky/clouds/utils/gcp_utils.py +5 -9
  73. sky/clouds/utils/oci_utils.py +47 -5
  74. sky/clouds/utils/scp_utils.py +4 -3
  75. sky/clouds/vast.py +280 -0
  76. sky/clouds/vsphere.py +22 -18
  77. sky/core.py +361 -107
  78. sky/dag.py +41 -28
  79. sky/data/data_transfer.py +37 -0
  80. sky/data/data_utils.py +211 -32
  81. sky/data/mounting_utils.py +182 -30
  82. sky/data/storage.py +2118 -270
  83. sky/data/storage_utils.py +126 -5
  84. sky/exceptions.py +179 -8
  85. sky/execution.py +158 -85
  86. sky/global_user_state.py +150 -34
  87. sky/jobs/__init__.py +12 -10
  88. sky/jobs/client/__init__.py +0 -0
  89. sky/jobs/client/sdk.py +302 -0
  90. sky/jobs/constants.py +49 -11
  91. sky/jobs/controller.py +161 -99
  92. sky/jobs/dashboard/dashboard.py +171 -25
  93. sky/jobs/dashboard/templates/index.html +572 -60
  94. sky/jobs/recovery_strategy.py +157 -156
  95. sky/jobs/scheduler.py +307 -0
  96. sky/jobs/server/__init__.py +1 -0
  97. sky/jobs/server/core.py +598 -0
  98. sky/jobs/server/dashboard_utils.py +69 -0
  99. sky/jobs/server/server.py +190 -0
  100. sky/jobs/state.py +627 -122
  101. sky/jobs/utils.py +615 -206
  102. sky/models.py +27 -0
  103. sky/optimizer.py +142 -83
  104. sky/provision/__init__.py +20 -5
  105. sky/provision/aws/config.py +124 -42
  106. sky/provision/aws/instance.py +130 -53
  107. sky/provision/azure/__init__.py +7 -0
  108. sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
  109. sky/provision/azure/config.py +220 -0
  110. sky/provision/azure/instance.py +1012 -37
  111. sky/provision/common.py +31 -3
  112. sky/provision/constants.py +25 -0
  113. sky/provision/cudo/__init__.py +2 -1
  114. sky/provision/cudo/cudo_utils.py +112 -0
  115. sky/provision/cudo/cudo_wrapper.py +37 -16
  116. sky/provision/cudo/instance.py +28 -12
  117. sky/provision/do/__init__.py +11 -0
  118. sky/provision/do/config.py +14 -0
  119. sky/provision/do/constants.py +10 -0
  120. sky/provision/do/instance.py +287 -0
  121. sky/provision/do/utils.py +301 -0
  122. sky/provision/docker_utils.py +82 -46
  123. sky/provision/fluidstack/fluidstack_utils.py +57 -125
  124. sky/provision/fluidstack/instance.py +15 -43
  125. sky/provision/gcp/config.py +19 -9
  126. sky/provision/gcp/constants.py +7 -1
  127. sky/provision/gcp/instance.py +55 -34
  128. sky/provision/gcp/instance_utils.py +339 -80
  129. sky/provision/gcp/mig_utils.py +210 -0
  130. sky/provision/instance_setup.py +172 -133
  131. sky/provision/kubernetes/__init__.py +1 -0
  132. sky/provision/kubernetes/config.py +104 -90
  133. sky/provision/kubernetes/constants.py +8 -0
  134. sky/provision/kubernetes/instance.py +680 -325
  135. sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
  136. sky/provision/kubernetes/network.py +54 -20
  137. sky/provision/kubernetes/network_utils.py +70 -21
  138. sky/provision/kubernetes/utils.py +1370 -251
  139. sky/provision/lambda_cloud/__init__.py +11 -0
  140. sky/provision/lambda_cloud/config.py +10 -0
  141. sky/provision/lambda_cloud/instance.py +265 -0
  142. sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
  143. sky/provision/logging.py +1 -1
  144. sky/provision/nebius/__init__.py +11 -0
  145. sky/provision/nebius/config.py +11 -0
  146. sky/provision/nebius/instance.py +285 -0
  147. sky/provision/nebius/utils.py +318 -0
  148. sky/provision/oci/__init__.py +15 -0
  149. sky/provision/oci/config.py +51 -0
  150. sky/provision/oci/instance.py +436 -0
  151. sky/provision/oci/query_utils.py +681 -0
  152. sky/provision/paperspace/constants.py +6 -0
  153. sky/provision/paperspace/instance.py +4 -3
  154. sky/provision/paperspace/utils.py +2 -0
  155. sky/provision/provisioner.py +207 -130
  156. sky/provision/runpod/__init__.py +1 -0
  157. sky/provision/runpod/api/__init__.py +3 -0
  158. sky/provision/runpod/api/commands.py +119 -0
  159. sky/provision/runpod/api/pods.py +142 -0
  160. sky/provision/runpod/instance.py +64 -8
  161. sky/provision/runpod/utils.py +239 -23
  162. sky/provision/vast/__init__.py +10 -0
  163. sky/provision/vast/config.py +11 -0
  164. sky/provision/vast/instance.py +247 -0
  165. sky/provision/vast/utils.py +162 -0
  166. sky/provision/vsphere/common/vim_utils.py +1 -1
  167. sky/provision/vsphere/instance.py +8 -18
  168. sky/provision/vsphere/vsphere_utils.py +1 -1
  169. sky/resources.py +247 -102
  170. sky/serve/__init__.py +9 -9
  171. sky/serve/autoscalers.py +361 -299
  172. sky/serve/client/__init__.py +0 -0
  173. sky/serve/client/sdk.py +366 -0
  174. sky/serve/constants.py +12 -3
  175. sky/serve/controller.py +106 -36
  176. sky/serve/load_balancer.py +63 -12
  177. sky/serve/load_balancing_policies.py +84 -2
  178. sky/serve/replica_managers.py +42 -34
  179. sky/serve/serve_state.py +62 -32
  180. sky/serve/serve_utils.py +271 -160
  181. sky/serve/server/__init__.py +0 -0
  182. sky/serve/{core.py → server/core.py} +271 -90
  183. sky/serve/server/server.py +112 -0
  184. sky/serve/service.py +52 -16
  185. sky/serve/service_spec.py +95 -32
  186. sky/server/__init__.py +1 -0
  187. sky/server/common.py +430 -0
  188. sky/server/constants.py +21 -0
  189. sky/server/html/log.html +174 -0
  190. sky/server/requests/__init__.py +0 -0
  191. sky/server/requests/executor.py +472 -0
  192. sky/server/requests/payloads.py +487 -0
  193. sky/server/requests/queues/__init__.py +0 -0
  194. sky/server/requests/queues/mp_queue.py +76 -0
  195. sky/server/requests/requests.py +567 -0
  196. sky/server/requests/serializers/__init__.py +0 -0
  197. sky/server/requests/serializers/decoders.py +192 -0
  198. sky/server/requests/serializers/encoders.py +166 -0
  199. sky/server/server.py +1106 -0
  200. sky/server/stream_utils.py +141 -0
  201. sky/setup_files/MANIFEST.in +2 -5
  202. sky/setup_files/dependencies.py +159 -0
  203. sky/setup_files/setup.py +14 -125
  204. sky/sky_logging.py +59 -14
  205. sky/skylet/autostop_lib.py +2 -2
  206. sky/skylet/constants.py +183 -50
  207. sky/skylet/events.py +22 -10
  208. sky/skylet/job_lib.py +403 -258
  209. sky/skylet/log_lib.py +111 -71
  210. sky/skylet/log_lib.pyi +6 -0
  211. sky/skylet/providers/command_runner.py +6 -8
  212. sky/skylet/providers/ibm/node_provider.py +2 -2
  213. sky/skylet/providers/scp/config.py +11 -3
  214. sky/skylet/providers/scp/node_provider.py +8 -8
  215. sky/skylet/skylet.py +3 -1
  216. sky/skylet/subprocess_daemon.py +69 -17
  217. sky/skypilot_config.py +119 -57
  218. sky/task.py +205 -64
  219. sky/templates/aws-ray.yml.j2 +37 -7
  220. sky/templates/azure-ray.yml.j2 +27 -82
  221. sky/templates/cudo-ray.yml.j2 +7 -3
  222. sky/templates/do-ray.yml.j2 +98 -0
  223. sky/templates/fluidstack-ray.yml.j2 +7 -4
  224. sky/templates/gcp-ray.yml.j2 +26 -6
  225. sky/templates/ibm-ray.yml.j2 +3 -2
  226. sky/templates/jobs-controller.yaml.j2 +46 -11
  227. sky/templates/kubernetes-ingress.yml.j2 +7 -0
  228. sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
  229. sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
  230. sky/templates/kubernetes-ray.yml.j2 +292 -25
  231. sky/templates/lambda-ray.yml.j2 +30 -40
  232. sky/templates/nebius-ray.yml.j2 +79 -0
  233. sky/templates/oci-ray.yml.j2 +18 -57
  234. sky/templates/paperspace-ray.yml.j2 +10 -6
  235. sky/templates/runpod-ray.yml.j2 +26 -4
  236. sky/templates/scp-ray.yml.j2 +3 -2
  237. sky/templates/sky-serve-controller.yaml.j2 +12 -1
  238. sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
  239. sky/templates/vast-ray.yml.j2 +70 -0
  240. sky/templates/vsphere-ray.yml.j2 +8 -3
  241. sky/templates/websocket_proxy.py +64 -0
  242. sky/usage/constants.py +10 -1
  243. sky/usage/usage_lib.py +130 -37
  244. sky/utils/accelerator_registry.py +35 -51
  245. sky/utils/admin_policy_utils.py +147 -0
  246. sky/utils/annotations.py +51 -0
  247. sky/utils/cli_utils/status_utils.py +81 -23
  248. sky/utils/cluster_utils.py +356 -0
  249. sky/utils/command_runner.py +452 -89
  250. sky/utils/command_runner.pyi +77 -3
  251. sky/utils/common.py +54 -0
  252. sky/utils/common_utils.py +319 -108
  253. sky/utils/config_utils.py +204 -0
  254. sky/utils/control_master_utils.py +48 -0
  255. sky/utils/controller_utils.py +548 -266
  256. sky/utils/dag_utils.py +93 -32
  257. sky/utils/db_utils.py +18 -4
  258. sky/utils/env_options.py +29 -7
  259. sky/utils/kubernetes/create_cluster.sh +8 -60
  260. sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
  261. sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
  262. sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
  263. sky/utils/kubernetes/gpu_labeler.py +4 -4
  264. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
  265. sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
  266. sky/utils/kubernetes/rsync_helper.sh +24 -0
  267. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
  268. sky/utils/log_utils.py +240 -33
  269. sky/utils/message_utils.py +81 -0
  270. sky/utils/registry.py +127 -0
  271. sky/utils/resources_utils.py +94 -22
  272. sky/utils/rich_utils.py +247 -18
  273. sky/utils/schemas.py +284 -64
  274. sky/{status_lib.py → utils/status_lib.py} +12 -7
  275. sky/utils/subprocess_utils.py +212 -46
  276. sky/utils/timeline.py +12 -7
  277. sky/utils/ux_utils.py +168 -15
  278. skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
  279. skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
  280. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
  281. sky/clouds/cloud_registry.py +0 -31
  282. sky/jobs/core.py +0 -330
  283. sky/skylet/providers/azure/__init__.py +0 -2
  284. sky/skylet/providers/azure/azure-vm-template.json +0 -301
  285. sky/skylet/providers/azure/config.py +0 -170
  286. sky/skylet/providers/azure/node_provider.py +0 -466
  287. sky/skylet/providers/lambda_cloud/__init__.py +0 -2
  288. sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
  289. sky/skylet/providers/oci/__init__.py +0 -2
  290. sky/skylet/providers/oci/node_provider.py +0 -488
  291. sky/skylet/providers/oci/query_helper.py +0 -383
  292. sky/skylet/providers/oci/utils.py +0 -21
  293. sky/utils/cluster_yaml_utils.py +0 -24
  294. sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
  295. skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
  296. skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
  297. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
  298. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
  299. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = 'e620ccc418ee69d70e580a703569d7cf0508b0ce'
8
+ _SKYPILOT_COMMIT_SHA = '2619dc8f63f271852a6b00f91381b27428ff1780'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0-dev2024053101'
38
+ __version__ = '1.0.0.dev2025022801'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
@@ -75,6 +75,7 @@ def _set_http_proxy_env_vars() -> None:
75
75
 
76
76
 
77
77
  _set_http_proxy_env_vars()
78
+
78
79
  # ----------------------------------------------------------------- #
79
80
 
80
81
  # Keep this order to avoid cyclic imports
@@ -82,38 +83,49 @@ _set_http_proxy_env_vars()
82
83
  from sky import backends
83
84
  from sky import benchmark
84
85
  from sky import clouds
86
+ from sky.admin_policy import AdminPolicy
87
+ from sky.admin_policy import MutatedUserRequest
88
+ from sky.admin_policy import UserRequest
89
+ from sky.client.sdk import api_cancel
90
+ from sky.client.sdk import api_info
91
+ from sky.client.sdk import api_server_logs
92
+ from sky.client.sdk import api_start
93
+ from sky.client.sdk import api_status
94
+ from sky.client.sdk import api_stop
95
+ from sky.client.sdk import autostop
96
+ from sky.client.sdk import cancel
97
+ from sky.client.sdk import cost_report
98
+ from sky.client.sdk import down
99
+ from sky.client.sdk import download_logs
100
+ from sky.client.sdk import exec # pylint: disable=redefined-builtin
101
+ from sky.client.sdk import get
102
+ from sky.client.sdk import job_status
103
+ from sky.client.sdk import launch
104
+ from sky.client.sdk import optimize
105
+ from sky.client.sdk import queue
106
+ from sky.client.sdk import start
107
+ from sky.client.sdk import status
108
+ from sky.client.sdk import stop
109
+ from sky.client.sdk import storage_delete
110
+ from sky.client.sdk import storage_ls
111
+ from sky.client.sdk import stream_and_get
112
+ from sky.client.sdk import tail_logs
85
113
  from sky.clouds.service_catalog import list_accelerators
86
- from sky.core import autostop
87
- from sky.core import cancel
88
- from sky.core import cost_report
89
- from sky.core import down
90
- from sky.core import download_logs
91
- from sky.core import job_status
92
- from sky.core import queue
93
- from sky.core import start
94
- from sky.core import status
95
- from sky.core import stop
96
- from sky.core import storage_delete
97
- from sky.core import storage_ls
98
- from sky.core import tail_logs
99
114
  from sky.dag import Dag
100
115
  from sky.data import Storage
101
116
  from sky.data import StorageMode
102
117
  from sky.data import StoreType
103
- from sky.execution import exec # pylint: disable=redefined-builtin
104
- from sky.execution import launch
105
- # TODO (zhwu): These imports are for backward compatibility, and spot APIs
106
- # should be called with `sky.spot.xxx` instead. Remove in release 0.8.0
107
- from sky.jobs.core import spot_cancel
108
- from sky.jobs.core import spot_launch
109
- from sky.jobs.core import spot_queue
110
- from sky.jobs.core import spot_tail_logs
118
+ from sky.jobs import ManagedJobStatus
111
119
  from sky.optimizer import Optimizer
112
- from sky.optimizer import OptimizeTarget
113
120
  from sky.resources import Resources
114
121
  from sky.skylet.job_lib import JobStatus
115
- from sky.status_lib import ClusterStatus
116
122
  from sky.task import Task
123
+ from sky.utils.common import OptimizeTarget
124
+ from sky.utils.common import StatusRefreshMode
125
+ from sky.utils.config_utils import Config
126
+ from sky.utils.registry import CLOUD_REGISTRY
127
+ from sky.utils.registry import JOBS_RECOVERY_STRATEGY_REGISTRY
128
+ from sky.utils.status_lib import ClusterStatus
117
129
 
118
130
  # Aliases.
119
131
  IBM = clouds.IBM
@@ -124,12 +136,14 @@ GCP = clouds.GCP
124
136
  Lambda = clouds.Lambda
125
137
  SCP = clouds.SCP
126
138
  Kubernetes = clouds.Kubernetes
139
+ K8s = Kubernetes
127
140
  OCI = clouds.OCI
128
141
  Paperspace = clouds.Paperspace
129
142
  RunPod = clouds.RunPod
143
+ Vast = clouds.Vast
130
144
  Vsphere = clouds.Vsphere
131
145
  Fluidstack = clouds.Fluidstack
132
- optimize = Optimizer.optimize
146
+ Nebius = clouds.Nebius
133
147
 
134
148
  __all__ = [
135
149
  '__version__',
@@ -139,13 +153,16 @@ __all__ = [
139
153
  'GCP',
140
154
  'IBM',
141
155
  'Kubernetes',
156
+ 'K8s',
142
157
  'Lambda',
143
158
  'OCI',
144
159
  'Paperspace',
145
160
  'RunPod',
161
+ 'Vast',
146
162
  'SCP',
147
163
  'Vsphere',
148
164
  'Fluidstack',
165
+ 'Nebius',
149
166
  'Optimizer',
150
167
  'OptimizeTarget',
151
168
  'backends',
@@ -157,14 +174,16 @@ __all__ = [
157
174
  'StoreType',
158
175
  'ClusterStatus',
159
176
  'JobStatus',
177
+ 'ManagedJobStatus',
178
+ 'StatusRefreshMode',
160
179
  # APIs
161
180
  'Dag',
162
181
  'Task',
163
182
  'Resources',
164
- # execution APIs
183
+ # core APIs
184
+ 'optimize',
165
185
  'launch',
166
186
  'exec',
167
- 'spot_launch',
168
187
  # core APIs
169
188
  'status',
170
189
  'start',
@@ -176,13 +195,26 @@ __all__ = [
176
195
  'queue',
177
196
  'cancel',
178
197
  'tail_logs',
179
- 'spot_tail_logs',
180
198
  'download_logs',
181
199
  'job_status',
182
- # core APIs Spot Job Management
183
- 'spot_queue',
184
- 'spot_cancel',
185
200
  # core APIs Storage Management
186
201
  'storage_ls',
187
202
  'storage_delete',
203
+ # API server APIs
204
+ 'get',
205
+ 'stream_and_get',
206
+ 'api_status',
207
+ 'api_cancel',
208
+ 'api_info',
209
+ 'api_start',
210
+ 'api_stop',
211
+ 'api_server_logs',
212
+ # Admin Policy
213
+ 'UserRequest',
214
+ 'MutatedUserRequest',
215
+ 'AdminPolicy',
216
+ 'Config',
217
+ # Registry
218
+ 'CLOUD_REGISTRY',
219
+ 'JOBS_RECOVERY_STRATEGY_REGISTRY',
188
220
  ]
sky/adaptors/aws.py CHANGED
@@ -35,6 +35,7 @@ import time
35
35
  from typing import Any, Callable
36
36
 
37
37
  from sky.adaptors import common
38
+ from sky.utils import annotations
38
39
  from sky.utils import common_utils
39
40
 
40
41
  _IMPORT_ERROR_MESSAGE = ('Failed to import dependencies for AWS. '
@@ -59,7 +60,7 @@ class _ThreadLocalLRUCache(threading.local):
59
60
 
60
61
  def __init__(self, maxsize=32):
61
62
  super().__init__()
62
- self.cache = functools.lru_cache(maxsize=maxsize)
63
+ self.cache = annotations.lru_cache(scope='global', maxsize=maxsize)
63
64
 
64
65
 
65
66
  def _thread_local_lru_cache(maxsize=32):
@@ -120,9 +121,17 @@ def _create_aws_object(creation_fn_or_cls: Callable[[], Any],
120
121
  # The LRU cache needs to be thread-local to avoid multiple threads sharing the
121
122
  # same session object, which is not guaranteed to be thread-safe.
122
123
  @_thread_local_lru_cache()
123
- def session():
124
+ def session(check_credentials: bool = True):
124
125
  """Create an AWS session."""
125
- return _create_aws_object(boto3.session.Session, 'session')
126
+ s = _create_aws_object(boto3.session.Session, 'session')
127
+ if check_credentials and s.get_credentials() is None:
128
+ # s.get_credentials() can be None if there are actually no credentials,
129
+ # or if we fail to get credentials from IMDS (e.g. due to throttling).
130
+ # Technically, it could be okay to have no credentials, as certain AWS
131
+ # APIs don't actually need them. But afaik everything we use AWS for
132
+ # needs credentials.
133
+ raise botocore_exceptions().NoCredentialsError()
134
+ return s
126
135
 
127
136
 
128
137
  # Avoid caching the resource/client objects. If we are using the assumed role,
@@ -149,11 +158,15 @@ def resource(service_name: str, **kwargs):
149
158
  config = botocore_config().Config(
150
159
  retries={'max_attempts': max_attempts})
151
160
  kwargs['config'] = config
161
+
162
+ check_credentials = kwargs.pop('check_credentials', True)
163
+
152
164
  # Need to use the client retrieved from the per-thread session to avoid
153
165
  # thread-safety issues (Directly creating the client with boto3.resource()
154
166
  # is not thread-safe). Reference: https://stackoverflow.com/a/59635814
155
167
  return _create_aws_object(
156
- lambda: session().resource(service_name, **kwargs), 'resource')
168
+ lambda: session(check_credentials=check_credentials).resource(
169
+ service_name, **kwargs), 'resource')
157
170
 
158
171
 
159
172
  def client(service_name: str, **kwargs):
@@ -164,12 +177,16 @@ def client(service_name: str, **kwargs):
164
177
  kwargs: Other options.
165
178
  """
166
179
  _assert_kwargs_builtin_type(kwargs)
180
+
181
+ check_credentials = kwargs.pop('check_credentials', True)
182
+
167
183
  # Need to use the client retrieved from the per-thread session to avoid
168
184
  # thread-safety issues (Directly creating the client with boto3.client() is
169
185
  # not thread-safe). Reference: https://stackoverflow.com/a/59635814
170
186
 
171
- return _create_aws_object(lambda: session().client(service_name, **kwargs),
172
- 'client')
187
+ return _create_aws_object(
188
+ lambda: session(check_credentials=check_credentials).client(
189
+ service_name, **kwargs), 'client')
173
190
 
174
191
 
175
192
  @common.load_lazy_modules(modules=_LAZY_MODULES)