skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512):
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,531 @@
1
+ """SCP instance provisioning."""
2
+
3
+ import logging
4
+ import random
5
+ import string
6
+ import time
7
+ from typing import Any, Dict, List, Optional, Tuple
8
+
9
+ from sky.clouds.utils import scp_utils
10
+ from sky.provision import common
11
+ from sky.utils import status_lib
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def run_instances(region: str, cluster_name: str, cluster_name_on_cloud: str,
                  config: common.ProvisionConfig) -> common.ProvisionRecord:
    """Ensure the SCP virtual server for a cluster is up and running.

    The function tries, in order:
      1. Reuse the instances that are already RUNNING when there are
         exactly ``config.count`` of them.
      2. Restart the head instance found among the STOPPED instances and
         block until it reports RUNNING.
      3. Create a new instance, trying each usable VPC / subnet pair until
         one creation succeeds.

    Args:
        region: Region recorded in the returned provision record.
        cluster_name: Unused; deleted immediately.
        cluster_name_on_cloud: Name used to look up existing instances and
            assigned as ``virtualServerName`` of a newly created one.
        config: Provision configuration. ``config.node_config['zone_id']``,
            ``config.count`` and ``config.docker_config`` are read.

    Returns:
        A ``common.ProvisionRecord`` with ``provider_name='scp'`` and
        ``zone=None`` describing the head instance and which instance IDs
        were resumed or created.

    Raises:
        RuntimeError: If more instances are running than requested, if the
            requested number is running but no head instance is found, or
            if no instance could be created in any VPC/subnet.
    """
    del cluster_name  # unused
    zone_id = config.node_config['zone_id']
    running_instances = _filter_instances(cluster_name_on_cloud, ['RUNNING'])
    head_instance_id = _get_head_instance_id(running_instances)

    to_start_count = config.count - len(running_instances)
    if to_start_count < 0:
        raise RuntimeError(
            f'Cluster {cluster_name_on_cloud} already has '
            f'{len(running_instances)} nodes, but {config.count} are required.')

    if to_start_count == 0:
        if head_instance_id is None:
            raise RuntimeError(
                f'Cluster {cluster_name_on_cloud} has no head node.')
        logger.info(f'Cluster {cluster_name_on_cloud} already has '
                    f'{len(running_instances)} nodes, no need to start more.')
        return common.ProvisionRecord(provider_name='scp',
                                      cluster_name=cluster_name_on_cloud,
                                      region=region,
                                      zone=None,
                                      head_instance_id=head_instance_id,
                                      resumed_instance_ids=[],
                                      created_instance_ids=[])

    stopped_instances = _filter_instances(cluster_name_on_cloud, ['STOPPED'])
    if to_start_count <= len(stopped_instances):
        head_instance_id = _get_head_instance_id(stopped_instances)
        scp_utils.SCPClient().start_instance(head_instance_id)
        # Poll every 2 seconds until the restarted server reports RUNNING.
        # NOTE(review): there is no timeout; a server that never reaches
        # RUNNING blocks this call forever.
        while True:
            instance_info = scp_utils.SCPClient().get_instance_info(
                head_instance_id)
            if instance_info['virtualServerState'] == 'RUNNING':
                break
            time.sleep(2)
        resumed_instance_ids = [head_instance_id]
        return common.ProvisionRecord(provider_name='scp',
                                      cluster_name=cluster_name_on_cloud,
                                      region=region,
                                      zone=None,
                                      head_instance_id=head_instance_id,
                                      resumed_instance_ids=resumed_instance_ids,
                                      created_instance_ids=[])

    # SCP does not support multi-node
    # NOTE(review): the instance creation payload is taken from
    # config.docker_config and mutated in place -- confirm this is intended.
    instance_config = config.docker_config
    instance_config['virtualServerName'] = cluster_name_on_cloud

    instance_id = None
    vpc_subnets = _get_or_create_vpc_subnets(zone_id)
    for vpc, subnets in vpc_subnets.items():
        sg_id = _create_security_group(zone_id, vpc)
        if sg_id is None:
            continue
        try:
            instance_config['securityGroupIds'] = [sg_id]
            for subnet in subnets:
                instance_config['nic']['subnetId'] = subnet
                instance_id = _create_instance(vpc, instance_config)
                if instance_id is not None:
                    break
        except Exception as e:  # pylint: disable=broad-except
            _delete_security_group(sg_id)
            logger.error(f'run_instances error: {e}')
            continue
        if instance_id is not None:
            # Stop after the first successful creation. Without this the
            # loop would move on to the next VPC, creating another security
            # group and instance and possibly overwriting instance_id.
            break

    if instance_id is None:
        raise RuntimeError('instance creation error')

    if head_instance_id is None:
        head_instance_id = instance_id

    created_instance_ids = [instance_id]

    return common.ProvisionRecord(provider_name='scp',
                                  cluster_name=cluster_name_on_cloud,
                                  region=region,
                                  zone=None,
                                  head_instance_id=head_instance_id,
                                  resumed_instance_ids=[],
                                  created_instance_ids=created_instance_ids)
99
+
100
+
101
def _get_or_create_vpc_subnets(zone_id):
    """Return the usable VPC -> subnet mapping, creating networking if absent.

    While ``_get_vcp_subnets(zone_id)`` is empty, one attempt is made to
    create a VPC, a subnet and an internet gateway, waiting after each step
    for the resource to report its ready state. A failed attempt is logged
    and retried after a 10 second pause as long as the lookup stays empty;
    a successful attempt ends the loop. The mapping is then looked up again
    and returned.

    Args:
        zone_id: Zone passed to the lookup and to the VPC/subnet creation.

    Returns:
        Whatever ``_get_vcp_subnets(zone_id)`` reports after the loop.
    """

    def _block_until(fetch_info, state_key, ready_state):
        # Re-fetch the resource every 5 seconds until it shows ready_state.
        while fetch_info()[state_key] != ready_state:
            time.sleep(5)

    while not _get_vcp_subnets(zone_id):
        try:
            created = scp_utils.SCPClient().create_vpc(zone_id)
            time.sleep(5)
            vpc_id = created['resourceId']
            _block_until(lambda: scp_utils.SCPClient().get_vpc_info(vpc_id),
                         'vpcState', 'ACTIVE')

            created = scp_utils.SCPClient().create_subnet(vpc_id, zone_id)
            time.sleep(5)
            subnet_id = created['resourceId']
            _block_until(
                lambda: scp_utils.SCPClient().get_subnet_info(subnet_id),
                'subnetState', 'ACTIVE')

            created = scp_utils.SCPClient().create_internet_gateway(vpc_id)
            time.sleep(5)
            internet_gateway_id = created['resourceId']
            _block_until(
                lambda: scp_utils.SCPClient().get_internet_gateway_info(
                    internet_gateway_id), 'internetGatewayState', 'ATTACHED')

            # The VPC state is checked once more after the gateway attaches.
            _block_until(lambda: scp_utils.SCPClient().get_vpc_info(vpc_id),
                         'vpcState', 'ACTIVE')

            break
        except Exception as e:  # pylint: disable=broad-except
            time.sleep(10)
            logger.error(f'vpc creation error: {e}')

    return _get_vcp_subnets(zone_id)
150
+
151
+
152
def _get_vcp_subnets(zone_id):
    """Map each usable VPC of a zone to its active subnet ids.

    A VPC is kept when its `vpcState` is 'ACTIVE', an internet gateway in
    state 'ATTACHED' references it, and it has at least one subnet whose
    `subnetState` is 'ACTIVE'.

    Args:
        zone_id: Zone identifier forwarded to `get_vpcs`.

    Returns:
        Dict of VPC id -> non-empty list of subnet ids.
    """
    active_vpcs = []
    for vpc_item in scp_utils.SCPClient().get_vpcs(zone_id):
        if vpc_item['vpcState'] == 'ACTIVE':
            active_vpcs.append(vpc_item['vpcId'])

    gateway_vpcs = []
    for igw_item in scp_utils.SCPClient().get_internet_gateway():
        if igw_item['internetGatewayState'] == 'ATTACHED':
            gateway_vpcs.append(igw_item['vpcId'])

    usable_vpcs = [vpc_id for vpc_id in active_vpcs if vpc_id in gateway_vpcs]

    all_subnets = scp_utils.SCPClient().get_subnets()

    subnets_by_vpc = {}
    for vpc_id in usable_vpcs:
        active_subnets = []
        for subnet_item in all_subnets:
            if (subnet_item['subnetState'] == 'ACTIVE' and
                    subnet_item['vpcId'] == vpc_id):
                active_subnets.append(subnet_item['subnetId'])
        # VPCs without any active subnet are left out of the result.
        if active_subnets:
            subnets_by_vpc[vpc_id] = active_subnets

    return subnets_by_vpc
180
+
181
+
182
def _filter_instances(cluster_name_on_cloud,
                      status_filter: Optional[List[str]]):
    """Return the instance records that belong to a cluster.

    Previously, passing `status_filter=None` returned every instance visible
    to the account, regardless of name, so callers such as `query_instances`
    picked up instances of unrelated clusters. The name filter is now always
    applied; `status_filter` only controls the additional state check.

    Args:
        cluster_name_on_cloud: Value matched against each record's
            'virtualServerName'.
        status_filter: Allowed 'virtualServerState' values, or None to accept
            any state.

    Returns:
        List of matching instance records, in the order returned by
        `get_instances()`.
    """
    instances = scp_utils.SCPClient().get_instances()
    return [
        instance for instance in instances
        if instance['virtualServerName'] == cluster_name_on_cloud and
        (status_filter is None or
         instance['virtualServerState'] in status_filter)
    ]
195
+
196
+
197
+ def _get_head_instance_id(instances):
198
+ head_instance_id = None
199
+ if len(instances) > 0:
200
+ head_instance_id = instances[0]['virtualServerId']
201
+ return head_instance_id
202
+
203
+
204
def _create_security_group(zone_id, vpc):
    """Create a security group in `vpc` and add its IN and OUT rules.

    The group gets a random name ('sky' plus eight lowercase letters). After
    creation the function polls every 5 seconds until the group reports
    'ACTIVE', then adds one 'IN' and one 'OUT' rule with no port list.

    Args:
        zone_id: Zone identifier forwarded to `create_security_group`.
        vpc: VPC identifier the group is created in.

    Returns:
        The new security group id, or None if any step raised. On failure
        the group is deleted again if it had already been created.
    """
    suffix = ''.join(random.choices(string.ascii_lowercase, k=8))
    sg_name = f'sky{suffix}'

    rollback_stack = []
    try:
        created = scp_utils.SCPClient().create_security_group(
            zone_id, vpc, sg_name)
        sg_id = created['resourceId']
        rollback_stack.append(lambda: _delete_security_group(sg_id))

        is_active = False
        while not is_active:
            groups = scp_utils.SCPClient().get_security_groups(vpc, sg_name)
            states = [
                group['securityGroupState']
                for group in groups
                if group['securityGroupId'] == sg_id
            ]
            is_active = bool(states) and states[0] == 'ACTIVE'
            if not is_active:
                time.sleep(5)

        for direction in ('IN', 'OUT'):
            scp_utils.SCPClient().add_security_group_rule(
                sg_id, direction, None)

        return sg_id
    except Exception as e:  # pylint: disable=broad-except
        _undo_functions(rollback_stack)
        logger.error(f'security group creation error: {e}')
        return None
233
+
234
+
235
def _delete_security_group(sg_id):
    """Delete a security group and wait until it is no longer listed.

    After issuing the delete, the security group list is re-read every
    5 seconds until no entry with `sg_id` remains. The wait is unbounded.

    Args:
        sg_id: Identifier of the security group to delete.
    """
    scp_utils.SCPClient().delete_security_group(sg_id)
    while True:
        time.sleep(5)
        lingering_states = [
            group['securityGroupState']
            for group in scp_utils.SCPClient().get_security_groups()
            if group['securityGroupId'] == sg_id
        ]
        if len(lingering_states) == 0:
            return
247
+
248
+
249
+ def _undo_functions(undo_func_list):
250
+ while undo_func_list:
251
+ func = undo_func_list.pop()
252
+ func()
253
+
254
+
255
def _create_instance(vpc_id, instance_config):
    """Create an instance, wait for RUNNING and open its firewall rules.

    The instance is polled every 10 seconds until it reports 'RUNNING'; then
    one 'IN' and one 'OUT' firewall rule are added for its internal IP on the
    firewall of `vpc_id`.

    The rollback that deletes the instance is registered as soon as the
    instance id is known. Previously it was registered only after the
    instance reached 'RUNNING', so an exception raised while polling left the
    newly created instance behind.

    Args:
        vpc_id: VPC whose firewall receives the rules.
        instance_config: Request payload forwarded to `create_instance`.

    Returns:
        The new instance id, or None if any step raised. On failure, every
        resource created so far is rolled back in reverse order.
    """
    undo_func_stack = []
    try:
        instance = scp_utils.SCPClient().create_instance(instance_config)
        instance_id = instance['resourceId']
        # Register cleanup immediately so a failure while waiting for the
        # instance to start does not leak it.
        undo_func_stack.append(lambda: _delete_instance(instance_id))
        while True:
            time.sleep(10)
            instance_info = scp_utils.SCPClient().get_instance_info(instance_id)
            if instance_info['virtualServerState'] == 'RUNNING':
                break
        firewall_id = _get_firewall_id(vpc_id)
        internal_ip = instance_info['ip']
        in_rule_id = _add_firewall_rule(firewall_id, internal_ip, 'IN', None)
        undo_func_stack.append(
            lambda: _delete_firewall_rule(firewall_id, in_rule_id))
        out_rule_id = _add_firewall_rule(firewall_id, internal_ip, 'OUT', None)
        undo_func_stack.append(
            lambda: _delete_firewall_rule(firewall_id, out_rule_id))
        return instance_id

    except Exception as e:  # pylint: disable=broad-except
        # Log first so the original error is recorded even if the rollback
        # itself raises.
        logger.error(f'instance creation error: {e}')
        _undo_functions(undo_func_stack)
        return None
280
+
281
+
282
def _delete_instance(instance_id):
    """Terminate an instance and wait until it is no longer listed.

    After the terminate request, the instance list is re-read every
    10 seconds until no record with `instance_id` remains. The wait is
    unbounded.

    Args:
        instance_id: Identifier of the instance to terminate.
    """
    scp_utils.SCPClient().terminate_instance(instance_id)
    still_listed = True
    while still_listed:
        time.sleep(10)
        listed_ids = [
            server['virtualServerId']
            for server in scp_utils.SCPClient().get_instances()
        ]
        still_listed = instance_id in listed_ids
294
+
295
+
296
def _get_firewall_id(vpc_id):
    """Return the id of the first usable firewall attached to a VPC.

    A firewall is usable when its 'vpcId' equals `vpc_id` and its
    'firewallState' is 'ACTIVE' or 'DEPLOYING'.

    Args:
        vpc_id: VPC identifier to look up.

    Returns:
        The 'firewallId' of the first matching firewall.

    Raises:
        IndexError: If no firewall matches.
    """
    usable_states = ['ACTIVE', 'DEPLOYING']
    candidates = []
    for firewall in scp_utils.SCPClient().get_firewalls():
        if (firewall['vpcId'] == vpc_id and
                firewall['firewallState'] in usable_states):
            candidates.append(firewall['firewallId'])
    return candidates[0]
305
+
306
+
307
def _add_firewall_rule(firewall_id, internal_ip, direction,
                       ports: Optional[List[str]]):
    """Add a firewall rule and wait until it becomes active.

    The add request (and the wait that follows it) is retried up to 300
    times, sleeping 10 seconds after each failure. While waiting for the rule
    to report 'ACTIVE' the state is polled every 5 seconds; previously this
    inner loop had no delay and queried the API in a tight loop.

    Args:
        firewall_id: Firewall that receives the rule.
        internal_ip: Internal IP of the instance the rule applies to.
        direction: Rule direction as passed by the callers ('IN' or 'OUT').
        ports: Port list forwarded to the client, or None.

    Returns:
        The id of the created rule.

    Raises:
        RuntimeError: If every attempt failed; the last underlying exception
            is attached as the cause.
    """
    max_attempts = 300
    poll_interval_seconds = 5
    last_error = None

    for _ in range(max_attempts):
        try:
            rule_info = scp_utils.SCPClient().add_firewall_rule(
                firewall_id, internal_ip, direction, ports)
            rule_id = rule_info['resourceId']
            while True:
                rule_info = scp_utils.SCPClient().get_firewall_rule_info(
                    firewall_id, rule_id)
                if rule_info['ruleState'] == 'ACTIVE':
                    return rule_id
                # Back off between polls instead of hammering the API.
                time.sleep(poll_interval_seconds)
        except Exception as e:  # pylint: disable=broad-except
            last_error = e
            time.sleep(10)
            logger.error(f'add firewall rule error: {e}')
    raise RuntimeError('add firewall rule error') from last_error
328
+
329
+
330
def _delete_firewall_rule(firewall_id, rule_ids):
    """Delete firewall rules and wait until none of them is listed anymore.

    The delete request is retried until the API accepts it; afterwards the
    rule list is polled until the rules are gone. Every iteration, whether it
    failed or merely found rules still present, counts towards the limit of
    300 attempts and is followed by a 5 second sleep, so the loop is bounded
    and never spins without a delay.

    Args:
        firewall_id: Firewall that owns the rules.
        rule_ids: A single rule id or a list of rule ids. An empty list is a
            no-op.

    Raises:
        RuntimeError: If the rules could not be removed within the attempt
            limit; the last underlying exception (if any) is attached as the
            cause.
    """
    if not isinstance(rule_ids, list):
        rule_ids = [rule_ids]
    if not rule_ids:
        # Nothing to delete; avoid sending an empty delete request.
        return

    max_attempts = 300
    last_error = None
    delete_accepted = False
    for _ in range(max_attempts):
        try:
            if not delete_accepted:
                scp_utils.SCPClient().delete_firewall_rule(
                    firewall_id, rule_ids)
                # Do not re-issue the delete once accepted; only poll.
                delete_accepted = True
            if not _remaining_firewall_rule(firewall_id, rule_ids):
                return
        except Exception as e:  # pylint: disable=broad-except
            last_error = e
            logger.error(f'delete firewall rule error: {e}')
        time.sleep(5)
    raise RuntimeError('delete firewall rule error') from last_error


def _remaining_firewall_rule(firewall_id, rule_ids):
    """Return True if any of `rule_ids` is still listed on the firewall.

    `get_firewall_rules` entries are treated as rule records elsewhere in
    this module (they are indexed with 'ruleId'), so the ids are compared
    against each record's 'ruleId'. The previous implementation compared the
    ids against the records themselves, which never matched. Entries that are
    not dicts are compared directly, to stay tolerant of a plain id list.

    Args:
        firewall_id: Firewall whose rules are listed.
        rule_ids: Iterable of rule ids to look for.

    Returns:
        True if at least one id is still present, False otherwise.
    """
    firewall_rules = scp_utils.SCPClient().get_firewall_rules(firewall_id)
    existing_ids = [
        rule.get('ruleId') if isinstance(rule, dict) else rule
        for rule in firewall_rules
    ]
    return any(rule_id in existing_ids for rule_id in rule_ids)
355
+
356
+
357
def _get_firewall_rule_ids(instance_info, firewall_id,
                           ports: Optional[List[str]]):
    """Collect the ids of firewall rules that belong to an instance.

    Only the first entry of each rule's source and destination address list
    is inspected.

    With `ports` given, a rule matches when its destination is the instance
    IP, its source is '0.0.0.0/0' and its comma-joined 'tcpServices' equals
    the comma-joined `ports`. With `ports` None, a rule matches when the
    instance IP and '0.0.0.0/0' form its destination/source pair in either
    order.

    Args:
        instance_info: Instance record; its 'ip' entry is used.
        firewall_id: Firewall whose rules are listed.
        ports: Port list to match, or None to match regardless of ports.

    Returns:
        List of matching 'ruleId' values.
    """
    any_address = '0.0.0.0/0'
    instance_ip = instance_info['ip']
    rules = scp_utils.SCPClient().get_firewall_rules(firewall_id)
    matched_ids = []

    if ports is None:
        for rule in rules:
            if (instance_ip == rule['destinationIpAddresses'][0] and
                    any_address == rule['sourceIpAddresses'][0]):
                matched_ids.append(rule['ruleId'])
            if (instance_ip == rule['sourceIpAddresses'][0] and
                    any_address == rule['destinationIpAddresses'][0]):
                matched_ids.append(rule['ruleId'])
        return matched_ids

    for rule in rules:
        rule_ports = ','.join(rule['tcpServices'])
        wanted_ports = ','.join(ports)
        if (instance_ip == rule['destinationIpAddresses'][0] and
                any_address == rule['sourceIpAddresses'][0] and
                wanted_ports == rule_ports):
            matched_ids.append(rule['ruleId'])
    return matched_ids
381
+
382
+
383
def stop_instances(
    cluster_name_on_cloud: str,
    provider_config: Optional[Dict[str, Any]] = None,
    worker_only: bool = False,
) -> None:
    """Stop every instance named `cluster_name_on_cloud` and wait for it.

    Each matching instance is stopped in turn; its state is then polled,
    with a 2 second pause after each query, until it reports 'STOPPED'.

    Args:
        cluster_name_on_cloud: Value matched against 'virtualServerName'.
        provider_config: Unused.
        worker_only: Unused.
    """
    del provider_config, worker_only

    for server in scp_utils.SCPClient().get_instances():
        if server['virtualServerName'] != cluster_name_on_cloud:
            continue
        instance_id = server['virtualServerId']
        scp_utils.SCPClient().stop_instance(instance_id)
        stopped = False
        while not stopped:
            info = scp_utils.SCPClient().get_instance_info(instance_id)
            time.sleep(2)
            stopped = info['virtualServerState'] == 'STOPPED'
401
+
402
+
403
def terminate_instances(
    cluster_name_on_cloud: str,
    provider_config: Optional[Dict[str, Any]] = None,
    worker_only: bool = False,
) -> None:
    """Terminate every instance named `cluster_name_on_cloud`.

    For each matching instance the firewall rules that reference its IP are
    deleted first, then the instance, then its first security group. A
    failure for one instance is logged and does not stop the loop.

    Args:
        cluster_name_on_cloud: Value matched against 'virtualServerName'.
        provider_config: Unused.
        worker_only: Unused.
    """
    del provider_config, worker_only

    for server in scp_utils.SCPClient().get_instances():
        if server['virtualServerName'] != cluster_name_on_cloud:
            continue
        try:
            instance_id = server['virtualServerId']
            info = scp_utils.SCPClient().get_instance_info(instance_id)
            vpc_id = info['vpcId']
            sg_id = info['securityGroupIds'][0]['securityGroupId']
            firewall_id = _get_firewall_id(vpc_id)
            # ports=None selects the rules by IP only.
            rule_ids = _get_firewall_rule_ids(info, firewall_id, None)
            _delete_firewall_rule(firewall_id, rule_ids)
            _delete_instance(instance_id)
            _delete_security_group(sg_id)
        except Exception as e:  # pylint: disable=broad-except
            logger.error(f'terminate_instances error: {e}')
427
+
428
+
429
def query_instances(
    cluster_name: str,
    cluster_name_on_cloud: str,
    provider_config: Optional[Dict[str, Any]] = None,
    non_terminated_only: bool = True,
    retry_if_missing: bool = False,
) -> Dict[str, Tuple[Optional['status_lib.ClusterStatus'], Optional[str]]]:
    """Translate instance states into cluster statuses.

    The instances come from `_filter_instances(cluster_name_on_cloud, None)`.
    'TERMINATING' and 'TERMINATED' map to None; such entries are dropped when
    `non_terminated_only` is true.

    Args:
        cluster_name: Unused.
        cluster_name_on_cloud: Forwarded to `_filter_instances`.
        provider_config: Must not be None.
        non_terminated_only: Whether to omit instances whose status is None.
        retry_if_missing: Unused.

    Returns:
        Dict of 'virtualServerId' -> (status, None).

    Raises:
        KeyError: If an instance reports a state missing from the mapping.
    """
    del cluster_name, retry_if_missing  # unused
    assert provider_config is not None, (cluster_name_on_cloud, provider_config)
    servers = _filter_instances(cluster_name_on_cloud, None)

    init = status_lib.ClusterStatus.INIT
    stopped = status_lib.ClusterStatus.STOPPED
    state_to_status = {
        'CREATING': init,
        'EDITING': init,
        'RUNNING': status_lib.ClusterStatus.UP,
        'STARTING': init,
        'RESTARTING': init,
        'STOPPING': stopped,
        'STOPPED': stopped,
        'TERMINATING': None,
        'TERMINATED': None,
    }

    result: Dict[str, Tuple[Optional['status_lib.ClusterStatus'],
                            Optional[str]]] = {}
    for server in servers:
        cluster_status = state_to_status[server['virtualServerState']]
        if cluster_status is None and non_terminated_only:
            continue
        result[server['virtualServerId']] = (cluster_status, None)
    return result
460
+
461
+
462
def wait_instances(region: str, cluster_name_on_cloud: str, state: str) -> None:
    """Do nothing; all three arguments are accepted and discarded."""
    del state, cluster_name_on_cloud, region  # unused
464
+
465
+
466
def get_cluster_info(
        region: str,
        cluster_name_on_cloud: str,
        provider_config: Optional[Dict[str, Any]] = None) -> common.ClusterInfo:
    """Describe the running instances of a cluster.

    Only instances in state 'RUNNING' are included. The head instance is the
    first running instance, or None if there is none. The external IP of
    each instance is looked up through `get_external_ip`.

    Args:
        region: Unused.
        cluster_name_on_cloud: Name used to select the instances.
        provider_config: Passed through into the result.

    Returns:
        A `common.ClusterInfo` with provider name 'scp'.
    """
    del region

    running = _filter_instances(cluster_name_on_cloud, ['RUNNING'])
    head_instance_id = _get_head_instance_id(running)

    info_by_id = {
        server['virtualServerId']: [
            common.InstanceInfo(
                instance_id=server['virtualServerId'],
                internal_ip=server['ip'],
                external_ip=scp_utils.SCPClient().get_external_ip(
                    server['virtualServerId'], server['ip']),
                tags={})
        ] for server in running
    }

    return common.ClusterInfo(
        instances=info_by_id,
        head_instance_id=head_instance_id,
        provider_name='scp',
        provider_config=provider_config,
    )
492
+
493
+
494
def open_ports(
    cluster_name_on_cloud: str,
    ports: List[str],
    provider_config: Optional[Dict[str, Any]] = None,
) -> None:
    """Open inbound `ports` for every instance of a cluster.

    For each instance named `cluster_name_on_cloud`, an 'IN' rule is added to
    its first security group and an 'IN' firewall rule is added for its IP on
    the firewall of its VPC.

    Args:
        cluster_name_on_cloud: Value matched against 'virtualServerName'.
        ports: Ports forwarded to the security group and firewall rules.
        provider_config: Unused.
    """
    del provider_config

    for server in scp_utils.SCPClient().get_instances():
        if server['virtualServerName'] != cluster_name_on_cloud:
            continue
        info = scp_utils.SCPClient().get_instance_info(
            server['virtualServerId'])
        security_group_id = info['securityGroupIds'][0]['securityGroupId']
        scp_utils.SCPClient().add_security_group_rule(security_group_id, 'IN',
                                                      ports)
        vpc_id, internal_ip = info['vpcId'], info['ip']
        firewall_id = _get_firewall_id(vpc_id)
        _add_firewall_rule(firewall_id, internal_ip, 'IN', ports)
513
+
514
+
515
def cleanup_ports(
    cluster_name_on_cloud: str,
    ports: List[str],
    provider_config: Optional[Dict[str, Any]] = None,
) -> None:
    """Remove the firewall rules that match `ports` for a cluster.

    For each instance named `cluster_name_on_cloud`, the firewall rules
    selected by `_get_firewall_rule_ids` for `ports` are deleted from the
    firewall of the instance's VPC.

    Args:
        cluster_name_on_cloud: Value matched against 'virtualServerName'.
        ports: Ports whose firewall rules should be removed.
        provider_config: Unused.
    """
    del provider_config

    for server in scp_utils.SCPClient().get_instances():
        if server['virtualServerName'] != cluster_name_on_cloud:
            continue
        info = scp_utils.SCPClient().get_instance_info(
            server['virtualServerId'])
        firewall_id = _get_firewall_id(info['vpcId'])
        stale_rule_ids = _get_firewall_rule_ids(info, firewall_id, ports)
        _delete_firewall_rule(firewall_id, stale_rule_ids)
@@ -0,0 +1,11 @@
1
+ """Seeweb provisioner for SkyPilot."""
2
+
3
+ from sky.provision.seeweb.config import bootstrap_instances
4
+ from sky.provision.seeweb.instance import cleanup_ports
5
+ from sky.provision.seeweb.instance import get_cluster_info
6
+ from sky.provision.seeweb.instance import open_ports
7
+ from sky.provision.seeweb.instance import query_instances
8
+ from sky.provision.seeweb.instance import run_instances
9
+ from sky.provision.seeweb.instance import stop_instances
10
+ from sky.provision.seeweb.instance import terminate_instances
11
+ from sky.provision.seeweb.instance import wait_instances
@@ -0,0 +1,13 @@
1
+ """Configuration for Seeweb provisioning."""
2
+
3
+ from typing import Any, Dict
4
+
5
+
6
def bootstrap_instances(*args, **_kwargs) -> Dict[str, Any]:
    """Return the provisioning config unchanged.

    Seeweb needs no bootstrapping step, so the config (the third positional
    argument) is handed back as-is. Keyword arguments are ignored.

    Raises:
        IndexError: If fewer than three positional arguments are given.
    """
    return args[2]