skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
File without changes
File without changes
@@ -0,0 +1,225 @@
1
+ """Responses for the API server."""
2
+
3
+ import enum
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ import pydantic
7
+
8
+ from sky import data
9
+ from sky import models
10
+ from sky.jobs import state as job_state
11
+ from sky.server import common
12
+ from sky.skylet import job_lib
13
+ from sky.utils import status_lib
14
+
15
+
16
+ class ResponseBaseModel(pydantic.BaseModel):
17
+ """A pydantic model that acts like a dict.
18
+
19
+ Supports the following syntax:
20
+ class SampleResponse(ResponseBaseModel):
21
+ field: str
22
+
23
+ response = SampleResponse(field='value')
24
+ print(response['field']) # prints 'value'
25
+ response['field'] = 'value2'
26
+ print(response['field']) # prints 'value2'
27
+ print('field' in response) # prints True
28
+
29
+ This model exists for backwards compatibility with the
30
+ old SDK that used to return a dict.
31
+
32
+ The backward compatibility may be removed
33
+ in the future.
34
+ """
35
+ # Ignore extra fields in the request body, which is useful for backward
36
+ # compatibility. The difference with `allow` is that `ignore` will not
37
+ # include the unknown fields when dumping the model, i.e., we can add new
38
+ # fields to the request body without breaking the existing old API server
39
+ # where the handler function does not accept the new field in function
40
+ # signature.
41
+ model_config = pydantic.ConfigDict(extra='ignore')
42
+
43
+ # backward compatibility with dict
44
+ # TODO(syang): remove this in v0.13.0
45
+ def __getitem__(self, key):
46
+ try:
47
+ return getattr(self, key)
48
+ except AttributeError as e:
49
+ raise KeyError(key) from e
50
+
51
+ def __setitem__(self, key, value):
52
+ setattr(self, key, value)
53
+
54
+ def get(self, key, default=None):
55
+ return getattr(self, key, default)
56
+
57
+ def __contains__(self, key):
58
+ return hasattr(self, key)
59
+
60
+ def keys(self):
61
+ return self.model_dump().keys()
62
+
63
+ def values(self):
64
+ return self.model_dump().values()
65
+
66
+ def items(self):
67
+ return self.model_dump().items()
68
+
69
+ def __repr__(self):
70
+ return self.__dict__.__repr__()
71
+
72
+
73
+ class APIHealthResponse(ResponseBaseModel):
74
+ """Response for the API health endpoint."""
75
+ status: common.ApiServerStatus
76
+ api_version: str = ''
77
+ version: str = ''
78
+ version_on_disk: str = ''
79
+ commit: str = ''
80
+ # Whether basic auth on api server is enabled
81
+ basic_auth_enabled: bool = False
82
+ user: Optional[models.User] = None
83
+ # Whether service account token is enabled
84
+ service_account_token_enabled: bool = False
85
+ # Whether basic auth on ingress is enabled
86
+ ingress_basic_auth_enabled: bool = False
87
+
88
+
89
+ class StatusResponse(ResponseBaseModel):
90
+ """Response for the status endpoint."""
91
+ name: str
92
+ launched_at: int
93
+ # pydantic cannot generate the pydantic-core schema for
94
+ # backends.ResourceHandle, so we use Any here.
95
+ # This is an internally facing field anyway, so it's less
96
+ # of a problem that it's not typed.
97
+ handle: Optional[Any] = None
98
+ last_use: Optional[str] = None
99
+ status: status_lib.ClusterStatus
100
+ autostop: int
101
+ to_down: bool
102
+ owner: Optional[List[str]] = None
103
+ # metadata is a JSON, so we use Any here.
104
+ metadata: Optional[Dict[str, Any]] = None
105
+ cluster_hash: str
106
+ cluster_ever_up: bool
107
+ status_updated_at: Optional[int] = None
108
+ user_hash: str
109
+ user_name: str
110
+ config_hash: Optional[str] = None
111
+ workspace: str
112
+ last_creation_yaml: Optional[str] = None
113
+ last_creation_command: Optional[str] = None
114
+ is_managed: bool
115
+ last_event: Optional[str] = None
116
+ resources_str: Optional[str] = None
117
+ resources_str_full: Optional[str] = None
118
+ # credentials is a JSON, so we use Any here.
119
+ credentials: Optional[Dict[str, Any]] = None
120
+ nodes: int
121
+ cloud: Optional[str] = None
122
+ region: Optional[str] = None
123
+ cpus: Optional[str] = None
124
+ memory: Optional[str] = None
125
+ accelerators: Optional[str] = None
126
+ cluster_name_on_cloud: Optional[str] = None
127
+
128
+
129
+ class ClusterJobRecord(ResponseBaseModel):
130
+ """Response for the cluster job queue endpoint."""
131
+ job_id: int
132
+ job_name: str
133
+ username: str
134
+ user_hash: str
135
+ submitted_at: float
136
+ # None if the job has not started yet.
137
+ start_at: Optional[float] = None
138
+ # None if the job has not ended yet.
139
+ end_at: Optional[float] = None
140
+ resources: str
141
+ status: job_lib.JobStatus
142
+ log_path: str
143
+ metadata: Dict[str, Any] = {}
144
+
145
+
146
+ class UploadStatus(enum.Enum):
147
+ """Status of the upload."""
148
+ UPLOADING = 'uploading'
149
+ COMPLETED = 'completed'
150
+
151
+
152
+ class StorageRecord(ResponseBaseModel):
153
+ """Response for the storage list endpoint."""
154
+ name: str
155
+ launched_at: int
156
+ store: List[data.StoreType]
157
+ last_use: str
158
+ status: status_lib.StorageStatus
159
+
160
+
161
+ # TODO (syang) figure out which fields are always present
162
+ # and therefore can be non-optional.
163
+ class ManagedJobRecord(ResponseBaseModel):
164
+ """A single managed job record."""
165
+ # The job_id in the spot table
166
+ task_job_id: Optional[int] = pydantic.Field(None, alias='_job_id')
167
+ job_id: Optional[int] = None
168
+ task_id: Optional[int] = None
169
+ job_name: Optional[str] = None
170
+ task_name: Optional[str] = None
171
+ job_duration: Optional[float] = None
172
+ workspace: Optional[str] = None
173
+ status: Optional[job_state.ManagedJobStatus] = None
174
+ schedule_state: Optional[str] = None
175
+ resources: Optional[str] = None
176
+ cluster_resources: Optional[str] = None
177
+ cluster_resources_full: Optional[str] = None
178
+ cloud: Optional[str] = None
179
+ region: Optional[str] = None
180
+ zone: Optional[str] = None
181
+ infra: Optional[str] = None
182
+ recovery_count: Optional[int] = None
183
+ details: Optional[str] = None
184
+ failure_reason: Optional[str] = None
185
+ user_name: Optional[str] = None
186
+ user_hash: Optional[str] = None
187
+ submitted_at: Optional[float] = None
188
+ start_at: Optional[float] = None
189
+ end_at: Optional[float] = None
190
+ user_yaml: Optional[str] = None
191
+ entrypoint: Optional[str] = None
192
+ metadata: Optional[Dict[str, Any]] = None
193
+ controller_pid: Optional[int] = None
194
+ dag_yaml_path: Optional[str] = None
195
+ env_file_path: Optional[str] = None
196
+ last_recovered_at: Optional[float] = None
197
+ run_timestamp: Optional[str] = None
198
+ priority: Optional[int] = None
199
+ original_user_yaml_path: Optional[str] = None
200
+ pool: Optional[str] = None
201
+ pool_hash: Optional[str] = None
202
+ current_cluster_name: Optional[str] = None
203
+ job_id_on_pool_cluster: Optional[int] = None
204
+ accelerators: Optional[Dict[str, int]] = None
205
+
206
+
207
+ class VolumeRecord(ResponseBaseModel):
208
+ """A single volume record."""
209
+ name: str
210
+ type: str
211
+ launched_at: int
212
+ cloud: str
213
+ region: Optional[str] = None
214
+ zone: Optional[str] = None
215
+ size: Optional[str] = None
216
+ config: Dict[str, Any]
217
+ name_on_cloud: str
218
+ user_hash: str
219
+ user_name: str
220
+ workspace: str
221
+ last_attached_at: Optional[int] = None
222
+ last_use: Optional[str] = None
223
+ status: Optional[str] = None
224
+ usedby_pods: List[str]
225
+ usedby_clusters: List[str]
sky/schemas/db/README ADDED
@@ -0,0 +1,4 @@
1
+ Migrations for sqlalchemy databases. Currently includes:
2
+ global_user_state
3
+ spot_jobs (managed jobs state)
4
+ skypilot_config
sky/schemas/db/env.py ADDED
@@ -0,0 +1,90 @@
1
+ """Alembic environment configuration for state database migrations."""
2
+ from logging.config import fileConfig
3
+
4
+ from alembic import context
5
+ from sqlalchemy import engine_from_config
6
+ from sqlalchemy import pool
7
+
8
+ # this is the Alembic Config object, which provides
9
+ # access to the values within the .ini file in use.
10
+ config = context.config
11
+
12
+ # NOTE: Alembic's fileConfig() call globally reconfigures Python's logging
13
+ # system and, by default, disables every logger that already exists, which
14
+ # can suppress SkyPilot's output messages that tests expect to see.
15
+ #
16
+ # We therefore keep the call but pass disable_existing_loggers=False so that
17
+ # loggers configured before this point stay enabled.
18
+ if config.config_file_name is not None:
19
+ fileConfig(config.config_file_name, disable_existing_loggers=False)
20
+
21
+ # add your model's MetaData object here
22
+ # for 'autogenerate' support
23
+ # from myapp import mymodel
24
+ # target_metadata = mymodel.Base.metadata
25
+ target_metadata = None
26
+
27
+ # other values from the config, defined by the needs of env.py,
28
+ # can be acquired:
29
+ # my_important_option = config.get_main_option("my_important_option")
30
+ # ... etc.
31
+
32
+
33
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The Alembic context is configured with only the database URL (an Engine
    would also be accepted), so no Engine is created and no DBAPI needs to
    be available. Calls to context.execute() made during the migrations
    emit the given string to the script output.
    """
    database_url = config.get_main_option('sqlalchemy.url')
    ini_section = config.config_ini_section
    # Name of the version table; falls back to Alembic's usual
    # 'alembic_version' when the ini section does not set one.
    version_table_name = config.get_section_option(ini_section,
                                                   'version_table',
                                                   'alembic_version')

    offline_options = {
        'url': database_url,
        'target_metadata': target_metadata,
        'literal_binds': True,
        'dialect_opts': {'paramstyle': 'named'},
        'version_table': version_table_name,
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
59
+
60
+
61
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    An Engine is built from the 'sqlalchemy.'-prefixed options of the ini
    section, and a connection from it is associated with the Alembic
    context for the duration of the migrations.
    """
    ini_section = config.config_ini_section
    engine = engine_from_config(
        config.get_section(ini_section, {}),
        prefix='sqlalchemy.',
        poolclass=pool.NullPool,
    )
    # Name of the version table; falls back to Alembic's usual
    # 'alembic_version' when the ini section does not set one.
    version_table_name = config.get_section_option(ini_section,
                                                   'version_table',
                                                   'alembic_version')

    with engine.connect() as connection:
        context.configure(connection=connection,
                          target_metadata=target_metadata,
                          version_table=version_table_name)
        with context.begin_transaction():
            context.run_migrations()
85
+
86
+
87
# Entry point executed when Alembic loads this module: pick the migration
# runner that matches the mode Alembic was invoked in.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
@@ -0,0 +1,124 @@
1
+ """Initial schema for state database with backwards compatibility columns
2
+
3
+ Revision ID: 001
4
+ Revises:
5
+ Create Date: 2024-01-01 12:00:00.000000
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from alembic import op
10
+ import sqlalchemy as sa
11
+
12
+ from sky.global_user_state import Base
13
+ from sky.utils.db import db_utils
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision = '001'
17
+ down_revision = None
18
+ branch_labels = None
19
+ depends_on = None
20
+
21
+
22
def upgrade():
    """Create any missing tables, then add the missing columns.

    All work happens inside Alembic's autocommit block. This allows each
    column addition to fail independently without rolling back the entire
    migration, which is needed for backwards compatibility.
    """
    # (table, column, column type, server_default), in the order the columns
    # are added. The type is stored as a class and instantiated per column.
    missing_columns = (
        # clusters table
        ('clusters', 'autostop', sa.Integer, '-1'),
        ('clusters', 'metadata', sa.Text, '{}'),
        ('clusters', 'to_down', sa.Integer, '0'),
        ('clusters', 'owner', sa.Text, None),
        ('clusters', 'cluster_hash', sa.Text, None),
        ('clusters', 'launched_nodes', sa.Integer, '0'),
        ('clusters', 'disk_tier', sa.Text, None),
        ('clusters', 'config_hash', sa.Text, None),
        ('clusters', 'user_hash', sa.Text, None),
        ('clusters', 'workspace', sa.Text, 'default'),
        ('clusters', 'last_creation_yaml', sa.Text, None),
        ('clusters', 'last_creation_command', sa.Text, None),
        ('clusters', 'config_hash_locked', sa.Boolean, 'FALSE'),
        ('clusters', 'handle_locked', sa.Boolean, 'FALSE'),
        ('clusters', 'num_failures', sa.Integer, '0'),
        ('clusters', 'configs', sa.Text, '[]'),
        # cluster_history table
        ('cluster_history', 'user_hash', sa.Text, None),
        ('cluster_history', 'last_creation_yaml', sa.Text, None),
        ('cluster_history', 'last_creation_command', sa.Text, None),
        # users table
        ('users', 'password', sa.Text, None),
        ('users', 'created_at', sa.Integer, None),
    )

    with op.get_context().autocommit_block():
        # Create any missing tables with the current schema first.
        db_utils.add_all_tables_to_db_sqlalchemy(Base.metadata, op.get_bind())

        for table_name, column_name, column_type, default in missing_columns:
            db_utils.add_column_to_table_alembic(table_name,
                                                 column_name,
                                                 column_type(),
                                                 server_default=default)
120
+
121
+
122
def downgrade():
    """Drop all tables registered on ``Base.metadata``."""
    metadata = Base.metadata
    metadata.drop_all(bind=op.get_bind())
@@ -0,0 +1,35 @@
1
+ """add workspace column to cluster_history table
2
+
3
+ Revision ID: 002
4
+ Revises: 001
5
+ Create Date: 2025-08-06
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+ from sky.utils.db import db_utils
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '002'
18
+ down_revision: Union[str, Sequence[str], None] = '001'
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
def upgrade() -> None:
    """Upgrade schema: add the ``workspace`` column to ``cluster_history``.

    The column is added inside Alembic's autocommit block, as a ``Text``
    column with no server default.
    """
    with op.get_context().autocommit_block():
        db_utils.add_column_to_table_alembic('cluster_history',
                                             'workspace',
                                             sa.Text(),
                                             server_default=None)
31
+
32
+
33
def downgrade() -> None:
    """Downgrade schema.

    No-op: nothing added by ``upgrade()`` is removed here.
    """
@@ -0,0 +1,61 @@
1
+ """fix initial revision
2
+
3
+ Revision ID: 003
4
+ Revises: 002
5
+ Create Date: 2025-08-07
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+ from sky.utils.db import db_utils
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '003'
18
+ down_revision: Union[str, Sequence[str], None] = '002'
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
def upgrade() -> None:
    """Upgrade schema.

    Adds three columns to the ``clusters`` table that the initial revision
    did not add, then drops six columns that it added by mistake. All
    statements run inside Alembic's autocommit block.
    """
    add_column = db_utils.add_column_to_table_alembic
    drop_column = db_utils.drop_column_from_table_alembic
    mistakenly_added = (
        'launched_nodes',
        'disk_tier',
        'config_hash_locked',
        'handle_locked',
        'num_failures',
        'configs',
    )

    with op.get_context().autocommit_block():
        # Add missing columns to the clusters table.
        add_column('clusters',
                   'storage_mounts_metadata',
                   sa.LargeBinary(),
                   server_default=None)
        # Existing rows get 1 while new rows default to 0, so every cluster
        # that existed before #2977 is considered as ever up:
        #   existing cluster's default (null) -> 1;
        #   new cluster's default -> 0;
        # This is conservative for the existing clusters: even if some INIT
        # clusters were never really UP, setting it to 1 means they won't be
        # auto-deleted during any failover.
        add_column('clusters',
                   'cluster_ever_up',
                   sa.Integer(),
                   server_default='0',
                   value_to_replace_existing_entries=1)
        add_column('clusters',
                   'status_updated_at',
                   sa.Integer(),
                   server_default=None)

        # Remove the mistakenly added columns.
        for column_name in mistakenly_added:
            drop_column('clusters', column_name)
57
+
58
+
59
def downgrade() -> None:
    """Downgrade schema.

    No-op: the changes made by ``upgrade()`` are not reverted here.
    """
@@ -0,0 +1,34 @@
1
+ """Columns for whether the cluster is managed.
2
+
3
+ Revision ID: 004
4
+ Revises: 003
5
+ Create Date: 2025-08-07
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+ from sky.utils.db import db_utils
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '004'
18
+ down_revision: Union[str, Sequence[str], None] = '003'
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
def upgrade():
    """Add the ``is_managed`` column to the ``clusters`` table.

    The column is an ``Integer`` with server default ``'0'`` and is added
    inside Alembic's autocommit block.
    """
    autocommit = op.get_context().autocommit_block()
    with autocommit:
        db_utils.add_column_to_table_alembic('clusters',
                                             'is_managed',
                                             sa.Integer(),
                                             server_default='0')
30
+
31
+
32
def downgrade():
    """No-op: the ``is_managed`` column added by ``upgrade()`` is kept."""
@@ -0,0 +1,32 @@
1
+ """Add new table for cluster events.
2
+
3
+ Revision ID: 005
4
+ Revises: 004
5
+ Create Date: 2025-08-08
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+
13
+ from sky.global_user_state import Base
14
+ from sky.utils.db import db_utils
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '005'
18
+ down_revision: Union[str, Sequence[str], None] = '004'
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
def upgrade():
    """Add the new ``cluster_events`` table.

    The table definition comes from ``Base.metadata`` and is added through
    ``db_utils.add_table_to_db_sqlalchemy`` inside Alembic's autocommit
    block.
    """
    with op.get_context().autocommit_block():
        connection = op.get_bind()
        db_utils.add_table_to_db_sqlalchemy(Base.metadata, connection,
                                            'cluster_events')
29
+
30
+
31
def downgrade():
    """No-op: nothing is undone when downgrading past this revision."""
@@ -0,0 +1,41 @@
1
+ """Add provision_log_path to clusters and cluster_history.
2
+
3
+ Revision ID: 006
4
+ Revises: 005
5
+ Create Date: 2025-08-12
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+ from sky.utils.db import db_utils
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '006'
18
+ down_revision: Union[str, Sequence[str], None] = '005'
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
def upgrade():
    """Add ``provision_log_path`` to ``clusters`` and ``cluster_history``.

    Both columns are ``Text`` with no server default and are added, in that
    table order, inside Alembic's autocommit block.
    """
    with op.get_context().autocommit_block():
        for table_name in ('clusters', 'cluster_history'):
            # <table_name>.provision_log_path
            db_utils.add_column_to_table_alembic(table_name,
                                                 'provision_log_path',
                                                 sa.Text(),
                                                 server_default=None)
37
+
38
+
39
def downgrade():
    """Do nothing; kept as a no-op for backward compatibility."""
@@ -0,0 +1,34 @@
1
+ """Add request_id to cluster_events.
2
+
3
+ Revision ID: 007
4
+ Revises: 006
5
+ Create Date: 2025-08-28
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+ from sky.utils.db import db_utils
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '007'
18
+ down_revision: Union[str, Sequence[str], None] = '006'
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
def upgrade():
    """Add the ``request_id`` column to the ``cluster_events`` table.

    The column is ``Text`` with no server default and is added inside
    Alembic's autocommit block.
    """
    autocommit = op.get_context().autocommit_block()
    with autocommit:
        db_utils.add_column_to_table_alembic('cluster_events',
                                             'request_id',
                                             sa.Text(),
                                             server_default=None)
30
+
31
+
32
def downgrade():
    """Do nothing; kept as a no-op for backward compatibility."""