skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (429) hide show
  1. sky/__init__.py +12 -2
  2. sky/adaptors/aws.py +27 -22
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/adaptors/slurm.py +478 -0
  14. sky/admin_policy.py +20 -0
  15. sky/authentication.py +157 -263
  16. sky/backends/__init__.py +3 -2
  17. sky/backends/backend.py +11 -3
  18. sky/backends/backend_utils.py +630 -185
  19. sky/backends/cloud_vm_ray_backend.py +1111 -928
  20. sky/backends/local_docker_backend.py +9 -5
  21. sky/backends/task_codegen.py +971 -0
  22. sky/backends/wheel_utils.py +18 -0
  23. sky/catalog/__init__.py +8 -3
  24. sky/catalog/aws_catalog.py +4 -0
  25. sky/catalog/common.py +19 -1
  26. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  27. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  28. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  29. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  30. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  31. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  32. sky/catalog/kubernetes_catalog.py +36 -32
  33. sky/catalog/primeintellect_catalog.py +95 -0
  34. sky/catalog/runpod_catalog.py +5 -1
  35. sky/catalog/seeweb_catalog.py +184 -0
  36. sky/catalog/shadeform_catalog.py +165 -0
  37. sky/catalog/slurm_catalog.py +243 -0
  38. sky/check.py +87 -46
  39. sky/client/cli/command.py +1004 -434
  40. sky/client/cli/flags.py +4 -2
  41. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  42. sky/client/cli/utils.py +79 -0
  43. sky/client/common.py +12 -2
  44. sky/client/sdk.py +188 -65
  45. sky/client/sdk_async.py +34 -33
  46. sky/cloud_stores.py +82 -3
  47. sky/clouds/__init__.py +8 -0
  48. sky/clouds/aws.py +337 -129
  49. sky/clouds/azure.py +24 -18
  50. sky/clouds/cloud.py +47 -13
  51. sky/clouds/cudo.py +16 -13
  52. sky/clouds/do.py +9 -7
  53. sky/clouds/fluidstack.py +12 -5
  54. sky/clouds/gcp.py +14 -7
  55. sky/clouds/hyperbolic.py +12 -5
  56. sky/clouds/ibm.py +12 -5
  57. sky/clouds/kubernetes.py +80 -45
  58. sky/clouds/lambda_cloud.py +12 -5
  59. sky/clouds/nebius.py +23 -9
  60. sky/clouds/oci.py +19 -12
  61. sky/clouds/paperspace.py +4 -1
  62. sky/clouds/primeintellect.py +317 -0
  63. sky/clouds/runpod.py +85 -24
  64. sky/clouds/scp.py +12 -8
  65. sky/clouds/seeweb.py +477 -0
  66. sky/clouds/shadeform.py +400 -0
  67. sky/clouds/slurm.py +578 -0
  68. sky/clouds/ssh.py +6 -3
  69. sky/clouds/utils/scp_utils.py +61 -50
  70. sky/clouds/vast.py +43 -27
  71. sky/clouds/vsphere.py +14 -16
  72. sky/core.py +296 -195
  73. sky/dashboard/out/404.html +1 -1
  74. sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +11 -0
  76. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  77. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  79. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  80. sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  82. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-b589397dc09c5b4e.js} +1 -1
  83. sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  86. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  87. sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +1 -0
  88. sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  90. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  92. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  93. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  94. sky/dashboard/out/_next/static/chunks/9353-8369df1cf105221c.js +1 -0
  95. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  96. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  97. sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +34 -0
  98. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +16 -0
  99. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +1 -0
  100. sky/dashboard/out/_next/static/chunks/pages/clusters-9e5d47818b9bdadd.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
  102. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-12c559ec4d81fdbd.js} +1 -1
  103. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-d187cd0413d72475.js} +1 -1
  104. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +16 -0
  105. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +21 -0
  106. sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-4f46050ca065d8f8.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/pages/volumes-ef19d49c6d0e8500.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-96e0f298308da7e2.js} +1 -1
  111. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-cb4da3abe08ebf19.js} +1 -1
  112. sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +1 -0
  113. sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +3 -0
  114. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  115. sky/dashboard/out/clusters/[cluster].html +1 -1
  116. sky/dashboard/out/clusters.html +1 -1
  117. sky/dashboard/out/config.html +1 -1
  118. sky/dashboard/out/index.html +1 -1
  119. sky/dashboard/out/infra/[context].html +1 -1
  120. sky/dashboard/out/infra.html +1 -1
  121. sky/dashboard/out/jobs/[job].html +1 -1
  122. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  123. sky/dashboard/out/jobs.html +1 -1
  124. sky/dashboard/out/plugins/[...slug].html +1 -0
  125. sky/dashboard/out/users.html +1 -1
  126. sky/dashboard/out/volumes.html +1 -1
  127. sky/dashboard/out/workspace/new.html +1 -1
  128. sky/dashboard/out/workspaces/[name].html +1 -1
  129. sky/dashboard/out/workspaces.html +1 -1
  130. sky/data/data_utils.py +92 -1
  131. sky/data/mounting_utils.py +177 -30
  132. sky/data/storage.py +200 -19
  133. sky/data/storage_utils.py +10 -45
  134. sky/exceptions.py +18 -7
  135. sky/execution.py +74 -31
  136. sky/global_user_state.py +605 -191
  137. sky/jobs/__init__.py +2 -0
  138. sky/jobs/client/sdk.py +101 -4
  139. sky/jobs/client/sdk_async.py +31 -5
  140. sky/jobs/constants.py +15 -8
  141. sky/jobs/controller.py +726 -284
  142. sky/jobs/file_content_utils.py +128 -0
  143. sky/jobs/log_gc.py +193 -0
  144. sky/jobs/recovery_strategy.py +250 -100
  145. sky/jobs/scheduler.py +271 -173
  146. sky/jobs/server/core.py +367 -114
  147. sky/jobs/server/server.py +81 -35
  148. sky/jobs/server/utils.py +89 -35
  149. sky/jobs/state.py +1498 -620
  150. sky/jobs/utils.py +771 -306
  151. sky/logs/agent.py +40 -5
  152. sky/logs/aws.py +9 -19
  153. sky/metrics/utils.py +282 -39
  154. sky/models.py +2 -0
  155. sky/optimizer.py +7 -6
  156. sky/provision/__init__.py +38 -1
  157. sky/provision/aws/config.py +34 -13
  158. sky/provision/aws/instance.py +5 -2
  159. sky/provision/azure/instance.py +5 -3
  160. sky/provision/common.py +22 -0
  161. sky/provision/cudo/instance.py +4 -3
  162. sky/provision/do/instance.py +4 -3
  163. sky/provision/docker_utils.py +112 -28
  164. sky/provision/fluidstack/instance.py +6 -5
  165. sky/provision/gcp/config.py +6 -1
  166. sky/provision/gcp/instance.py +4 -2
  167. sky/provision/hyperbolic/instance.py +4 -2
  168. sky/provision/instance_setup.py +66 -20
  169. sky/provision/kubernetes/__init__.py +2 -0
  170. sky/provision/kubernetes/config.py +7 -44
  171. sky/provision/kubernetes/constants.py +0 -1
  172. sky/provision/kubernetes/instance.py +609 -213
  173. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  174. sky/provision/kubernetes/network.py +12 -8
  175. sky/provision/kubernetes/network_utils.py +8 -25
  176. sky/provision/kubernetes/utils.py +422 -422
  177. sky/provision/kubernetes/volume.py +150 -18
  178. sky/provision/lambda_cloud/instance.py +16 -13
  179. sky/provision/nebius/instance.py +6 -2
  180. sky/provision/nebius/utils.py +103 -86
  181. sky/provision/oci/instance.py +4 -2
  182. sky/provision/paperspace/instance.py +4 -3
  183. sky/provision/primeintellect/__init__.py +10 -0
  184. sky/provision/primeintellect/config.py +11 -0
  185. sky/provision/primeintellect/instance.py +454 -0
  186. sky/provision/primeintellect/utils.py +398 -0
  187. sky/provision/provisioner.py +45 -15
  188. sky/provision/runpod/__init__.py +2 -0
  189. sky/provision/runpod/instance.py +4 -3
  190. sky/provision/runpod/volume.py +69 -13
  191. sky/provision/scp/instance.py +307 -130
  192. sky/provision/seeweb/__init__.py +11 -0
  193. sky/provision/seeweb/config.py +13 -0
  194. sky/provision/seeweb/instance.py +812 -0
  195. sky/provision/shadeform/__init__.py +11 -0
  196. sky/provision/shadeform/config.py +12 -0
  197. sky/provision/shadeform/instance.py +351 -0
  198. sky/provision/shadeform/shadeform_utils.py +83 -0
  199. sky/provision/slurm/__init__.py +12 -0
  200. sky/provision/slurm/config.py +13 -0
  201. sky/provision/slurm/instance.py +572 -0
  202. sky/provision/slurm/utils.py +583 -0
  203. sky/provision/vast/instance.py +9 -4
  204. sky/provision/vast/utils.py +10 -6
  205. sky/provision/volume.py +164 -0
  206. sky/provision/vsphere/common/ssl_helper.py +1 -1
  207. sky/provision/vsphere/common/vapiconnect.py +2 -1
  208. sky/provision/vsphere/common/vim_utils.py +3 -2
  209. sky/provision/vsphere/instance.py +8 -6
  210. sky/provision/vsphere/vsphere_utils.py +8 -1
  211. sky/resources.py +11 -3
  212. sky/schemas/api/responses.py +107 -6
  213. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  214. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  215. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  216. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  217. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  218. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  219. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  220. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  221. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  222. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  223. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  224. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  225. sky/schemas/generated/jobsv1_pb2.py +86 -0
  226. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  227. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  228. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  229. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  230. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  231. sky/schemas/generated/servev1_pb2.py +58 -0
  232. sky/schemas/generated/servev1_pb2.pyi +115 -0
  233. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  234. sky/serve/autoscalers.py +2 -0
  235. sky/serve/client/impl.py +55 -21
  236. sky/serve/constants.py +4 -3
  237. sky/serve/controller.py +17 -11
  238. sky/serve/load_balancing_policies.py +1 -1
  239. sky/serve/replica_managers.py +219 -142
  240. sky/serve/serve_rpc_utils.py +179 -0
  241. sky/serve/serve_state.py +63 -54
  242. sky/serve/serve_utils.py +145 -109
  243. sky/serve/server/core.py +46 -25
  244. sky/serve/server/impl.py +311 -162
  245. sky/serve/server/server.py +21 -19
  246. sky/serve/service.py +84 -68
  247. sky/serve/service_spec.py +45 -7
  248. sky/server/auth/loopback.py +38 -0
  249. sky/server/auth/oauth2_proxy.py +12 -7
  250. sky/server/common.py +47 -24
  251. sky/server/config.py +62 -28
  252. sky/server/constants.py +9 -1
  253. sky/server/daemons.py +109 -38
  254. sky/server/metrics.py +76 -96
  255. sky/server/middleware_utils.py +166 -0
  256. sky/server/plugins.py +222 -0
  257. sky/server/requests/executor.py +384 -145
  258. sky/server/requests/payloads.py +83 -19
  259. sky/server/requests/preconditions.py +15 -13
  260. sky/server/requests/request_names.py +123 -0
  261. sky/server/requests/requests.py +511 -157
  262. sky/server/requests/serializers/decoders.py +48 -17
  263. sky/server/requests/serializers/encoders.py +102 -20
  264. sky/server/requests/serializers/return_value_serializers.py +60 -0
  265. sky/server/requests/threads.py +117 -0
  266. sky/server/rest.py +116 -24
  267. sky/server/server.py +497 -179
  268. sky/server/server_utils.py +30 -0
  269. sky/server/stream_utils.py +219 -45
  270. sky/server/uvicorn.py +30 -19
  271. sky/setup_files/MANIFEST.in +6 -1
  272. sky/setup_files/alembic.ini +8 -0
  273. sky/setup_files/dependencies.py +64 -19
  274. sky/setup_files/setup.py +44 -44
  275. sky/sky_logging.py +13 -5
  276. sky/skylet/attempt_skylet.py +116 -24
  277. sky/skylet/configs.py +3 -1
  278. sky/skylet/constants.py +139 -29
  279. sky/skylet/events.py +74 -14
  280. sky/skylet/executor/__init__.py +1 -0
  281. sky/skylet/executor/slurm.py +189 -0
  282. sky/skylet/job_lib.py +143 -105
  283. sky/skylet/log_lib.py +252 -8
  284. sky/skylet/log_lib.pyi +47 -7
  285. sky/skylet/providers/ibm/node_provider.py +12 -8
  286. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  287. sky/skylet/runtime_utils.py +21 -0
  288. sky/skylet/services.py +524 -0
  289. sky/skylet/skylet.py +27 -2
  290. sky/skylet/subprocess_daemon.py +104 -28
  291. sky/skypilot_config.py +99 -79
  292. sky/ssh_node_pools/constants.py +12 -0
  293. sky/ssh_node_pools/core.py +40 -3
  294. sky/ssh_node_pools/deploy/__init__.py +4 -0
  295. sky/ssh_node_pools/deploy/deploy.py +952 -0
  296. sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
  297. sky/ssh_node_pools/deploy/utils.py +173 -0
  298. sky/ssh_node_pools/server.py +20 -21
  299. sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
  300. sky/task.py +221 -104
  301. sky/templates/aws-ray.yml.j2 +1 -0
  302. sky/templates/azure-ray.yml.j2 +1 -0
  303. sky/templates/cudo-ray.yml.j2 +1 -0
  304. sky/templates/do-ray.yml.j2 +1 -0
  305. sky/templates/fluidstack-ray.yml.j2 +1 -0
  306. sky/templates/gcp-ray.yml.j2 +1 -0
  307. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  308. sky/templates/ibm-ray.yml.j2 +2 -1
  309. sky/templates/jobs-controller.yaml.j2 +3 -0
  310. sky/templates/kubernetes-ray.yml.j2 +204 -55
  311. sky/templates/lambda-ray.yml.j2 +1 -0
  312. sky/templates/nebius-ray.yml.j2 +3 -0
  313. sky/templates/oci-ray.yml.j2 +1 -0
  314. sky/templates/paperspace-ray.yml.j2 +1 -0
  315. sky/templates/primeintellect-ray.yml.j2 +72 -0
  316. sky/templates/runpod-ray.yml.j2 +1 -0
  317. sky/templates/scp-ray.yml.j2 +1 -0
  318. sky/templates/seeweb-ray.yml.j2 +171 -0
  319. sky/templates/shadeform-ray.yml.j2 +73 -0
  320. sky/templates/slurm-ray.yml.j2 +85 -0
  321. sky/templates/vast-ray.yml.j2 +2 -0
  322. sky/templates/vsphere-ray.yml.j2 +1 -0
  323. sky/templates/websocket_proxy.py +188 -43
  324. sky/usage/usage_lib.py +16 -4
  325. sky/users/model.conf +1 -1
  326. sky/users/permission.py +84 -44
  327. sky/users/rbac.py +31 -3
  328. sky/utils/accelerator_registry.py +6 -3
  329. sky/utils/admin_policy_utils.py +18 -5
  330. sky/utils/annotations.py +128 -6
  331. sky/utils/asyncio_utils.py +78 -0
  332. sky/utils/atomic.py +1 -1
  333. sky/utils/auth_utils.py +153 -0
  334. sky/utils/cli_utils/status_utils.py +12 -7
  335. sky/utils/cluster_utils.py +28 -6
  336. sky/utils/command_runner.py +283 -30
  337. sky/utils/command_runner.pyi +63 -7
  338. sky/utils/common.py +3 -1
  339. sky/utils/common_utils.py +55 -7
  340. sky/utils/config_utils.py +1 -14
  341. sky/utils/context.py +127 -40
  342. sky/utils/context_utils.py +73 -18
  343. sky/utils/controller_utils.py +229 -70
  344. sky/utils/db/db_utils.py +95 -18
  345. sky/utils/db/kv_cache.py +149 -0
  346. sky/utils/db/migration_utils.py +24 -7
  347. sky/utils/env_options.py +4 -0
  348. sky/utils/git.py +559 -1
  349. sky/utils/kubernetes/create_cluster.sh +15 -30
  350. sky/utils/kubernetes/delete_cluster.sh +10 -7
  351. sky/utils/kubernetes/generate_kind_config.py +6 -66
  352. sky/utils/kubernetes/gpu_labeler.py +13 -3
  353. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  354. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  355. sky/utils/kubernetes/kubernetes_deploy_utils.py +187 -260
  356. sky/utils/kubernetes/rsync_helper.sh +11 -3
  357. sky/utils/kubernetes/ssh-tunnel.sh +7 -376
  358. sky/utils/kubernetes_enums.py +7 -15
  359. sky/utils/lock_events.py +4 -4
  360. sky/utils/locks.py +128 -31
  361. sky/utils/log_utils.py +0 -319
  362. sky/utils/resource_checker.py +13 -10
  363. sky/utils/resources_utils.py +53 -29
  364. sky/utils/rich_utils.py +8 -4
  365. sky/utils/schemas.py +138 -52
  366. sky/utils/subprocess_utils.py +17 -4
  367. sky/utils/thread_utils.py +91 -0
  368. sky/utils/timeline.py +2 -1
  369. sky/utils/ux_utils.py +35 -1
  370. sky/utils/volume.py +88 -4
  371. sky/utils/yaml_utils.py +9 -0
  372. sky/volumes/client/sdk.py +48 -10
  373. sky/volumes/server/core.py +59 -22
  374. sky/volumes/server/server.py +46 -17
  375. sky/volumes/volume.py +54 -42
  376. sky/workspaces/core.py +57 -21
  377. sky/workspaces/server.py +13 -12
  378. sky_templates/README.md +3 -0
  379. sky_templates/__init__.py +3 -0
  380. sky_templates/ray/__init__.py +0 -0
  381. sky_templates/ray/start_cluster +183 -0
  382. sky_templates/ray/stop_cluster +75 -0
  383. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/METADATA +343 -65
  384. skypilot_nightly-1.0.0.dev20251210.dist-info/RECORD +629 -0
  385. skypilot_nightly-1.0.0.dev20251210.dist-info/top_level.txt +2 -0
  386. sky/client/cli/git.py +0 -549
  387. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  388. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  389. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  390. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  391. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  392. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  393. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
  394. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  395. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  396. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  397. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  398. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  399. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  400. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  401. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  402. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  403. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  404. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  405. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  406. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  407. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  408. sky/dashboard/out/_next/static/chunks/pages/_app-ce361c6959bc2001.js +0 -34
  409. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  410. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  411. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  412. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  413. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  414. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  415. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  416. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  417. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  418. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  419. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  420. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  421. sky/utils/kubernetes/cleanup-tunnel.sh +0 -62
  422. sky/utils/kubernetes/deploy_remote_cluster.py +0 -1299
  423. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  424. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  425. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  426. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → KYAhEFa3FTfq4JyKVgo-s}/_ssgManifest.js +0 -0
  427. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/WHEEL +0 -0
  428. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/entry_points.txt +0 -0
  429. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/licenses/LICENSE +0 -0
sky/clouds/seeweb.py ADDED
@@ -0,0 +1,477 @@
1
+ """Seeweb Cloud
2
+
3
+ History:
4
+ @ Aug 6, 2025: Initial version of the integration.
5
+ - Francesco Massa
6
+ - Marco Cristofanilli (marco.cATseeweb.it)
7
+
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import typing
13
+ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
14
+
15
+ from sky import catalog
16
+ from sky import clouds
17
+ from sky import sky_logging
18
+ from sky.adaptors import seeweb as seeweb_adaptor
19
+ from sky.provision import seeweb as seeweb_provision
20
+ from sky.utils import registry
21
+ from sky.utils import resources_utils
22
+ from sky.utils import ux_utils
23
+
24
+ if typing.TYPE_CHECKING:
25
+ from sky import resources as resources_lib
26
+ from sky.utils import status_lib
27
+ from sky.utils import volume as volume_lib
28
+
29
+ # ---------- key file path -----------------
30
+ _SEEWEB_KEY_FILE = '~/.seeweb_cloud/seeweb_keys'
31
+
32
+ logger = sky_logging.init_logger(__name__)
33
+ # (content: ini-like)
34
+ # api_key = <TOKEN>
35
+
36
+
37
+ @registry.CLOUD_REGISTRY.register
38
+ class Seeweb(clouds.Cloud):
39
+ """Seeweb GPU Cloud."""
40
+
41
+ _REPR = 'Seeweb'
42
+ # Define unsupported features to provide clear error messages
43
+ # This helps users understand what Seeweb can and cannot do
44
+ _CLOUD_UNSUPPORTED_FEATURES = {
45
+ clouds.CloudImplementationFeatures.MULTI_NODE:
46
+ ('Multi-node not supported. '
47
+ 'Seeweb does not support multi-node clusters.'),
48
+ clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER:
49
+ ('Custom disk tiers not supported. '
50
+ 'Seeweb does not support custom disk tiers.'),
51
+ clouds.CloudImplementationFeatures.STORAGE_MOUNTING:
52
+ ('Storage mounting not supported. '
53
+ 'Seeweb does not support storage mounting.'),
54
+ clouds.CloudImplementationFeatures.HIGH_AVAILABILITY_CONTROLLERS:
55
+ ('High availability controllers not supported. '
56
+ 'Seeweb does not support high availability controllers.'),
57
+ clouds.CloudImplementationFeatures.SPOT_INSTANCE:
58
+ ('Spot instances not supported. '
59
+ 'Seeweb does not support spot instances.'),
60
+ clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
61
+ ('Disk cloning not supported. '
62
+ 'Seeweb does not support disk cloning.'),
63
+ clouds.CloudImplementationFeatures.IMAGE_ID:
64
+ ('Custom image IDs not supported. '
65
+ 'Seeweb does not support custom image IDs.'),
66
+ clouds.CloudImplementationFeatures.CUSTOM_NETWORK_TIER:
67
+ ('Custom network tiers not supported. '
68
+ 'Seeweb does not support custom network tiers.'),
69
+ clouds.CloudImplementationFeatures.HOST_CONTROLLERS:
70
+ ('Host controllers not supported. '
71
+ 'Seeweb does not support host controllers.'),
72
+ clouds.CloudImplementationFeatures.CUSTOM_MULTI_NETWORK:
73
+ ('Custom multi-network not supported. '
74
+ 'Seeweb does not support custom multi-network.'),
75
+ }
76
+ _MAX_CLUSTER_NAME_LEN_LIMIT = 120
77
+ _regions: List[clouds.Region] = []
78
+
79
+ PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
80
+ STATUS_VERSION = clouds.StatusVersion.SKYPILOT
81
+
82
+ # Enable port support with updatable version
83
+ OPEN_PORTS_VERSION = clouds.OpenPortsVersion.UPDATABLE
84
+
85
+ @classmethod
86
+ def _unsupported_features_for_resources(
87
+ cls,
88
+ resources: 'resources_lib.Resources',
89
+ region: Optional[str] = None,
90
+ ) -> Dict[clouds.CloudImplementationFeatures, str]:
91
+ return cls._CLOUD_UNSUPPORTED_FEATURES
92
+
93
@classmethod
def max_cluster_name_length(cls) -> Optional[int]:
    """Return the longest cluster name this cloud accepts.

    The value is read from the class attribute
    ``_MAX_CLUSTER_NAME_LEN_LIMIT``.
    """
    limit = cls._MAX_CLUSTER_NAME_LEN_LIMIT
    return limit
96
+
97
@classmethod
def regions(cls) -> List['clouds.Region']:
    """List the regions the catalog reports for Seeweb.

    The lookup is delegated to ``catalog.regions`` restricted to the
    ``'seeweb'`` cloud. Per the original author's note, that catalog is
    backed by CSV files generated by fetch_seeweb.py.
    """
    return catalog.regions(clouds='seeweb')
104
+
105
@classmethod
def regions_with_offering(
    cls,
    instance_type: str,
    accelerators: Optional[Dict[str, int]],
    use_spot: bool,
    region: Optional[str],
    zone: Optional[str],
    resources: Optional['resources_lib.Resources'] = None,
) -> List[clouds.Region]:
    """Return the regions in which ``instance_type`` is offered.

    Args:
        instance_type: Instance type to look up in the Seeweb catalog.
        accelerators: Not read by this method.
        use_spot: When true the result is always empty.
        region: If given, only the region with this exact name is kept.
        zone: Must be ``None``; an assertion rejects any other value.
        resources: Not read by this method.

    Returns:
        The matching regions, or an empty list for spot requests.
    """
    assert zone is None, 'Seeweb does not support zones.'
    del zone
    if use_spot:
        return []

    # Regions where the catalog lists this instance type.
    candidates = catalog.get_region_zones_for_instance_type(
        instance_type, use_spot, 'seeweb')

    if region is None:
        return candidates
    return [candidate for candidate in candidates if candidate.name == region]
130
+
131
@classmethod
def zones_provision_loop(
    cls,
    *,
    region: str,
    num_nodes: int,
    instance_type: str,
    accelerators: Optional[Dict[str, int]] = None,
    use_spot: bool = False,
) -> Iterator[None]:
    """Yield one provisioning attempt per region offering the instance.

    Each yielded value is the region's ``zones`` attribute, which is
    asserted to be ``None`` before it is yielded. ``num_nodes`` is not
    used.
    """
    del num_nodes
    offered = cls.regions_with_offering(instance_type,
                                        accelerators,
                                        use_spot,
                                        region=region,
                                        zone=None)
    for candidate in offered:
        assert candidate.zones is None, candidate
        yield candidate.zones
150
+
151
@classmethod
def get_zone_shell_cmd(cls) -> Optional[str]:
    """Return ``None``: there is no zone-detection command for Seeweb."""
    zone_cmd: Optional[str] = None
    return zone_cmd
155
+
156
def instance_type_to_hourly_cost(
    self,
    instance_type: str,
    use_spot: bool,
    region: Optional[str],
    zone: Optional[str],
) -> float:
    """Return the hourly price of ``instance_type`` from the catalog.

    All arguments are forwarded unchanged to ``catalog.get_hourly_cost``
    with the cloud fixed to ``'seeweb'``.
    """
    return catalog.get_hourly_cost(instance_type,
                                   use_spot=use_spot,
                                   region=region,
                                   zone=zone,
                                   clouds='seeweb')
169
+
170
def accelerators_to_hourly_cost(
    self,
    accelerators: Dict[str, int],
    use_spot: bool,
    region: Optional[str],
    zone: Optional[str],
) -> float:
    """Return the separate hourly accelerator charge, which is always 0.0.

    None of the arguments are consulted.
    """
    del accelerators, use_spot, region, zone
    return 0.0
179
+
180
def get_egress_cost(self, num_gigabytes: float) -> float:
    """Return the egress cost, which is always 0.0 for any volume."""
    del num_gigabytes
    return 0.0
182
+
183
def make_deploy_resources_variables(
    self,
    resources: 'resources_lib.Resources',
    cluster_name: resources_utils.ClusterName,
    region: 'clouds.Region',
    zones: Optional[List['clouds.Zone']],
    num_nodes: int,
    dryrun: bool = False,
    volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
) -> Dict[str, Any]:
    """Assemble the variables dict describing a Seeweb deployment.

    Args:
        resources: Requested resources; made launchable via
            ``assert_launchable()`` before use.
        cluster_name: Stored as-is under ``'cluster_name'``.
        region: Its ``name`` is stored under ``'region'``.
        zones: Not read by this method.
        num_nodes: Not read by this method.
        dryrun: Not read by this method.
        volume_mounts: Not read by this method.

    Returns:
        A dict with ``instance_type``, ``region``, ``cluster_name``,
        ``custom_resources``, ``seeweb_gpu_config`` and ``image_id``, plus
        ``docker_run_options`` when that list is non-empty.
    """
    launchable = resources.assert_launchable()
    instance_type = launchable.instance_type

    instance_accelerators = self.get_accelerators_from_instance_type(
        instance_type)
    docker_image = launchable.extract_docker_image()

    # GPU passthrough is only requested when a docker image is in use and
    # the instance type carries accelerators.
    docker_run_options: List[str] = []
    if docker_image is not None:
        if instance_accelerators:
            docker_run_options.append('--gpus all')
        logger.info(
            'Launching Seeweb cluster with docker image %s. Ensure the '
            'image is Debian-based and allows passwordless sudo.',
            docker_image)

    # Ray custom-resources string derived from the instance accelerators.
    custom_resources = resources_utils.make_ray_custom_resources_str(
        instance_accelerators)

    # GPU settings handed to the provisioner; only the first accelerator
    # entry of the request is used.
    seeweb_gpu_config = None
    requested_accelerators = launchable.accelerators
    if requested_accelerators:
        gpu_label, gpu_count = next(iter(requested_accelerators.items()))
        seeweb_gpu_config = {
            'gpu': gpu_count,
            'gpu_label': gpu_label,
        }

    # The image is picked from the instance type name: names containing
    # "GPU" get an image with NVIDIA drivers, everything else plain Ubuntu.
    is_gpu_type = bool(instance_type) and 'GPU' in instance_type.upper()
    if not is_gpu_type:
        image_id = 'ubuntu-2204'
    elif instance_type in ['ECS1GPU10', 'ECS2GPU10']:
        # Per the original note these are the H200 types, which need the
        # UEFI variant of the driver image.
        image_id = 'ubuntu-2204-uefi-nvidia-driver'
    else:
        image_id = 'ubuntu-2204-nvidia-driver'

    deploy_vars = {
        'instance_type': instance_type,
        'region': region.name,
        'cluster_name': cluster_name,
        'custom_resources': custom_resources,
        'seeweb_gpu_config': seeweb_gpu_config,
        'image_id': image_id,
    }
    if docker_run_options:
        deploy_vars['docker_run_options'] = docker_run_options
    return deploy_vars
253
+
254
@classmethod
def get_vcpus_mem_from_instance_type(
        cls, instance_type: str) -> Tuple[Optional[float], Optional[float]]:
    """Return the catalog's (vCPUs, memory) pair for ``instance_type``.

    The lookup is delegated to the catalog with the cloud fixed to
    ``'seeweb'``.
    """
    return catalog.get_vcpus_mem_from_instance_type(instance_type,
                                                    clouds='seeweb')
260
+
261
@classmethod
def get_accelerators_from_instance_type(
    cls,
    instance_type: str,
) -> Optional[Dict[str, Union[int, float]]]:
    """Return the accelerators the catalog lists for ``instance_type``.

    The lookup is delegated to the catalog with the cloud fixed to
    ``'seeweb'``; the catalog's answer is returned unchanged.
    """
    return catalog.get_accelerators_from_instance_type(instance_type,
                                                       clouds='seeweb')
269
+
270
@classmethod
def get_default_instance_type(
    cls,
    cpus: Optional[str] = None,
    memory: Optional[str] = None,
    disk_tier: Optional[resources_utils.DiskTier] = None,
    region: Optional[str] = None,
    zone: Optional[str] = None,
) -> Optional[str]:
    """Ask the Seeweb catalog for a default instance type.

    Args:
        cpus: Requested vCPU specification, forwarded to the catalog.
        memory: Requested memory specification, forwarded to the catalog.
        disk_tier: Requested disk tier, forwarded to the catalog.
        region: Accepted for interface compatibility; not forwarded.
        zone: Accepted for interface compatibility; not forwarded.

    Returns:
        The instance type name chosen by the catalog, or ``None``.
    """
    # NOTE(review): ``region`` and ``zone`` are not passed on to the
    # catalog lookup -- confirm whether that is intentional.
    return catalog.get_default_instance_type(cpus=cpus,
                                             memory=memory,
                                             disk_tier=disk_tier,
                                             clouds='seeweb')
284
+
285
def _get_feasible_launchable_resources(
    self, resources: 'resources_lib.Resources'
) -> 'resources_utils.FeasibleResources':
    """Get feasible resources for Seeweb.

    Rejects spot requests and requests naming more than one accelerator
    type. When no instance type is given, one is resolved through the
    catalog: by accelerator if accelerators are requested, otherwise via
    the default instance type for the requested cpus/memory. The
    resulting instance type is then checked against the Seeweb catalog.

    Args:
        resources: The requested resources.

    Returns:
        A ``FeasibleResources`` holding either a single launchable
        resource, or an empty list together with a hint explaining why
        nothing is feasible (plus fuzzy candidates when the accelerator
        lookup provided them).
    """
    if resources.use_spot:
        return resources_utils.FeasibleResources(
            [], [], 'Spot instances not supported on Seeweb')

    if resources.accelerators and len(resources.accelerators) > 1:
        return resources_utils.FeasibleResources(
            [], [], 'Multiple accelerator types not supported on Seeweb')

    # If no instance_type is specified, try to resolve one.
    if not resources.instance_type:
        if resources.accelerators:
            # Exactly one accelerator type remains after the check above.
            acc_name, acc_count = next(iter(resources.accelerators.items()))

            # Use the catalog to find instance types offering this
            # accelerator.
            instance_types, fuzzy_candidates = (
                catalog.get_instance_type_for_accelerator(
                    acc_name=acc_name,
                    acc_count=acc_count,
                    cpus=resources.cpus,
                    memory=resources.memory,
                    use_spot=resources.use_spot,
                    region=resources.region,
                    zone=resources.zone,
                    clouds='seeweb',
                ))

            if not instance_types:
                # The space after "accelerator" was missing, which
                # rendered the hint as e.g. "acceleratorH100:1".
                return resources_utils.FeasibleResources(
                    [],
                    fuzzy_candidates,
                    f'No instance type found for accelerator '
                    f'{acc_name}:{acc_count} on Seeweb',
                )

            # Take the first candidate (presumably the cheapest; the
            # ordering is decided by the catalog).
            resources = resources.copy(instance_type=instance_types[0])
        else:
            # No accelerators specified, use the default instance type.
            default_instance_type = self.get_default_instance_type(
                cpus=resources.cpus,
                memory=resources.memory,
                region=resources.region,
                zone=resources.zone,
            )

            if not default_instance_type:
                # The space after "for" was missing, which rendered the
                # hint as "forcpus=...".
                return resources_utils.FeasibleResources(
                    [],
                    [],
                    f'No suitable instance type found for '
                    f'cpus={resources.cpus}, memory={resources.memory}',
                )

            resources = resources.copy(instance_type=default_instance_type)

    # Check that the (given or resolved) instance type exists.
    if resources.instance_type:
        exists = catalog.instance_type_exists(resources.instance_type,
                                              clouds='seeweb')
        if not exists:
            return resources_utils.FeasibleResources(
                [],
                [],
                f'Instance type {resources.instance_type}'
                f' not available on Seeweb',
            )

    # Set the cloud if not already set.
    if not resources.cloud:
        resources = resources.copy(cloud=self)

    return resources_utils.FeasibleResources([resources], [], None)
373
+
374
@classmethod
def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
    """Check Seeweb compute credentials.

    Returns:
        ``(result, None)`` with the adaptor's verdict when the check
        runs, or ``(False, message)`` when the adaptor raises, where
        ``message`` is the stringified exception.
    """
    try:
        verdict = seeweb_adaptor.check_compute_credentials()
    except Exception as exc:  # pylint: disable=broad-except
        # Any failure is reported as "not enabled" with its reason.
        return False, str(exc)
    return verdict, None
382
+
383
@classmethod
def _check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
    """Check Seeweb storage credentials.

    Returns:
        ``(result, None)`` with the adaptor's verdict when the check
        runs, or ``(False, message)`` when the adaptor raises, where
        ``message`` is the stringified exception.
    """
    try:
        verdict = seeweb_adaptor.check_storage_credentials()
    except Exception as exc:  # pylint: disable=broad-except
        # Any failure is reported as "not enabled" with its reason.
        return False, str(exc)
    return verdict, None
391
+
392
@classmethod
def get_user_identities(cls) -> Optional[List[List[str]]]:
    """Return the active user identities; always ``None`` for Seeweb.

    Seeweb doesn't have a user identity concept, so there is nothing to
    report.
    """
    return None
396
+
397
@classmethod
def query_status(
    cls,
    name: str,
    tag_filters: Dict[str, str],
    region: Optional[str],
    zone: Optional[str],
    **kwargs,
) -> List['status_lib.ClusterStatus']:
    """Query the status of Seeweb cluster instances.

    Args:
        name: Cluster name; also used as the cluster name on the cloud.
        tag_filters: Unused.
        region: Unused.
        zone: Unused.
        **kwargs: Unused.

    Returns:
        The non-``None`` statuses of the cluster's non-terminated
        instances, as reported by the Seeweb provisioner.
    """
    del tag_filters, region, zone, kwargs  # unused

    per_instance = seeweb_provision.instance.query_instances(
        cluster_name=name,
        cluster_name_on_cloud=name,
        provider_config={},
        non_terminated_only=True)

    # Each value is a (status, reason) pair; keep only known statuses.
    statuses = []
    for status, _ in per_instance.values():
        if status is not None:
            statuses.append(status)
    return statuses
417
+
418
def get_credential_file_mounts(self) -> Dict[str, str]:
    """Returns the credential files to mount.

    The Seeweb API key file is mapped to the identical path on the
    remote side.
    """
    return {_SEEWEB_KEY_FILE: _SEEWEB_KEY_FILE}
426
+
427
def instance_type_exists(self, instance_type: str) -> bool:
    """Returns whether the instance type exists for Seeweb.

    Args:
        instance_type: Name of the instance type to check against the
            'seeweb' catalog.
    """
    return catalog.instance_type_exists(instance_type, clouds='seeweb')
431
+
432
@classmethod
def get_image_size(cls, image_id: str, region: Optional[str]) -> float:
    """Reject image size queries: Seeweb doesn't support custom images.

    Args:
        image_id: Unused.
        region: Unused.

    Raises:
        ValueError: Always.
    """
    del image_id, region  # unused
    with ux_utils.print_exception_no_traceback():
        message = (f'Custom images are not supported on {cls._REPR}. '
                   'Seeweb clusters use pre-configured images only.')
        raise ValueError(message)
439
+
440
+ # Image-related methods (not supported)
441
@classmethod
def create_image_from_cluster(
    cls,
    cluster_name: resources_utils.ClusterName,
    region: Optional[str],
    zone: Optional[str],
) -> str:
    """Reject image creation: not supported on Seeweb.

    Args:
        cluster_name: Unused.
        region: Unused.
        zone: Unused.

    Raises:
        ValueError: Always.
    """
    del cluster_name, region, zone  # unused
    with ux_utils.print_exception_no_traceback():
        message = (f'Creating images from clusters is not supported on'
                   f' {cls._REPR}. Seeweb does not support custom'
                   f' image creation.')
        raise ValueError(message)
454
+
455
@classmethod
def maybe_move_image(
    cls,
    image_id: str,
    source_region: str,
    target_region: str,
    source_zone: Optional[str],
    target_zone: Optional[str],
) -> str:
    """Reject moving images between regions: not supported on Seeweb.

    All arguments are unused.

    Raises:
        ValueError: Always.
    """
    del image_id, source_region, target_region  # unused
    del source_zone, target_zone  # unused
    with ux_utils.print_exception_no_traceback():
        message = (f'Moving images between regions is not supported on'
                   f' {cls._REPR}. '
                   'Seeweb does not support custom images.')
        raise ValueError(message)
470
+
471
@classmethod
def delete_image(cls, image_id: str, region: Optional[str]) -> None:
    """Reject image deletion: not supported on Seeweb.

    Args:
        image_id: Unused.
        region: Unused.

    Raises:
        ValueError: Always.
    """
    del image_id, region  # unused
    with ux_utils.print_exception_no_traceback():
        message = (f'Deleting images is not supported on {cls._REPR}. '
                   'Seeweb does not support custom image management.')
        raise ValueError(message)