skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (429) hide show
  1. sky/__init__.py +12 -2
  2. sky/adaptors/aws.py +27 -22
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/adaptors/slurm.py +478 -0
  14. sky/admin_policy.py +20 -0
  15. sky/authentication.py +157 -263
  16. sky/backends/__init__.py +3 -2
  17. sky/backends/backend.py +11 -3
  18. sky/backends/backend_utils.py +630 -185
  19. sky/backends/cloud_vm_ray_backend.py +1111 -928
  20. sky/backends/local_docker_backend.py +9 -5
  21. sky/backends/task_codegen.py +971 -0
  22. sky/backends/wheel_utils.py +18 -0
  23. sky/catalog/__init__.py +8 -3
  24. sky/catalog/aws_catalog.py +4 -0
  25. sky/catalog/common.py +19 -1
  26. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  27. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  28. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  29. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  30. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  31. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  32. sky/catalog/kubernetes_catalog.py +36 -32
  33. sky/catalog/primeintellect_catalog.py +95 -0
  34. sky/catalog/runpod_catalog.py +5 -1
  35. sky/catalog/seeweb_catalog.py +184 -0
  36. sky/catalog/shadeform_catalog.py +165 -0
  37. sky/catalog/slurm_catalog.py +243 -0
  38. sky/check.py +87 -46
  39. sky/client/cli/command.py +1004 -434
  40. sky/client/cli/flags.py +4 -2
  41. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  42. sky/client/cli/utils.py +79 -0
  43. sky/client/common.py +12 -2
  44. sky/client/sdk.py +188 -65
  45. sky/client/sdk_async.py +34 -33
  46. sky/cloud_stores.py +82 -3
  47. sky/clouds/__init__.py +8 -0
  48. sky/clouds/aws.py +337 -129
  49. sky/clouds/azure.py +24 -18
  50. sky/clouds/cloud.py +47 -13
  51. sky/clouds/cudo.py +16 -13
  52. sky/clouds/do.py +9 -7
  53. sky/clouds/fluidstack.py +12 -5
  54. sky/clouds/gcp.py +14 -7
  55. sky/clouds/hyperbolic.py +12 -5
  56. sky/clouds/ibm.py +12 -5
  57. sky/clouds/kubernetes.py +80 -45
  58. sky/clouds/lambda_cloud.py +12 -5
  59. sky/clouds/nebius.py +23 -9
  60. sky/clouds/oci.py +19 -12
  61. sky/clouds/paperspace.py +4 -1
  62. sky/clouds/primeintellect.py +317 -0
  63. sky/clouds/runpod.py +85 -24
  64. sky/clouds/scp.py +12 -8
  65. sky/clouds/seeweb.py +477 -0
  66. sky/clouds/shadeform.py +400 -0
  67. sky/clouds/slurm.py +578 -0
  68. sky/clouds/ssh.py +6 -3
  69. sky/clouds/utils/scp_utils.py +61 -50
  70. sky/clouds/vast.py +43 -27
  71. sky/clouds/vsphere.py +14 -16
  72. sky/core.py +296 -195
  73. sky/dashboard/out/404.html +1 -1
  74. sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +11 -0
  76. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  77. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  79. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  80. sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  82. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-b589397dc09c5b4e.js} +1 -1
  83. sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  86. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  87. sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +1 -0
  88. sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  90. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  92. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  93. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  94. sky/dashboard/out/_next/static/chunks/9353-8369df1cf105221c.js +1 -0
  95. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  96. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  97. sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +34 -0
  98. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +16 -0
  99. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +1 -0
  100. sky/dashboard/out/_next/static/chunks/pages/clusters-9e5d47818b9bdadd.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
  102. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-12c559ec4d81fdbd.js} +1 -1
  103. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-d187cd0413d72475.js} +1 -1
  104. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +16 -0
  105. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +21 -0
  106. sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-4f46050ca065d8f8.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/pages/volumes-ef19d49c6d0e8500.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-96e0f298308da7e2.js} +1 -1
  111. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-cb4da3abe08ebf19.js} +1 -1
  112. sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +1 -0
  113. sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +3 -0
  114. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  115. sky/dashboard/out/clusters/[cluster].html +1 -1
  116. sky/dashboard/out/clusters.html +1 -1
  117. sky/dashboard/out/config.html +1 -1
  118. sky/dashboard/out/index.html +1 -1
  119. sky/dashboard/out/infra/[context].html +1 -1
  120. sky/dashboard/out/infra.html +1 -1
  121. sky/dashboard/out/jobs/[job].html +1 -1
  122. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  123. sky/dashboard/out/jobs.html +1 -1
  124. sky/dashboard/out/plugins/[...slug].html +1 -0
  125. sky/dashboard/out/users.html +1 -1
  126. sky/dashboard/out/volumes.html +1 -1
  127. sky/dashboard/out/workspace/new.html +1 -1
  128. sky/dashboard/out/workspaces/[name].html +1 -1
  129. sky/dashboard/out/workspaces.html +1 -1
  130. sky/data/data_utils.py +92 -1
  131. sky/data/mounting_utils.py +177 -30
  132. sky/data/storage.py +200 -19
  133. sky/data/storage_utils.py +10 -45
  134. sky/exceptions.py +18 -7
  135. sky/execution.py +74 -31
  136. sky/global_user_state.py +605 -191
  137. sky/jobs/__init__.py +2 -0
  138. sky/jobs/client/sdk.py +101 -4
  139. sky/jobs/client/sdk_async.py +31 -5
  140. sky/jobs/constants.py +15 -8
  141. sky/jobs/controller.py +726 -284
  142. sky/jobs/file_content_utils.py +128 -0
  143. sky/jobs/log_gc.py +193 -0
  144. sky/jobs/recovery_strategy.py +250 -100
  145. sky/jobs/scheduler.py +271 -173
  146. sky/jobs/server/core.py +367 -114
  147. sky/jobs/server/server.py +81 -35
  148. sky/jobs/server/utils.py +89 -35
  149. sky/jobs/state.py +1498 -620
  150. sky/jobs/utils.py +771 -306
  151. sky/logs/agent.py +40 -5
  152. sky/logs/aws.py +9 -19
  153. sky/metrics/utils.py +282 -39
  154. sky/models.py +2 -0
  155. sky/optimizer.py +7 -6
  156. sky/provision/__init__.py +38 -1
  157. sky/provision/aws/config.py +34 -13
  158. sky/provision/aws/instance.py +5 -2
  159. sky/provision/azure/instance.py +5 -3
  160. sky/provision/common.py +22 -0
  161. sky/provision/cudo/instance.py +4 -3
  162. sky/provision/do/instance.py +4 -3
  163. sky/provision/docker_utils.py +112 -28
  164. sky/provision/fluidstack/instance.py +6 -5
  165. sky/provision/gcp/config.py +6 -1
  166. sky/provision/gcp/instance.py +4 -2
  167. sky/provision/hyperbolic/instance.py +4 -2
  168. sky/provision/instance_setup.py +66 -20
  169. sky/provision/kubernetes/__init__.py +2 -0
  170. sky/provision/kubernetes/config.py +7 -44
  171. sky/provision/kubernetes/constants.py +0 -1
  172. sky/provision/kubernetes/instance.py +609 -213
  173. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  174. sky/provision/kubernetes/network.py +12 -8
  175. sky/provision/kubernetes/network_utils.py +8 -25
  176. sky/provision/kubernetes/utils.py +422 -422
  177. sky/provision/kubernetes/volume.py +150 -18
  178. sky/provision/lambda_cloud/instance.py +16 -13
  179. sky/provision/nebius/instance.py +6 -2
  180. sky/provision/nebius/utils.py +103 -86
  181. sky/provision/oci/instance.py +4 -2
  182. sky/provision/paperspace/instance.py +4 -3
  183. sky/provision/primeintellect/__init__.py +10 -0
  184. sky/provision/primeintellect/config.py +11 -0
  185. sky/provision/primeintellect/instance.py +454 -0
  186. sky/provision/primeintellect/utils.py +398 -0
  187. sky/provision/provisioner.py +45 -15
  188. sky/provision/runpod/__init__.py +2 -0
  189. sky/provision/runpod/instance.py +4 -3
  190. sky/provision/runpod/volume.py +69 -13
  191. sky/provision/scp/instance.py +307 -130
  192. sky/provision/seeweb/__init__.py +11 -0
  193. sky/provision/seeweb/config.py +13 -0
  194. sky/provision/seeweb/instance.py +812 -0
  195. sky/provision/shadeform/__init__.py +11 -0
  196. sky/provision/shadeform/config.py +12 -0
  197. sky/provision/shadeform/instance.py +351 -0
  198. sky/provision/shadeform/shadeform_utils.py +83 -0
  199. sky/provision/slurm/__init__.py +12 -0
  200. sky/provision/slurm/config.py +13 -0
  201. sky/provision/slurm/instance.py +572 -0
  202. sky/provision/slurm/utils.py +583 -0
  203. sky/provision/vast/instance.py +9 -4
  204. sky/provision/vast/utils.py +10 -6
  205. sky/provision/volume.py +164 -0
  206. sky/provision/vsphere/common/ssl_helper.py +1 -1
  207. sky/provision/vsphere/common/vapiconnect.py +2 -1
  208. sky/provision/vsphere/common/vim_utils.py +3 -2
  209. sky/provision/vsphere/instance.py +8 -6
  210. sky/provision/vsphere/vsphere_utils.py +8 -1
  211. sky/resources.py +11 -3
  212. sky/schemas/api/responses.py +107 -6
  213. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  214. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  215. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  216. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  217. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  218. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  219. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  220. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  221. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  222. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  223. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  224. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  225. sky/schemas/generated/jobsv1_pb2.py +86 -0
  226. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  227. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  228. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  229. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  230. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  231. sky/schemas/generated/servev1_pb2.py +58 -0
  232. sky/schemas/generated/servev1_pb2.pyi +115 -0
  233. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  234. sky/serve/autoscalers.py +2 -0
  235. sky/serve/client/impl.py +55 -21
  236. sky/serve/constants.py +4 -3
  237. sky/serve/controller.py +17 -11
  238. sky/serve/load_balancing_policies.py +1 -1
  239. sky/serve/replica_managers.py +219 -142
  240. sky/serve/serve_rpc_utils.py +179 -0
  241. sky/serve/serve_state.py +63 -54
  242. sky/serve/serve_utils.py +145 -109
  243. sky/serve/server/core.py +46 -25
  244. sky/serve/server/impl.py +311 -162
  245. sky/serve/server/server.py +21 -19
  246. sky/serve/service.py +84 -68
  247. sky/serve/service_spec.py +45 -7
  248. sky/server/auth/loopback.py +38 -0
  249. sky/server/auth/oauth2_proxy.py +12 -7
  250. sky/server/common.py +47 -24
  251. sky/server/config.py +62 -28
  252. sky/server/constants.py +9 -1
  253. sky/server/daemons.py +109 -38
  254. sky/server/metrics.py +76 -96
  255. sky/server/middleware_utils.py +166 -0
  256. sky/server/plugins.py +222 -0
  257. sky/server/requests/executor.py +384 -145
  258. sky/server/requests/payloads.py +83 -19
  259. sky/server/requests/preconditions.py +15 -13
  260. sky/server/requests/request_names.py +123 -0
  261. sky/server/requests/requests.py +511 -157
  262. sky/server/requests/serializers/decoders.py +48 -17
  263. sky/server/requests/serializers/encoders.py +102 -20
  264. sky/server/requests/serializers/return_value_serializers.py +60 -0
  265. sky/server/requests/threads.py +117 -0
  266. sky/server/rest.py +116 -24
  267. sky/server/server.py +497 -179
  268. sky/server/server_utils.py +30 -0
  269. sky/server/stream_utils.py +219 -45
  270. sky/server/uvicorn.py +30 -19
  271. sky/setup_files/MANIFEST.in +6 -1
  272. sky/setup_files/alembic.ini +8 -0
  273. sky/setup_files/dependencies.py +64 -19
  274. sky/setup_files/setup.py +44 -44
  275. sky/sky_logging.py +13 -5
  276. sky/skylet/attempt_skylet.py +116 -24
  277. sky/skylet/configs.py +3 -1
  278. sky/skylet/constants.py +139 -29
  279. sky/skylet/events.py +74 -14
  280. sky/skylet/executor/__init__.py +1 -0
  281. sky/skylet/executor/slurm.py +189 -0
  282. sky/skylet/job_lib.py +143 -105
  283. sky/skylet/log_lib.py +252 -8
  284. sky/skylet/log_lib.pyi +47 -7
  285. sky/skylet/providers/ibm/node_provider.py +12 -8
  286. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  287. sky/skylet/runtime_utils.py +21 -0
  288. sky/skylet/services.py +524 -0
  289. sky/skylet/skylet.py +27 -2
  290. sky/skylet/subprocess_daemon.py +104 -28
  291. sky/skypilot_config.py +99 -79
  292. sky/ssh_node_pools/constants.py +12 -0
  293. sky/ssh_node_pools/core.py +40 -3
  294. sky/ssh_node_pools/deploy/__init__.py +4 -0
  295. sky/ssh_node_pools/deploy/deploy.py +952 -0
  296. sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
  297. sky/ssh_node_pools/deploy/utils.py +173 -0
  298. sky/ssh_node_pools/server.py +20 -21
  299. sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
  300. sky/task.py +221 -104
  301. sky/templates/aws-ray.yml.j2 +1 -0
  302. sky/templates/azure-ray.yml.j2 +1 -0
  303. sky/templates/cudo-ray.yml.j2 +1 -0
  304. sky/templates/do-ray.yml.j2 +1 -0
  305. sky/templates/fluidstack-ray.yml.j2 +1 -0
  306. sky/templates/gcp-ray.yml.j2 +1 -0
  307. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  308. sky/templates/ibm-ray.yml.j2 +2 -1
  309. sky/templates/jobs-controller.yaml.j2 +3 -0
  310. sky/templates/kubernetes-ray.yml.j2 +204 -55
  311. sky/templates/lambda-ray.yml.j2 +1 -0
  312. sky/templates/nebius-ray.yml.j2 +3 -0
  313. sky/templates/oci-ray.yml.j2 +1 -0
  314. sky/templates/paperspace-ray.yml.j2 +1 -0
  315. sky/templates/primeintellect-ray.yml.j2 +72 -0
  316. sky/templates/runpod-ray.yml.j2 +1 -0
  317. sky/templates/scp-ray.yml.j2 +1 -0
  318. sky/templates/seeweb-ray.yml.j2 +171 -0
  319. sky/templates/shadeform-ray.yml.j2 +73 -0
  320. sky/templates/slurm-ray.yml.j2 +85 -0
  321. sky/templates/vast-ray.yml.j2 +2 -0
  322. sky/templates/vsphere-ray.yml.j2 +1 -0
  323. sky/templates/websocket_proxy.py +188 -43
  324. sky/usage/usage_lib.py +16 -4
  325. sky/users/model.conf +1 -1
  326. sky/users/permission.py +84 -44
  327. sky/users/rbac.py +31 -3
  328. sky/utils/accelerator_registry.py +6 -3
  329. sky/utils/admin_policy_utils.py +18 -5
  330. sky/utils/annotations.py +128 -6
  331. sky/utils/asyncio_utils.py +78 -0
  332. sky/utils/atomic.py +1 -1
  333. sky/utils/auth_utils.py +153 -0
  334. sky/utils/cli_utils/status_utils.py +12 -7
  335. sky/utils/cluster_utils.py +28 -6
  336. sky/utils/command_runner.py +283 -30
  337. sky/utils/command_runner.pyi +63 -7
  338. sky/utils/common.py +3 -1
  339. sky/utils/common_utils.py +55 -7
  340. sky/utils/config_utils.py +1 -14
  341. sky/utils/context.py +127 -40
  342. sky/utils/context_utils.py +73 -18
  343. sky/utils/controller_utils.py +229 -70
  344. sky/utils/db/db_utils.py +95 -18
  345. sky/utils/db/kv_cache.py +149 -0
  346. sky/utils/db/migration_utils.py +24 -7
  347. sky/utils/env_options.py +4 -0
  348. sky/utils/git.py +559 -1
  349. sky/utils/kubernetes/create_cluster.sh +15 -30
  350. sky/utils/kubernetes/delete_cluster.sh +10 -7
  351. sky/utils/kubernetes/generate_kind_config.py +6 -66
  352. sky/utils/kubernetes/gpu_labeler.py +13 -3
  353. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  354. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  355. sky/utils/kubernetes/kubernetes_deploy_utils.py +187 -260
  356. sky/utils/kubernetes/rsync_helper.sh +11 -3
  357. sky/utils/kubernetes/ssh-tunnel.sh +7 -376
  358. sky/utils/kubernetes_enums.py +7 -15
  359. sky/utils/lock_events.py +4 -4
  360. sky/utils/locks.py +128 -31
  361. sky/utils/log_utils.py +0 -319
  362. sky/utils/resource_checker.py +13 -10
  363. sky/utils/resources_utils.py +53 -29
  364. sky/utils/rich_utils.py +8 -4
  365. sky/utils/schemas.py +138 -52
  366. sky/utils/subprocess_utils.py +17 -4
  367. sky/utils/thread_utils.py +91 -0
  368. sky/utils/timeline.py +2 -1
  369. sky/utils/ux_utils.py +35 -1
  370. sky/utils/volume.py +88 -4
  371. sky/utils/yaml_utils.py +9 -0
  372. sky/volumes/client/sdk.py +48 -10
  373. sky/volumes/server/core.py +59 -22
  374. sky/volumes/server/server.py +46 -17
  375. sky/volumes/volume.py +54 -42
  376. sky/workspaces/core.py +57 -21
  377. sky/workspaces/server.py +13 -12
  378. sky_templates/README.md +3 -0
  379. sky_templates/__init__.py +3 -0
  380. sky_templates/ray/__init__.py +0 -0
  381. sky_templates/ray/start_cluster +183 -0
  382. sky_templates/ray/stop_cluster +75 -0
  383. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/METADATA +343 -65
  384. skypilot_nightly-1.0.0.dev20251210.dist-info/RECORD +629 -0
  385. skypilot_nightly-1.0.0.dev20251210.dist-info/top_level.txt +2 -0
  386. sky/client/cli/git.py +0 -549
  387. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  388. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  389. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  390. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  391. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  392. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  393. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
  394. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  395. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  396. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  397. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  398. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  399. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  400. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  401. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  402. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  403. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  404. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  405. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  406. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  407. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  408. sky/dashboard/out/_next/static/chunks/pages/_app-ce361c6959bc2001.js +0 -34
  409. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  410. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  411. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  412. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  413. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  414. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  415. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  416. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  417. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  418. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  419. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  420. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  421. sky/utils/kubernetes/cleanup-tunnel.sh +0 -62
  422. sky/utils/kubernetes/deploy_remote_cluster.py +0 -1299
  423. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  424. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  425. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  426. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → KYAhEFa3FTfq4JyKVgo-s}/_ssgManifest.js +0 -0
  427. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/WHEEL +0 -0
  428. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/entry_points.txt +0 -0
  429. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/licenses/LICENSE +0 -0
sky/skylet/job_lib.py CHANGED
@@ -23,20 +23,22 @@ from sky import global_user_state
23
23
  from sky import sky_logging
24
24
  from sky.adaptors import common as adaptors_common
25
25
  from sky.skylet import constants
26
+ from sky.skylet import runtime_utils
26
27
  from sky.utils import common_utils
27
- from sky.utils import log_utils
28
28
  from sky.utils import message_utils
29
29
  from sky.utils import subprocess_utils
30
30
  from sky.utils.db import db_utils
31
31
 
32
32
  if typing.TYPE_CHECKING:
33
33
  import psutil
34
+
35
+ from sky.schemas.generated import jobsv1_pb2
34
36
  else:
35
37
  psutil = adaptors_common.LazyImport('psutil')
38
+ jobsv1_pb2 = adaptors_common.LazyImport('sky.schemas.generated.jobsv1_pb2')
36
39
 
37
40
  logger = sky_logging.init_logger(__name__)
38
41
 
39
- _LINUX_NEW_LINE = '\n'
40
42
  _JOB_STATUS_LOCK = '~/.sky/locks/.job_{}.lock'
41
43
  # JOB_CMD_IDENTIFIER is used for identifying the process retrieved
42
44
  # with pid is the same driver process to guard against the case where
@@ -82,13 +84,9 @@ def create_table(cursor, conn):
82
84
  # is not critical and is likely to be enabled by other processes.
83
85
 
84
86
  # Pid column is used for keeping track of the driver process of a job. It
85
- # can be in three states:
86
- # -1: The job was submitted with SkyPilot older than #4318, where we use
87
- # ray job submit to submit the job, i.e. no pid is recorded. This is for
88
- # backward compatibility and should be removed after 0.10.0.
87
+ # can be in two states:
89
88
  # 0: The job driver process has never been started. When adding a job with
90
- # INIT state, the pid will be set to 0 (the default -1 value is just for
91
- # backward compatibility).
89
+ # INIT state, the pid will be set to 0.
92
90
  # >0: The job has been started. The pid is the driver process's pid.
93
91
  # The driver can be actually running or finished.
94
92
  # TODO(SKY-1213): username is actually user hash, should rename.
@@ -144,7 +142,7 @@ def init_db(func):
144
142
 
145
143
  with _db_init_lock:
146
144
  if _DB is None:
147
- db_path = os.path.expanduser('~/.sky/jobs.db')
145
+ db_path = runtime_utils.get_runtime_dir_path('.sky/jobs.db')
148
146
  os.makedirs(pathlib.Path(db_path).parents[0], exist_ok=True)
149
147
  _DB = db_utils.SQLiteConn(db_path, create_table)
150
148
  return func(*args, **kwargs)
@@ -220,6 +218,45 @@ class JobStatus(enum.Enum):
220
218
  color = _JOB_STATUS_TO_COLOR[self]
221
219
  return f'{color}{self.value}{colorama.Style.RESET_ALL}'
222
220
 
221
+ @classmethod
222
+ def from_protobuf(
223
+ cls,
224
+ protobuf_value: 'jobsv1_pb2.JobStatus') -> Optional['JobStatus']:
225
+ """Convert protobuf JobStatus enum to Python enum value."""
226
+ protobuf_to_enum = {
227
+ jobsv1_pb2.JOB_STATUS_INIT: cls.INIT,
228
+ jobsv1_pb2.JOB_STATUS_PENDING: cls.PENDING,
229
+ jobsv1_pb2.JOB_STATUS_SETTING_UP: cls.SETTING_UP,
230
+ jobsv1_pb2.JOB_STATUS_RUNNING: cls.RUNNING,
231
+ jobsv1_pb2.JOB_STATUS_FAILED_DRIVER: cls.FAILED_DRIVER,
232
+ jobsv1_pb2.JOB_STATUS_SUCCEEDED: cls.SUCCEEDED,
233
+ jobsv1_pb2.JOB_STATUS_FAILED: cls.FAILED,
234
+ jobsv1_pb2.JOB_STATUS_FAILED_SETUP: cls.FAILED_SETUP,
235
+ jobsv1_pb2.JOB_STATUS_CANCELLED: cls.CANCELLED,
236
+ jobsv1_pb2.JOB_STATUS_UNSPECIFIED: None,
237
+ }
238
+ if protobuf_value not in protobuf_to_enum:
239
+ raise ValueError(
240
+ f'Unknown protobuf JobStatus value: {protobuf_value}')
241
+ return protobuf_to_enum[protobuf_value]
242
+
243
+ def to_protobuf(self) -> 'jobsv1_pb2.JobStatus':
244
+ """Convert this Python enum value to protobuf enum value."""
245
+ enum_to_protobuf = {
246
+ JobStatus.INIT: jobsv1_pb2.JOB_STATUS_INIT,
247
+ JobStatus.PENDING: jobsv1_pb2.JOB_STATUS_PENDING,
248
+ JobStatus.SETTING_UP: jobsv1_pb2.JOB_STATUS_SETTING_UP,
249
+ JobStatus.RUNNING: jobsv1_pb2.JOB_STATUS_RUNNING,
250
+ JobStatus.FAILED_DRIVER: jobsv1_pb2.JOB_STATUS_FAILED_DRIVER,
251
+ JobStatus.SUCCEEDED: jobsv1_pb2.JOB_STATUS_SUCCEEDED,
252
+ JobStatus.FAILED: jobsv1_pb2.JOB_STATUS_FAILED,
253
+ JobStatus.FAILED_SETUP: jobsv1_pb2.JOB_STATUS_FAILED_SETUP,
254
+ JobStatus.CANCELLED: jobsv1_pb2.JOB_STATUS_CANCELLED,
255
+ }
256
+ if self not in enum_to_protobuf:
257
+ raise ValueError(f'Unknown JobStatus value: {self}')
258
+ return enum_to_protobuf[self]
259
+
223
260
 
224
261
  # We have two steps for job submissions:
225
262
  # 1. Client reserves a job id from the job table by adding an INIT state job.
@@ -261,11 +298,7 @@ class JobScheduler:
261
298
  f'WHERE job_id={job_id!r}'))
262
299
  _DB.conn.commit()
263
300
  pid = subprocess_utils.launch_new_process_tree(run_cmd)
264
- # TODO(zhwu): Backward compatibility, remove this check after 0.10.0.
265
- # This is for the case where the job is submitted with SkyPilot older
266
- # than #4318, using ray job submit.
267
- if 'job submit' in run_cmd:
268
- pid = -1
301
+
269
302
  _DB.cursor.execute((f'UPDATE jobs SET pid={pid} '
270
303
  f'WHERE job_id={job_id!r}'))
271
304
  _DB.conn.commit()
@@ -475,6 +508,11 @@ def get_status(job_id: int) -> Optional[JobStatus]:
475
508
 
476
509
  @init_db
477
510
  def get_statuses_payload(job_ids: List[Optional[int]]) -> str:
511
+ return message_utils.encode_payload(get_statuses(job_ids))
512
+
513
+
514
+ @init_db
515
+ def get_statuses(job_ids: List[int]) -> Dict[int, Optional[str]]:
478
516
  assert _DB is not None
479
517
  # Per-job lock is not required here, since a stale job status will not
480
518
  # affect the caller.
@@ -482,10 +520,51 @@ def get_statuses_payload(job_ids: List[Optional[int]]) -> str:
482
520
  rows = _DB.cursor.execute(
483
521
  f'SELECT job_id, status FROM jobs WHERE job_id IN ({query_str})',
484
522
  job_ids)
485
- statuses = {job_id: None for job_id in job_ids}
523
+ statuses: Dict[int, Optional[str]] = {job_id: None for job_id in job_ids}
486
524
  for (job_id, status) in rows:
487
525
  statuses[job_id] = status
488
- return message_utils.encode_payload(statuses)
526
+ return statuses
527
+
528
+
529
+ @init_db
530
+ def get_jobs_info(user_hash: Optional[str] = None,
531
+ all_jobs: bool = False) -> List['jobsv1_pb2.JobInfo']:
532
+ """Get detailed job information.
533
+
534
+ Similar to dump_job_queue but returns structured protobuf objects instead
535
+ of encoded strings.
536
+
537
+ Args:
538
+ user_hash: The user hash to show jobs for. Show all the users if None.
539
+ all_jobs: Whether to show all jobs, not just the pending/running ones.
540
+ """
541
+ assert _DB is not None
542
+
543
+ status_list: Optional[List[JobStatus]] = [
544
+ JobStatus.SETTING_UP, JobStatus.PENDING, JobStatus.RUNNING
545
+ ]
546
+ if all_jobs:
547
+ status_list = None
548
+
549
+ jobs = _get_jobs(user_hash, status_list=status_list)
550
+ jobs_info = []
551
+ for job in jobs:
552
+ jobs_info.append(
553
+ jobsv1_pb2.JobInfo(job_id=job['job_id'],
554
+ job_name=job['job_name'],
555
+ username=job['username'],
556
+ submitted_at=job['submitted_at'],
557
+ status=job['status'].to_protobuf(),
558
+ run_timestamp=job['run_timestamp'],
559
+ start_at=job['start_at'],
560
+ end_at=job['end_at'],
561
+ resources=job['resources'],
562
+ pid=job['pid'],
563
+ log_path=os.path.join(
564
+ constants.SKY_LOGS_DIRECTORY,
565
+ job['run_timestamp']),
566
+ metadata=json.dumps(job['metadata'])))
567
+ return jobs_info
489
568
 
490
569
 
491
570
  def load_statuses_payload(
@@ -524,16 +603,27 @@ def get_job_submitted_or_ended_timestamp_payload(job_id: int,
524
603
  PENDING state.
525
604
 
526
605
  The normal job duration will use `start_at` instead of `submitted_at` (in
527
- `format_job_queue()`), because the job may stay in PENDING if the cluster is
528
- busy.
606
+ `table_utils.format_job_queue()`), because the job may stay in PENDING if
607
+ the cluster is busy.
608
+ """
609
+ return message_utils.encode_payload(
610
+ get_job_submitted_or_ended_timestamp(job_id, get_ended_time))
611
+
612
+
613
+ @init_db
614
+ def get_job_submitted_or_ended_timestamp(
615
+ job_id: int, get_ended_time: bool) -> Optional[float]:
616
+ """Get the job submitted timestamp.
617
+
618
+ Returns the raw timestamp or None if job doesn't exist.
529
619
  """
530
620
  assert _DB is not None
531
621
  field = 'end_at' if get_ended_time else 'submitted_at'
532
622
  rows = _DB.cursor.execute(f'SELECT {field} FROM jobs WHERE job_id=(?)',
533
623
  (job_id,))
534
624
  for (timestamp,) in rows:
535
- return message_utils.encode_payload(timestamp)
536
- return message_utils.encode_payload(None)
625
+ return timestamp
626
+ return None
537
627
 
538
628
 
539
629
  def get_ray_port():
@@ -542,7 +632,8 @@ def get_ray_port():
542
632
  If the port file does not exist, the cluster was launched before #1790,
543
633
  return the default port.
544
634
  """
545
- port_path = os.path.expanduser(constants.SKY_REMOTE_RAY_PORT_FILE)
635
+ port_path = runtime_utils.get_runtime_dir_path(
636
+ constants.SKY_REMOTE_RAY_PORT_FILE)
546
637
  if not os.path.exists(port_path):
547
638
  return 6379
548
639
  port = json.load(open(port_path, 'r', encoding='utf-8'))['ray_port']
@@ -555,7 +646,8 @@ def get_job_submission_port():
555
646
  If the port file does not exist, the cluster was launched before #1790,
556
647
  return the default port.
557
648
  """
558
- port_path = os.path.expanduser(constants.SKY_REMOTE_RAY_PORT_FILE)
649
+ port_path = runtime_utils.get_runtime_dir_path(
650
+ constants.SKY_REMOTE_RAY_PORT_FILE)
559
651
  if not os.path.exists(port_path):
560
652
  return 8265
561
653
  port = json.load(open(port_path, 'r',
@@ -673,7 +765,7 @@ def update_job_status(job_ids: List[int],
673
765
  statuses = []
674
766
  for job_id in job_ids:
675
767
  # Per-job status lock is required because between the job status
676
- # query and the job status update, the job status in the databse
768
+ # query and the job status update, the job status in the database
677
769
  # can be modified by the generated ray program.
678
770
  with filelock.FileLock(_get_lock_path(job_id)):
679
771
  status = None
@@ -724,12 +816,6 @@ def update_job_status(job_ids: List[int],
724
816
  'the job state is not in terminal states, setting '
725
817
  'it to FAILED_DRIVER')
726
818
  status = JobStatus.FAILED_DRIVER
727
- elif job_pid < 0:
728
- # TODO(zhwu): Backward compatibility, remove after 0.10.0.
729
- # We set the job status to PENDING instead of actually
730
- # checking ray job status and let the status in job table
731
- # take effect in the later max.
732
- status = JobStatus.PENDING
733
819
 
734
820
  pending_job = _get_pending_job(job_id)
735
821
  if pending_job is not None:
@@ -842,35 +928,6 @@ def is_cluster_idle() -> bool:
842
928
  assert False, 'Should not reach here'
843
929
 
844
930
 
845
- def format_job_queue(jobs: List[Dict[str, Any]]):
846
- """Format the job queue for display.
847
-
848
- Usage:
849
- jobs = get_job_queue()
850
- print(format_job_queue(jobs))
851
- """
852
- job_table = log_utils.create_table([
853
- 'ID', 'NAME', 'USER', 'SUBMITTED', 'STARTED', 'DURATION', 'RESOURCES',
854
- 'STATUS', 'LOG', 'GIT COMMIT'
855
- ])
856
- for job in jobs:
857
- job_table.add_row([
858
- job['job_id'],
859
- job['job_name'],
860
- job['username'],
861
- log_utils.readable_time_duration(job['submitted_at']),
862
- log_utils.readable_time_duration(job['start_at']),
863
- log_utils.readable_time_duration(job['start_at'],
864
- job['end_at'],
865
- absolute=True),
866
- job['resources'],
867
- job['status'].colored_str(),
868
- job['log_path'],
869
- job.get('metadata', {}).get('git_commit', '-'),
870
- ])
871
- return job_table
872
-
873
-
874
931
  def dump_job_queue(user_hash: Optional[str], all_jobs: bool) -> str:
875
932
  """Get the job queue in encoded json format.
876
933
 
@@ -907,27 +964,6 @@ def load_job_queue(payload: str) -> List[Dict[str, Any]]:
907
964
  return jobs
908
965
 
909
966
 
910
- # TODO(zhwu): Backward compatibility for jobs submitted before #4318, remove
911
- # after 0.10.0.
912
- def _create_ray_job_submission_client():
913
- """Import the ray job submission client."""
914
- try:
915
- import ray # pylint: disable=import-outside-toplevel
916
- except ImportError:
917
- logger.error('Failed to import ray')
918
- raise
919
- try:
920
- # pylint: disable=import-outside-toplevel
921
- from ray import job_submission
922
- except ImportError:
923
- logger.error(
924
- f'Failed to import job_submission with ray=={ray.__version__}')
925
- raise
926
- port = get_job_submission_port()
927
- return job_submission.JobSubmissionClient(
928
- address=f'http://127.0.0.1:{port}')
929
-
930
-
931
967
  def _make_ray_job_id(sky_job_id: int) -> str:
932
968
  return f'{sky_job_id}-{getpass.getuser()}'
933
969
 
@@ -947,6 +983,13 @@ def cancel_jobs_encoded_results(jobs: Optional[List[int]],
947
983
  Encoded job IDs that are actually cancelled. Caller should use
948
984
  message_utils.decode_payload() to parse.
949
985
  """
986
+ return message_utils.encode_payload(cancel_jobs(jobs, cancel_all,
987
+ user_hash))
988
+
989
+
990
+ def cancel_jobs(jobs: Optional[List[int]],
991
+ cancel_all: bool = False,
992
+ user_hash: Optional[str] = None) -> List[int]:
950
993
  job_records = []
951
994
  all_status = [JobStatus.PENDING, JobStatus.SETTING_UP, JobStatus.RUNNING]
952
995
  if jobs is None and not cancel_all:
@@ -989,18 +1032,6 @@ def cancel_jobs_encoded_results(jobs: Optional[List[int]],
989
1032
  # We don't have to start a daemon to forcefully kill the process
990
1033
  # as our job driver process will clean up the underlying
991
1034
  # child processes.
992
- elif job['pid'] < 0:
993
- try:
994
- # TODO(zhwu): Backward compatibility, remove after 0.10.0.
995
- # The job was submitted with ray job submit before #4318.
996
- job_client = _create_ray_job_submission_client()
997
- job_client.stop_job(_make_ray_job_id(job['job_id']))
998
- except RuntimeError as e:
999
- # If the request to the job server fails, we should not
1000
- # set the job to CANCELLED.
1001
- if 'does not exist' not in str(e):
1002
- logger.warning(str(e))
1003
- continue
1004
1035
  # Get the job status again to avoid race condition.
1005
1036
  job_status = get_status_no_lock(job['job_id'])
1006
1037
  if job_status in [
@@ -1010,7 +1041,7 @@ def cancel_jobs_encoded_results(jobs: Optional[List[int]],
1010
1041
  cancelled_ids.append(job['job_id'])
1011
1042
 
1012
1043
  scheduler.schedule_step()
1013
- return message_utils.encode_payload(cancelled_ids)
1044
+ return cancelled_ids
1014
1045
 
1015
1046
 
1016
1047
  @init_db
@@ -1030,6 +1061,17 @@ def get_run_timestamp(job_id: Optional[int]) -> Optional[str]:
1030
1061
 
1031
1062
  @init_db
1032
1063
  def get_log_dir_for_jobs(job_ids: List[Optional[str]]) -> str:
1064
+ """Returns the relative paths to the log files for jobs with globbing,
1065
+ encoded."""
1066
+ job_to_dir = get_job_log_dirs(job_ids)
1067
+ job_to_dir_str: Dict[str, str] = {}
1068
+ for job_id, log_dir in job_to_dir.items():
1069
+ job_to_dir_str[str(job_id)] = log_dir
1070
+ return message_utils.encode_payload(job_to_dir_str)
1071
+
1072
+
1073
+ @init_db
1074
+ def get_job_log_dirs(job_ids: List[int]) -> Dict[int, str]:
1033
1075
  """Returns the relative paths to the log files for jobs with globbing."""
1034
1076
  assert _DB is not None
1035
1077
  query_str = ' OR '.join(['job_id GLOB (?)'] * len(job_ids))
@@ -1038,16 +1080,16 @@ def get_log_dir_for_jobs(job_ids: List[Optional[str]]) -> str:
1038
1080
  SELECT * FROM jobs
1039
1081
  WHERE {query_str}""", job_ids)
1040
1082
  rows = _DB.cursor.fetchall()
1041
- job_to_dir = {}
1083
+ job_to_dir: Dict[int, str] = {}
1042
1084
  for row in rows:
1043
1085
  job_id = row[JobInfoLoc.JOB_ID.value]
1044
1086
  if row[JobInfoLoc.LOG_PATH.value]:
1045
- job_to_dir[str(job_id)] = row[JobInfoLoc.LOG_PATH.value]
1087
+ job_to_dir[job_id] = row[JobInfoLoc.LOG_PATH.value]
1046
1088
  else:
1047
1089
  run_timestamp = row[JobInfoLoc.RUN_TIMESTAMP.value]
1048
- job_to_dir[str(job_id)] = os.path.join(constants.SKY_LOGS_DIRECTORY,
1049
- run_timestamp)
1050
- return message_utils.encode_payload(job_to_dir)
1090
+ job_to_dir[job_id] = os.path.join(constants.SKY_LOGS_DIRECTORY,
1091
+ run_timestamp)
1092
+ return job_to_dir
1051
1093
 
1052
1094
 
1053
1095
  class JobLibCodeGen:
@@ -1176,15 +1218,10 @@ class JobLibCodeGen:
1176
1218
  f' log_dir = None if run_timestamp is None else os.path.join({constants.SKY_LOGS_DIRECTORY!r}, run_timestamp)'
1177
1219
  ),
1178
1220
  # Add a newline to leave the if indent block above.
1179
- f'\ntail_log_kwargs = {{"job_id": job_id, "log_dir": log_dir, "managed_job_id": {managed_job_id!r}, "follow": {follow}}}',
1180
- f'{_LINUX_NEW_LINE}if getattr(constants, "SKYLET_LIB_VERSION", 1) > 1: tail_log_kwargs["tail"] = {tail}',
1181
- f'{_LINUX_NEW_LINE}log_lib.tail_logs(**tail_log_kwargs)',
1221
+ f'\nlog_lib.tail_logs(job_id=job_id, log_dir=log_dir, managed_job_id={managed_job_id!r}, follow={follow}, tail={tail})',
1182
1222
  # After tailing, check the job status and exit with appropriate code
1183
1223
  'job_status = job_lib.get_status(job_id)',
1184
- # Backward compatibility for returning exit code: Skylet versions 2
1185
- # and older did not have JobExitCode, so we use 0 for those versions
1186
- # TODO: Remove this special handling after 0.10.0.
1187
- 'exit_code = exceptions.JobExitCode.from_job_status(job_status) if getattr(constants, "SKYLET_LIB_VERSION", 1) > 2 else 0',
1224
+ 'exit_code = exceptions.JobExitCode.from_job_status(job_status)',
1188
1225
  # Fix for dashboard: When follow=False and job is still running (NOT_FINISHED=101),
1189
1226
  # exit with success (0) since fetching current logs is a successful operation.
1190
1227
  # This prevents shell wrappers from printing "command terminated with exit code 101".
@@ -1236,4 +1273,5 @@ class JobLibCodeGen:
1236
1273
  def _build(cls, code: List[str]) -> str:
1237
1274
  code = cls._PREFIX + code
1238
1275
  code = ';'.join(code)
1239
- return f'{constants.SKY_PYTHON_CMD} -u -c {shlex.quote(code)}'
1276
+ return (f'{constants.ACTIVATE_SKY_REMOTE_PYTHON_ENV}; '
1277
+ f'{constants.SKY_PYTHON_CMD} -u -c {shlex.quote(code)}')