skypilot-nightly 1.0.0.dev20250605__py3-none-any.whl → 1.0.0.dev20250607__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +3 -1
  3. sky/check.py +14 -19
  4. sky/cli.py +0 -2
  5. sky/client/cli.py +0 -2
  6. sky/client/sdk.py +2 -1
  7. sky/clouds/cloud.py +4 -0
  8. sky/clouds/kubernetes.py +2 -2
  9. sky/clouds/nebius.py +44 -4
  10. sky/dashboard/out/404.html +1 -1
  11. sky/dashboard/out/_next/static/1qG0HTmVilJPxQdBk0fX5/_buildManifest.js +1 -0
  12. sky/dashboard/out/_next/static/chunks/236-619ed0248fb6fdd9.js +6 -0
  13. sky/dashboard/out/_next/static/chunks/{470-9e7a479cc8303baa.js → 470-ad1e0db3afcbd9c9.js} +1 -1
  14. sky/dashboard/out/_next/static/chunks/969-2c584e28e6b4b106.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/973-6d78a0814682d771.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-b919a73aecdfa78f.js +6 -0
  17. sky/dashboard/out/_next/static/chunks/pages/{clusters-5549a350f97d7ef3.js → clusters-4f6b9dd9abcb33ad.js} +1 -1
  18. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-b68ddeed712d45b5.js → [context]-3a18d0eeb5119fe4.js} +1 -1
  19. sky/dashboard/out/_next/static/chunks/pages/{infra-13b117a831702196.js → infra-a1a6abeeb58c1051.js} +1 -1
  20. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1354e28c81eeb686.js +16 -0
  21. sky/dashboard/out/_next/static/chunks/pages/{jobs-a76b2700eca236f7.js → jobs-23bfc8bf373423db.js} +1 -1
  22. sky/dashboard/out/_next/static/chunks/pages/{users-262aab38b9baaf3a.js → users-5800045bd04e69c2.js} +1 -1
  23. sky/dashboard/out/_next/static/chunks/pages/workspace/{new-c7516f2b4c3727c0.js → new-e1f9c0c3ff7ac4bd.js} +1 -1
  24. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-7799de9e691e35d8.js → [name]-686590e0ee4b2412.js} +1 -1
  25. sky/dashboard/out/_next/static/chunks/pages/{workspaces-384ea5fa0cea8f28.js → workspaces-76b07aa5da91b0df.js} +1 -1
  26. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  27. sky/dashboard/out/clusters/[cluster].html +1 -1
  28. sky/dashboard/out/clusters.html +1 -1
  29. sky/dashboard/out/config.html +1 -1
  30. sky/dashboard/out/index.html +1 -1
  31. sky/dashboard/out/infra/[context].html +1 -1
  32. sky/dashboard/out/infra.html +1 -1
  33. sky/dashboard/out/jobs/[job].html +1 -1
  34. sky/dashboard/out/jobs.html +1 -1
  35. sky/dashboard/out/users.html +1 -1
  36. sky/dashboard/out/workspace/new.html +1 -1
  37. sky/dashboard/out/workspaces/[name].html +1 -1
  38. sky/dashboard/out/workspaces.html +1 -1
  39. sky/jobs/scheduler.py +9 -4
  40. sky/jobs/server/core.py +14 -1
  41. sky/jobs/state.py +18 -15
  42. sky/provision/kubernetes/utils.py +12 -5
  43. sky/provision/nebius/constants.py +47 -0
  44. sky/provision/nebius/instance.py +2 -1
  45. sky/provision/nebius/utils.py +28 -7
  46. sky/serve/load_balancer.py +56 -45
  47. sky/skylet/constants.py +1 -0
  48. sky/skypilot_config.py +4 -1
  49. sky/templates/jobs-controller.yaml.j2 +3 -1
  50. sky/templates/nebius-ray.yml.j2 +6 -0
  51. sky/utils/kubernetes/deploy_remote_cluster.py +5 -3
  52. sky/utils/resources_utils.py +3 -1
  53. {skypilot_nightly-1.0.0.dev20250605.dist-info → skypilot_nightly-1.0.0.dev20250607.dist-info}/METADATA +1 -1
  54. {skypilot_nightly-1.0.0.dev20250605.dist-info → skypilot_nightly-1.0.0.dev20250607.dist-info}/RECORD +63 -62
  55. sky/dashboard/out/_next/static/chunks/236-4c0dc6f63ccc6319.js +0 -6
  56. sky/dashboard/out/_next/static/chunks/969-c7abda31c10440ac.js +0 -1
  57. sky/dashboard/out/_next/static/chunks/973-1a09cac61cfcc1e1.js +0 -1
  58. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-beabbcd7606c1a23.js +0 -6
  59. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-86c47edc500f15f9.js +0 -11
  60. sky/dashboard/out/_next/static/qjhIe-yC6nHcLKBqpzO1M/_buildManifest.js +0 -1
  61. /sky/dashboard/out/_next/static/{qjhIe-yC6nHcLKBqpzO1M → 1qG0HTmVilJPxQdBk0fX5}/_ssgManifest.js +0 -0
  62. /sky/dashboard/out/_next/static/chunks/{37-beedd583fea84cc8.js → 37-600191c5804dcae2.js} +0 -0
  63. /sky/dashboard/out/_next/static/chunks/{682-6647f0417d5662f0.js → 682-b60cfdacc15202e8.js} +0 -0
  64. /sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-65d04d5d77cbb6b6.js → [job]-18aed9b56247d074.js} +0 -0
  65. /sky/dashboard/out/_next/static/chunks/pages/{config-1a1eeb949dab8897.js → config-fe375a56342cf609.js} +0 -0
  66. {skypilot_nightly-1.0.0.dev20250605.dist-info → skypilot_nightly-1.0.0.dev20250607.dist-info}/WHEEL +0 -0
  67. {skypilot_nightly-1.0.0.dev20250605.dist-info → skypilot_nightly-1.0.0.dev20250607.dist-info}/entry_points.txt +0 -0
  68. {skypilot_nightly-1.0.0.dev20250605.dist-info → skypilot_nightly-1.0.0.dev20250607.dist-info}/licenses/LICENSE +0 -0
  69. {skypilot_nightly-1.0.0.dev20250605.dist-info → skypilot_nightly-1.0.0.dev20250607.dist-info}/top_level.txt +0 -0
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><title>Workspaces | SkyPilot Dashboard</title><link rel="preload" href="/dashboard/skypilot.svg" as="image" fetchpriority="high"/><meta name="next-head-count" content="4"/><link rel="preload" href="/dashboard/_next/static/css/667d941a2888ce6e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/667d941a2888ce6e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-65d465f948974c0d.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-cb81dc4d27f4d009.js" defer=""></script><script src="/dashboard/_next/static/chunks/614-635a84e87800f99e.js" defer=""></script><script src="/dashboard/_next/static/chunks/798-c0525dc3f21e488d.js" defer=""></script><script src="/dashboard/_next/static/chunks/121-865d2bf8a3b84c6a.js" defer=""></script><script src="/dashboard/_next/static/chunks/470-9e7a479cc8303baa.js" defer=""></script><script src="/dashboard/_next/static/chunks/293-351268365226d251.js" defer=""></script><script src="/dashboard/_next/static/chunks/969-c7abda31c10440ac.js" defer=""></script><script src="/dashboard/_next/static/chunks/856-3a32da4b84176f6d.js" defer=""></script><script src="/dashboard/_next/static/chunks/973-1a09cac61cfcc1e1.js" defer=""></script><script src="/dashboard/_next/static/chunks/236-4c0dc6f63ccc6319.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-384ea5fa0cea8f28.js" defer=""></script><script src="/dashboard/_next/static/qjhIe-yC6nHcLKBqpzO1M/_buildManifest.js" defer=""></script><script 
src="/dashboard/_next/static/qjhIe-yC6nHcLKBqpzO1M/_ssgManifest.js" defer=""></script></head><body><div id="__next"><div class="min-h-screen bg-gray-50"><div class="fixed top-0 left-0 right-0 z-50 shadow-sm"><div class="fixed top-0 left-0 right-0 bg-white z-30 h-14 px-4 border-b border-gray-200 shadow-sm"><div class="flex items-center h-full"><div class="flex items-center space-x-4 mr-6"><a class="flex items-center px-1 pt-1 h-full" href="/dashboard"><div class="h-20 w-20 flex items-center justify-center"><img alt="SkyPilot Logo" fetchpriority="high" width="80" height="80" decoding="async" data-nimg="1" class="w-full h-full object-contain" style="color:transparent" src="/dashboard/skypilot.svg"/></div></a></div><div class="flex items-center space-x-2 md:space-x-4 mr-6"><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/clusters"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="20" height="8" x="2" y="2" rx="2" ry="2"></rect><rect width="20" height="8" x="2" y="14" rx="2" ry="2"></rect><line x1="6" x2="6.01" y1="6" y2="6"></line><line x1="6" x2="6.01" y1="18" y2="18"></line></svg><span>Clusters</span></a><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/jobs"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M16 20V4a2 2 0 0 0-2-2h-4a2 2 0 0 0-2 2v16"></path><rect width="20" height="14" x="2" y="6" rx="2"></rect></svg><span>Jobs</span></a><div class="border-l border-gray-200 h-6 mx-1"></div><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" 
href="/dashboard/infra"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="4" y="4" width="16" height="16" rx="2" ry="2"></rect><rect x="9" y="9" width="6" height="6"></rect><line x1="9" y1="1" x2="9" y2="4"></line><line x1="15" y1="1" x2="15" y2="4"></line><line x1="9" y1="20" x2="9" y2="23"></line><line x1="15" y1="20" x2="15" y2="23"></line><line x1="20" y1="9" x2="23" y2="9"></line><line x1="20" y1="14" x2="23" y2="14"></line><line x1="1" y1="9" x2="4" y2="9"></line><line x1="1" y1="14" x2="4" y2="14"></line></svg><span>Infra</span></a><a class="inline-flex items-center border-b-2 border-transparent text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/workspaces"><svg class="w-4 h-4" stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><g><path fill="none" d="M0 0h24v24H0z"></path><path d="M3 18.5V5a3 3 0 0 1 3-3h14a1 1 0 0 1 1 1v18a1 1 0 0 1-1 1H6.5A3.5 3.5 0 0 1 3 18.5zM19 20v-3H6.5a1.5 1.5 0 0 0 0 3H19zM10 4H6a1 1 0 0 0-1 1v10.337A3.486 3.486 0 0 1 6.5 15H19V4h-2v8l-3.5-2-3.5 2V4z"></path></g></svg><span>Workspaces</span></a><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/users"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-users w-4 h-4"><path d="M16 21v-2a4 4 0 0 0-4-4H6a4 4 0 0 0-4 4v2"></path><circle cx="9" cy="7" r="4"></circle><path d="M22 21v-2a4 4 0 0 0-3-3.87"></path><path d="M16 3.13a4 4 0 0 1 0 7.75"></path></svg><span>Users</span></a></div><div class="flex items-center space-x-1 ml-auto"><a href="https://skypilot.readthedocs.io/en/latest/" target="_blank" rel="noopener noreferrer" class="inline-flex 
items-center px-2 py-1 text-gray-600 hover:text-blue-600 transition-colors duration-150 cursor-pointer" title="Docs" tabindex="0"><span class="mr-1">Docs</span><svg class="w-3.5 h-3.5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"></path><polyline points="15 3 21 3 21 9"></polyline><line x1="10" y1="14" x2="21" y2="3"></line></svg></a><a href="https://github.com/skypilot-org/skypilot" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="GitHub" tabindex="0"><svg class="w-5 h-5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"></path></svg></a><a href="https://slack.skypilot.co/" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="Slack" tabindex="0"><svg class="w-5 h-5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 
24 24" fill="currentColor"><path transform="scale(0.85) translate(1.8, 1.8)" d="M5.042 15.165a2.528 2.528 0 0 1-2.52 2.523A2.528 2.528 0 0 1 0 15.165a2.527 2.527 0 0 1 2.522-2.52h2.52v2.52zM6.313 15.165a2.527 2.527 0 0 1 2.521-2.52 2.527 2.527 0 0 1 2.521 2.52v6.313A2.528 2.528 0 0 1 8.834 24a2.528 2.528 0 0 1-2.521-2.522v-6.313zM8.834 5.042a2.528 2.528 0 0 1-2.521-2.52A2.528 2.528 0 0 1 8.834 0a2.528 2.528 0 0 1 2.521 2.522v2.52H8.834zM8.834 6.313a2.528 2.528 0 0 1 2.521 2.521 2.528 2.528 0 0 1-2.521 2.521H2.522A2.528 2.528 0 0 1 0 8.834a2.528 2.528 0 0 1 2.522-2.521h6.312zM18.956 8.834a2.528 2.528 0 0 1 2.522-2.521A2.528 2.528 0 0 1 24 8.834a2.528 2.528 0 0 1-2.522 2.521h-2.522V8.834zM17.688 8.834a2.528 2.528 0 0 1-2.523 2.521 2.527 2.527 0 0 1-2.52-2.521V2.522A2.527 2.527 0 0 1 15.165 0a2.528 2.528 0 0 1 2.523 2.522v6.312zM15.165 18.956a2.528 2.528 0 0 1 2.523 2.522A2.528 2.528 0 0 1 15.165 24a2.527 2.527 0 0 1-2.52-2.522v-2.522h2.52zM15.165 17.688a2.527 2.527 0 0 1-2.52-2.523 2.526 2.526 0 0 1 2.52-2.52h6.313A2.527 2.527 0 0 1 24 15.165a2.528 2.528 0 0 1-2.522 2.523h-6.313z"></path></svg></a><a href="https://github.com/skypilot-org/skypilot/issues/new" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="Leave Feedback" tabindex="0"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-message-square w-5 h-5"><path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"></path></svg></a><div class="border-l border-gray-200 h-6"></div><a class="inline-flex items-center justify-center p-2 rounded-full transition-colors duration-150 cursor-pointer text-gray-600 hover:bg-gray-100" title="Configuration" tabindex="0" href="/dashboard/config"><svg 
xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-settings w-5 h-5"><path d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.39a2 2 0 0 0-.73-2.73l-.15-.08a2 2 0 0 1-1-1.74v-.5a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z"></path><circle cx="12" cy="12" r="3"></circle></svg></a></div></div></div></div><div class="transition-all duration-200 ease-in-out min-h-screen" style="padding-top:56px"><main class="p-6"><div class="flex justify-center items-center h-64"><style data-emotion="css z01bqi animation-61bdi0">.css-z01bqi{display:inline-block;color:#1976d2;-webkit-animation:animation-61bdi0 1.4s linear infinite;animation:animation-61bdi0 1.4s linear infinite;}@-webkit-keyframes animation-61bdi0{0%{-webkit-transform:rotate(0deg);-moz-transform:rotate(0deg);-ms-transform:rotate(0deg);transform:rotate(0deg);}100%{-webkit-transform:rotate(360deg);-moz-transform:rotate(360deg);-ms-transform:rotate(360deg);transform:rotate(360deg);}}@keyframes animation-61bdi0{0%{-webkit-transform:rotate(0deg);-moz-transform:rotate(0deg);-ms-transform:rotate(0deg);transform:rotate(0deg);}100%{-webkit-transform:rotate(360deg);-moz-transform:rotate(360deg);-ms-transform:rotate(360deg);transform:rotate(360deg);}}</style><span class="MuiCircularProgress-root MuiCircularProgress-indeterminate MuiCircularProgress-colorPrimary css-z01bqi" style="width:40px;height:40px" role="progressbar"><style data-emotion="css 
13o7eu2">.css-13o7eu2{display:block;}</style><svg class="MuiCircularProgress-svg css-13o7eu2" viewBox="22 22 44 44"><style data-emotion="css 14891ef animation-1p2h4ri">.css-14891ef{stroke:currentColor;stroke-dasharray:80px,200px;stroke-dashoffset:0;-webkit-animation:animation-1p2h4ri 1.4s ease-in-out infinite;animation:animation-1p2h4ri 1.4s ease-in-out infinite;}@-webkit-keyframes animation-1p2h4ri{0%{stroke-dasharray:1px,200px;stroke-dashoffset:0;}50%{stroke-dasharray:100px,200px;stroke-dashoffset:-15px;}100%{stroke-dasharray:100px,200px;stroke-dashoffset:-125px;}}@keyframes animation-1p2h4ri{0%{stroke-dasharray:1px,200px;stroke-dashoffset:0;}50%{stroke-dasharray:100px,200px;stroke-dashoffset:-15px;}100%{stroke-dasharray:100px,200px;stroke-dashoffset:-125px;}}</style><circle class="MuiCircularProgress-circle MuiCircularProgress-circleIndeterminate css-14891ef" cx="44" cy="44" r="20.2" fill="none" stroke-width="3.6"></circle></svg></span><span class="ml-2 text-gray-500">Loading workspaces...</span></div></main></div></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"qjhIe-yC6nHcLKBqpzO1M","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><title>Workspaces | SkyPilot Dashboard</title><link rel="preload" href="/dashboard/skypilot.svg" as="image" fetchpriority="high"/><meta name="next-head-count" content="4"/><link rel="preload" href="/dashboard/_next/static/css/667d941a2888ce6e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/667d941a2888ce6e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-65d465f948974c0d.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-cb81dc4d27f4d009.js" defer=""></script><script src="/dashboard/_next/static/chunks/614-635a84e87800f99e.js" defer=""></script><script src="/dashboard/_next/static/chunks/798-c0525dc3f21e488d.js" defer=""></script><script src="/dashboard/_next/static/chunks/121-865d2bf8a3b84c6a.js" defer=""></script><script src="/dashboard/_next/static/chunks/470-ad1e0db3afcbd9c9.js" defer=""></script><script src="/dashboard/_next/static/chunks/969-2c584e28e6b4b106.js" defer=""></script><script src="/dashboard/_next/static/chunks/293-351268365226d251.js" defer=""></script><script src="/dashboard/_next/static/chunks/856-3a32da4b84176f6d.js" defer=""></script><script src="/dashboard/_next/static/chunks/973-6d78a0814682d771.js" defer=""></script><script src="/dashboard/_next/static/chunks/236-619ed0248fb6fdd9.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-76b07aa5da91b0df.js" defer=""></script><script src="/dashboard/_next/static/1qG0HTmVilJPxQdBk0fX5/_buildManifest.js" defer=""></script><script 
src="/dashboard/_next/static/1qG0HTmVilJPxQdBk0fX5/_ssgManifest.js" defer=""></script></head><body><div id="__next"><div class="min-h-screen bg-gray-50"><div class="fixed top-0 left-0 right-0 z-50 shadow-sm"><div class="fixed top-0 left-0 right-0 bg-white z-30 h-14 px-4 border-b border-gray-200 shadow-sm"><div class="flex items-center h-full"><div class="flex items-center space-x-4 mr-6"><a class="flex items-center px-1 pt-1 h-full" href="/dashboard"><div class="h-20 w-20 flex items-center justify-center"><img alt="SkyPilot Logo" fetchpriority="high" width="80" height="80" decoding="async" data-nimg="1" class="w-full h-full object-contain" style="color:transparent" src="/dashboard/skypilot.svg"/></div></a></div><div class="flex items-center space-x-2 md:space-x-4 mr-6"><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/clusters"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="20" height="8" x="2" y="2" rx="2" ry="2"></rect><rect width="20" height="8" x="2" y="14" rx="2" ry="2"></rect><line x1="6" x2="6.01" y1="6" y2="6"></line><line x1="6" x2="6.01" y1="18" y2="18"></line></svg><span>Clusters</span></a><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/jobs"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M16 20V4a2 2 0 0 0-2-2h-4a2 2 0 0 0-2 2v16"></path><rect width="20" height="14" x="2" y="6" rx="2"></rect></svg><span>Jobs</span></a><div class="border-l border-gray-200 h-6 mx-1"></div><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" 
href="/dashboard/infra"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="4" y="4" width="16" height="16" rx="2" ry="2"></rect><rect x="9" y="9" width="6" height="6"></rect><line x1="9" y1="1" x2="9" y2="4"></line><line x1="15" y1="1" x2="15" y2="4"></line><line x1="9" y1="20" x2="9" y2="23"></line><line x1="15" y1="20" x2="15" y2="23"></line><line x1="20" y1="9" x2="23" y2="9"></line><line x1="20" y1="14" x2="23" y2="14"></line><line x1="1" y1="9" x2="4" y2="9"></line><line x1="1" y1="14" x2="4" y2="14"></line></svg><span>Infra</span></a><a class="inline-flex items-center border-b-2 border-transparent text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/workspaces"><svg class="w-4 h-4" stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><g><path fill="none" d="M0 0h24v24H0z"></path><path d="M3 18.5V5a3 3 0 0 1 3-3h14a1 1 0 0 1 1 1v18a1 1 0 0 1-1 1H6.5A3.5 3.5 0 0 1 3 18.5zM19 20v-3H6.5a1.5 1.5 0 0 0 0 3H19zM10 4H6a1 1 0 0 0-1 1v10.337A3.486 3.486 0 0 1 6.5 15H19V4h-2v8l-3.5-2-3.5 2V4z"></path></g></svg><span>Workspaces</span></a><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/users"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-users w-4 h-4"><path d="M16 21v-2a4 4 0 0 0-4-4H6a4 4 0 0 0-4 4v2"></path><circle cx="9" cy="7" r="4"></circle><path d="M22 21v-2a4 4 0 0 0-3-3.87"></path><path d="M16 3.13a4 4 0 0 1 0 7.75"></path></svg><span>Users</span></a></div><div class="flex items-center space-x-1 ml-auto"><a href="https://skypilot.readthedocs.io/en/latest/" target="_blank" rel="noopener noreferrer" class="inline-flex 
items-center px-2 py-1 text-gray-600 hover:text-blue-600 transition-colors duration-150 cursor-pointer" title="Docs" tabindex="0"><span class="mr-1">Docs</span><svg class="w-3.5 h-3.5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"></path><polyline points="15 3 21 3 21 9"></polyline><line x1="10" y1="14" x2="21" y2="3"></line></svg></a><a href="https://github.com/skypilot-org/skypilot" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="GitHub" tabindex="0"><svg class="w-5 h-5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"></path></svg></a><a href="https://slack.skypilot.co/" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="Slack" tabindex="0"><svg class="w-5 h-5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 
24 24" fill="currentColor"><path transform="scale(0.85) translate(1.8, 1.8)" d="M5.042 15.165a2.528 2.528 0 0 1-2.52 2.523A2.528 2.528 0 0 1 0 15.165a2.527 2.527 0 0 1 2.522-2.52h2.52v2.52zM6.313 15.165a2.527 2.527 0 0 1 2.521-2.52 2.527 2.527 0 0 1 2.521 2.52v6.313A2.528 2.528 0 0 1 8.834 24a2.528 2.528 0 0 1-2.521-2.522v-6.313zM8.834 5.042a2.528 2.528 0 0 1-2.521-2.52A2.528 2.528 0 0 1 8.834 0a2.528 2.528 0 0 1 2.521 2.522v2.52H8.834zM8.834 6.313a2.528 2.528 0 0 1 2.521 2.521 2.528 2.528 0 0 1-2.521 2.521H2.522A2.528 2.528 0 0 1 0 8.834a2.528 2.528 0 0 1 2.522-2.521h6.312zM18.956 8.834a2.528 2.528 0 0 1 2.522-2.521A2.528 2.528 0 0 1 24 8.834a2.528 2.528 0 0 1-2.522 2.521h-2.522V8.834zM17.688 8.834a2.528 2.528 0 0 1-2.523 2.521 2.527 2.527 0 0 1-2.52-2.521V2.522A2.527 2.527 0 0 1 15.165 0a2.528 2.528 0 0 1 2.523 2.522v6.312zM15.165 18.956a2.528 2.528 0 0 1 2.523 2.522A2.528 2.528 0 0 1 15.165 24a2.527 2.527 0 0 1-2.52-2.522v-2.522h2.52zM15.165 17.688a2.527 2.527 0 0 1-2.52-2.523 2.526 2.526 0 0 1 2.52-2.52h6.313A2.527 2.527 0 0 1 24 15.165a2.528 2.528 0 0 1-2.522 2.523h-6.313z"></path></svg></a><a href="https://github.com/skypilot-org/skypilot/issues/new" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="Leave Feedback" tabindex="0"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-message-square w-5 h-5"><path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"></path></svg></a><div class="border-l border-gray-200 h-6"></div><a class="inline-flex items-center justify-center p-2 rounded-full transition-colors duration-150 cursor-pointer text-gray-600 hover:bg-gray-100" title="Configuration" tabindex="0" href="/dashboard/config"><svg 
xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-settings w-5 h-5"><path d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.39a2 2 0 0 0-.73-2.73l-.15-.08a2 2 0 0 1-1-1.74v-.5a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z"></path><circle cx="12" cy="12" r="3"></circle></svg></a></div></div></div></div><div class="transition-all duration-200 ease-in-out min-h-screen" style="padding-top:56px"><main class="p-6"><div class="flex justify-center items-center h-64"><style data-emotion="css z01bqi animation-61bdi0">.css-z01bqi{display:inline-block;color:#1976d2;-webkit-animation:animation-61bdi0 1.4s linear infinite;animation:animation-61bdi0 1.4s linear infinite;}@-webkit-keyframes animation-61bdi0{0%{-webkit-transform:rotate(0deg);-moz-transform:rotate(0deg);-ms-transform:rotate(0deg);transform:rotate(0deg);}100%{-webkit-transform:rotate(360deg);-moz-transform:rotate(360deg);-ms-transform:rotate(360deg);transform:rotate(360deg);}}@keyframes animation-61bdi0{0%{-webkit-transform:rotate(0deg);-moz-transform:rotate(0deg);-ms-transform:rotate(0deg);transform:rotate(0deg);}100%{-webkit-transform:rotate(360deg);-moz-transform:rotate(360deg);-ms-transform:rotate(360deg);transform:rotate(360deg);}}</style><span class="MuiCircularProgress-root MuiCircularProgress-indeterminate MuiCircularProgress-colorPrimary css-z01bqi" style="width:40px;height:40px" role="progressbar"><style data-emotion="css 
13o7eu2">.css-13o7eu2{display:block;}</style><svg class="MuiCircularProgress-svg css-13o7eu2" viewBox="22 22 44 44"><style data-emotion="css 14891ef animation-1p2h4ri">.css-14891ef{stroke:currentColor;stroke-dasharray:80px,200px;stroke-dashoffset:0;-webkit-animation:animation-1p2h4ri 1.4s ease-in-out infinite;animation:animation-1p2h4ri 1.4s ease-in-out infinite;}@-webkit-keyframes animation-1p2h4ri{0%{stroke-dasharray:1px,200px;stroke-dashoffset:0;}50%{stroke-dasharray:100px,200px;stroke-dashoffset:-15px;}100%{stroke-dasharray:100px,200px;stroke-dashoffset:-125px;}}@keyframes animation-1p2h4ri{0%{stroke-dasharray:1px,200px;stroke-dashoffset:0;}50%{stroke-dasharray:100px,200px;stroke-dashoffset:-15px;}100%{stroke-dasharray:100px,200px;stroke-dashoffset:-125px;}}</style><circle class="MuiCircularProgress-circle MuiCircularProgress-circleIndeterminate css-14891ef" cx="44" cy="44" r="20.2" fill="none" stroke-width="3.6"></circle></svg></span><span class="ml-2 text-gray-500">Loading workspaces...</span></div></main></div></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"1qG0HTmVilJPxQdBk0fX5","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
sky/jobs/scheduler.py CHANGED
@@ -191,8 +191,8 @@ def maybe_schedule_next_jobs() -> None:
191
191
  pass
192
192
 
193
193
 
194
- def submit_job(job_id: int, dag_yaml_path: str, env_file_path: str,
195
- priority: int) -> None:
194
+ def submit_job(job_id: int, dag_yaml_path: str, original_user_yaml_path: str,
195
+ env_file_path: str, priority: int) -> None:
196
196
  """Submit an existing job to the scheduler.
197
197
 
198
198
  This should be called after a job is created in the `spot` table as
@@ -203,7 +203,8 @@ def submit_job(job_id: int, dag_yaml_path: str, env_file_path: str,
203
203
  The user hash should be set (e.g. via SKYPILOT_USER_ID) before calling this.
204
204
  """
205
205
  with filelock.FileLock(_get_lock_path()):
206
- state.scheduler_set_waiting(job_id, dag_yaml_path, env_file_path,
206
+ state.scheduler_set_waiting(job_id, dag_yaml_path,
207
+ original_user_yaml_path, env_file_path,
207
208
  common_utils.get_user_hash(), priority)
208
209
  maybe_schedule_next_jobs()
209
210
 
@@ -312,6 +313,9 @@ if __name__ == '__main__':
312
313
  parser.add_argument('dag_yaml',
313
314
  type=str,
314
315
  help='The path to the user job yaml file.')
316
+ parser.add_argument('--user-yaml-path',
317
+ type=str,
318
+ help='The path to the original user job yaml file.')
315
319
  parser.add_argument('--job-id',
316
320
  required=True,
317
321
  type=int,
@@ -325,4 +329,5 @@ if __name__ == '__main__':
325
329
  default=500,
326
330
  help='Job priority (0-1000, lower is higher). Default: 500.')
327
331
  args = parser.parse_args()
328
- submit_job(args.job_id, args.dag_yaml, args.env_file, args.priority)
332
+ submit_job(args.job_id, args.dag_yaml, args.user_yaml_path, args.env_file,
333
+ args.priority)
sky/jobs/server/core.py CHANGED
@@ -88,6 +88,9 @@ def launch(
88
88
  raise ValueError('Only single-task or chain DAG is '
89
89
  f'allowed for job_launch. Dag: {dag}')
90
90
  dag.validate()
91
+
92
+ user_dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
93
+
91
94
  dag_utils.maybe_infer_and_fill_dag_and_task_names(dag)
92
95
 
93
96
  task_names = set()
@@ -175,12 +178,20 @@ def launch(
175
178
  controller_utils.translate_local_file_mounts_to_two_hop(
176
179
  task_))
177
180
 
181
+ # Has to use `\` to avoid yapf issue.
178
182
  with tempfile.NamedTemporaryFile(prefix=f'managed-dag-{dag.name}-',
179
- mode='w') as f:
183
+ mode='w') as f, \
184
+ tempfile.NamedTemporaryFile(prefix=f'managed-user-dag-{dag.name}-',
185
+ mode='w') as original_user_yaml_path:
186
+ original_user_yaml_path.write(user_dag_str)
187
+ original_user_yaml_path.flush()
188
+
180
189
  dag_utils.dump_chain_dag_to_yaml(dag, f.name)
181
190
  controller = controller_utils.Controllers.JOBS_CONTROLLER
182
191
  controller_name = controller.value.cluster_name
183
192
  prefix = managed_job_constants.JOBS_TASK_YAML_PREFIX
193
+ remote_original_user_yaml_path = (
194
+ f'{prefix}/{dag.name}-{dag_uuid}.original_user_yaml')
184
195
  remote_user_yaml_path = f'{prefix}/{dag.name}-{dag_uuid}.yaml'
185
196
  remote_user_config_path = f'{prefix}/{dag.name}-{dag_uuid}.config_yaml'
186
197
  remote_env_file_path = f'{prefix}/{dag.name}-{dag_uuid}.env'
@@ -189,6 +200,8 @@ def launch(
189
200
  task_resources=sum([list(t.resources) for t in dag.tasks], []))
190
201
 
191
202
  vars_to_fill = {
203
+ 'remote_original_user_yaml_path': remote_original_user_yaml_path,
204
+ 'original_user_dag_path': original_user_yaml_path.name,
192
205
  'remote_user_yaml_path': remote_user_yaml_path,
193
206
  'user_yaml_path': f.name,
194
207
  'local_to_controller_file_mounts': local_to_controller_file_mounts,
sky/jobs/state.py CHANGED
@@ -122,7 +122,8 @@ def create_table(cursor, conn):
122
122
  user_hash TEXT,
123
123
  workspace TEXT DEFAULT NULL,
124
124
  priority INTEGER DEFAULT 500,
125
- entrypoint TEXT DEFAULT NULL)""")
125
+ entrypoint TEXT DEFAULT NULL,
126
+ original_user_yaml_path TEXT DEFAULT NULL)""")
126
127
 
127
128
  db_utils.add_column_to_table(cursor, conn, 'job_info', 'schedule_state',
128
129
  'TEXT')
@@ -153,6 +154,8 @@ def create_table(cursor, conn):
153
154
  value_to_replace_existing_entries=500)
154
155
 
155
156
  db_utils.add_column_to_table(cursor, conn, 'job_info', 'entrypoint', 'TEXT')
157
+ db_utils.add_column_to_table(cursor, conn, 'job_info',
158
+ 'original_user_yaml_path', 'TEXT')
156
159
  conn.commit()
157
160
 
158
161
 
@@ -212,6 +215,7 @@ columns = [
212
215
  'workspace',
213
216
  'priority',
214
217
  'entrypoint',
218
+ 'original_user_yaml_path',
215
219
  ]
216
220
 
217
221
 
@@ -1013,19 +1017,16 @@ def get_managed_jobs(job_id: Optional[int] = None) -> List[Dict[str, Any]]:
1013
1017
  if job_dict['job_name'] is None:
1014
1018
  job_dict['job_name'] = job_dict['task_name']
1015
1019
 
1016
- # Add YAML content and command for managed jobs
1017
- dag_yaml_path = job_dict.get('dag_yaml_path')
1018
- if dag_yaml_path:
1020
+ # Add user YAML content for managed jobs.
1021
+ yaml_path = job_dict.get('original_user_yaml_path')
1022
+ if yaml_path:
1019
1023
  try:
1020
- with open(dag_yaml_path, 'r', encoding='utf-8') as f:
1021
- job_dict['dag_yaml'] = f.read()
1024
+ with open(yaml_path, 'r', encoding='utf-8') as f:
1025
+ job_dict['user_yaml'] = f.read()
1022
1026
  except (FileNotFoundError, IOError, OSError):
1023
- job_dict['dag_yaml'] = None
1024
-
1025
- # Generate a command that could be used to launch this job
1026
- # Format: sky jobs launch <yaml_path>
1027
+ job_dict['user_yaml'] = None
1027
1028
  else:
1028
- job_dict['dag_yaml'] = None
1029
+ job_dict['user_yaml'] = None
1029
1030
 
1030
1031
  jobs.append(job_dict)
1031
1032
  return jobs
@@ -1083,18 +1084,20 @@ def get_local_log_file(job_id: int, task_id: Optional[int]) -> Optional[str]:
1083
1084
  # scheduler lock to work correctly.
1084
1085
 
1085
1086
 
1086
def scheduler_set_waiting(job_id: int, dag_yaml_path: str,
                          original_user_yaml_path: str, env_file_path: str,
                          user_hash: str, priority: int) -> None:
    """Mark an INACTIVE job as WAITING and record its submission metadata.

    Do not call without holding the scheduler lock.

    The row in `job_info` whose `spot_job_id` equals `job_id` is updated only
    if its current `schedule_state` is INACTIVE. Exactly one row is expected
    to change.

    Args:
        job_id: Value matched against the `spot_job_id` column.
        dag_yaml_path: Stored in the `dag_yaml_path` column.
        original_user_yaml_path: Stored in the `original_user_yaml_path`
            column.
        env_file_path: Stored in the `env_file_path` column.
        user_hash: Stored in the `user_hash` column.
        priority: Stored in the `priority` column.

    Raises:
        AssertionError: If the UPDATE did not touch exactly one row.
    """
    update_statement = (
        'UPDATE job_info SET '
        'schedule_state = (?), dag_yaml_path = (?), '
        'original_user_yaml_path = (?), env_file_path = (?), '
        ' user_hash = (?), priority = (?) '
        'WHERE spot_job_id = (?) AND schedule_state = (?)')
    with db_utils.safe_cursor(_DB_PATH) as cursor:
        # Placeholder order: the six SET values first, then the two WHERE
        # values (job id and the required current state).
        bound_values = (
            ManagedJobScheduleState.WAITING.value,
            dag_yaml_path,
            original_user_yaml_path,
            env_file_path,
            user_hash,
            priority,
            job_id,
            ManagedJobScheduleState.INACTIVE.value,
        )
        result = cursor.execute(update_statement, bound_values)
        updated_count = result.rowcount
        assert updated_count == 1, (job_id, updated_count)
1099
1102
 
1100
1103
 
@@ -2342,6 +2342,7 @@ def get_endpoint_debug_message() -> str:
2342
2342
  def combine_pod_config_fields(
2343
2343
  cluster_yaml_path: str,
2344
2344
  cluster_config_overrides: Dict[str, Any],
2345
+ cloud: Optional[clouds.Cloud] = None,
2345
2346
  ) -> None:
2346
2347
  """Adds or updates fields in the YAML with fields from the
2347
2348
  ~/.sky/config.yaml's kubernetes.pod_spec dict.
@@ -2386,11 +2387,17 @@ def combine_pod_config_fields(
2386
2387
  yaml_obj = yaml.safe_load(yaml_content)
2387
2388
  # We don't use override_configs in `skypilot_config.get_nested`, as merging
2388
2389
  # the pod config requires special handling.
2389
- kubernetes_config = skypilot_config.get_nested(('kubernetes', 'pod_config'),
2390
- default_value={},
2391
- override_configs={})
2392
- override_pod_config = (cluster_config_overrides.get('kubernetes', {}).get(
2393
- 'pod_config', {}))
2390
+ if isinstance(cloud, clouds.SSH):
2391
+ kubernetes_config = skypilot_config.get_nested(('ssh', 'pod_config'),
2392
+ default_value={},
2393
+ override_configs={})
2394
+ override_pod_config = (cluster_config_overrides.get('ssh', {}).get(
2395
+ 'pod_config', {}))
2396
+ else:
2397
+ kubernetes_config = skypilot_config.get_nested(
2398
+ ('kubernetes', 'pod_config'), default_value={}, override_configs={})
2399
+ override_pod_config = (cluster_config_overrides.get(
2400
+ 'kubernetes', {}).get('pod_config', {}))
2394
2401
  config_utils.merge_k8s_configs(kubernetes_config, override_pod_config)
2395
2402
 
2396
2403
  # Merge the kubernetes config into the YAML for both head and worker nodes.
@@ -0,0 +1,47 @@
1
"""Constants used by the Nebius provisioner.

Besides the provisioner version tag, this module keeps the InfiniBand lookup
data (capable platforms, environment variables, docker flags and fabric
names) and a helper that picks a fabric for a platform/region pair.
"""

VERSION = 'v1'

# InfiniBand-capable instance platforms.
INFINIBAND_INSTANCE_PLATFORMS = ['gpu-h100-sxm', 'gpu-h200-sxm']

# InfiniBand environment variables for NCCL and UCX. UCX_NET_DEVICES lists
# port 1 of the eight devices mlx5_0 .. mlx5_7, comma separated.
INFINIBAND_ENV_VARS = {
    'NCCL_IB_HCA': 'mlx5',
    'UCX_NET_DEVICES': ','.join(f'mlx5_{index}:1' for index in range(8)),
}

# Docker run options for InfiniBand support.
INFINIBAND_DOCKER_OPTIONS = [
    '--device=/dev/infiniband',
    '--cap-add=IPC_LOCK',
]

# InfiniBand fabric mapping keyed by (platform, region); the first entry of
# each list is the one get_default_fabric() hands out.
# Based on Nebius documentation.
INFINIBAND_FABRIC_MAPPING = {
    # H100 platforms
    ('gpu-h100-sxm', 'eu-north1'): [
        'fabric-2',
        'fabric-3',
        'fabric-4',
        'fabric-6',
    ],
    # H200 platforms
    ('gpu-h200-sxm', 'eu-north1'): ['fabric-7'],
    ('gpu-h200-sxm', 'eu-west1'): ['fabric-5'],
    ('gpu-h200-sxm', 'us-central1'): ['us-central1-a'],
}


def get_default_fabric(platform: str, region: str) -> str:
    """Get the default (first) fabric for a given platform and region.

    Args:
        platform: Instance platform name, e.g. 'gpu-h100-sxm'.
        region: Region name, e.g. 'eu-north1'.

    Returns:
        The first fabric listed for (platform, region). When that pair has no
        entry (or an empty one), the first fabric of the
        ('gpu-h100-sxm', 'eu-north1') entry is returned instead.

    Raises:
        ValueError: If neither the requested pair nor the fallback entry
            provides a fabric.
    """
    # Try the requested pair first, then the north europe H100 entry that
    # serves as the default.
    lookup_order = ((platform, region), ('gpu-h100-sxm', 'eu-north1'))
    for key in lookup_order:
        candidates = INFINIBAND_FABRIC_MAPPING.get(key, [])
        if candidates:
            return candidates[0]
    raise ValueError(
        f'No InfiniBand fabric available for platform {platform} '
        f'in region {region}')
@@ -124,6 +124,7 @@ def run_instances(region: str, cluster_name_on_cloud: str,
124
124
  node_type = 'head' if head_instance_id is None else 'worker'
125
125
  try:
126
126
  platform, preset = config.node_config['InstanceType'].split('_')
127
+
127
128
  instance_id = utils.launch(
128
129
  cluster_name_on_cloud=cluster_name_on_cloud,
129
130
  node_type=node_type,
@@ -136,7 +137,7 @@ def run_instances(region: str, cluster_name_on_cloud: str,
136
137
  associate_public_ip_address=(
137
138
  not config.provider_config['use_internal_ips']),
138
139
  filesystems=config.node_config.get('filesystems', []),
139
- )
140
+ network_tier=config.node_config.get('network_tier'))
140
141
  except Exception as e: # pylint: disable=broad-except
141
142
  logger.warning(f'run_instances error: {e}')
142
143
  raise
@@ -1,12 +1,14 @@
1
1
  """Nebius library wrapper for SkyPilot."""
2
2
  import time
3
- from typing import Any, Dict, List
3
+ from typing import Any, Dict, List, Optional
4
4
  import uuid
5
5
 
6
6
  from sky import sky_logging
7
7
  from sky import skypilot_config
8
8
  from sky.adaptors import nebius
9
+ from sky.provision.nebius import constants as nebius_constants
9
10
  from sky.utils import common_utils
11
+ from sky.utils import resources_utils
10
12
 
11
13
  logger = sky_logging.init_logger(__name__)
12
14
 
@@ -156,10 +158,17 @@ def start(instance_id: str) -> None:
156
158
  f' to be ready.')
157
159
 
158
160
 
159
- def launch(cluster_name_on_cloud: str, node_type: str, platform: str,
160
- preset: str, region: str, image_family: str, disk_size: int,
161
- user_data: str, associate_public_ip_address: bool,
162
- filesystems: List[Dict[str, Any]]) -> str:
161
+ def launch(cluster_name_on_cloud: str,
162
+ node_type: str,
163
+ platform: str,
164
+ preset: str,
165
+ region: str,
166
+ image_family: str,
167
+ disk_size: int,
168
+ user_data: str,
169
+ associate_public_ip_address: bool,
170
+ filesystems: List[Dict[str, Any]],
171
+ network_tier: Optional[resources_utils.NetworkTier] = None) -> str:
163
172
  # Each node must have a unique name to avoid conflicts between
164
173
  # multiple worker VMs. To ensure uniqueness, a UUID is appended
165
174
  # to the node name.
@@ -173,11 +182,23 @@ def launch(cluster_name_on_cloud: str, node_type: str, platform: str,
173
182
  # 8 GPU virtual machines can be grouped into a GPU cluster.
174
183
  # The GPU clusters are built with InfiniBand secure high-speed networking.
175
184
  # https://docs.nebius.com/compute/clusters/gpu
176
- if platform in ('gpu-h100-sxm', 'gpu-h200-sxm'):
185
+ if platform in nebius_constants.INFINIBAND_INSTANCE_PLATFORMS:
177
186
  if preset == '8gpu-128vcpu-1600gb':
178
- # Check is there fabric in config
179
187
  fabric = skypilot_config.get_nested(('nebius', region, 'fabric'),
180
188
  None)
189
+
190
+ # Auto-select fabric if network_tier=best and no fabric configured
191
+ if (fabric is None and
192
+ str(network_tier) == str(resources_utils.NetworkTier.BEST)):
193
+ try:
194
+ fabric = nebius_constants.get_default_fabric(
195
+ platform, region)
196
+ logger.info(f'Auto-selected InfiniBand fabric {fabric} '
197
+ f'for {platform} in {region}')
198
+ except ValueError as e:
199
+ logger.warning(
200
+ f'InfiniBand fabric auto-selection failed: {e}')
201
+
181
202
  if fabric is None:
182
203
  logger.warning(
183
204
  f'Set up fabric for region {region} in ~/.sky/config.yaml '
@@ -2,7 +2,8 @@
2
2
  import asyncio
3
3
  import logging
4
4
  import threading
5
- from typing import Dict, Optional, Union
5
+ import traceback
6
+ from typing import Dict, List, Optional, Union
6
7
 
7
8
  import aiohttp
8
9
  import fastapi
@@ -69,6 +70,48 @@ class SkyServeLoadBalancer:
69
70
  # updating it from _sync_with_controller.
70
71
  self._client_pool_lock: threading.Lock = threading.Lock()
71
72
 
73
async def _sync_with_controller_once(self) -> List[asyncio.Task]:
    """Run one sync round with the controller.

    Posts the aggregated request information to the controller's
    `/controller/load_balancer_sync` endpoint, then updates the load
    balancing policy and the per-replica client pool from the
    `ready_replica_urls` field of the JSON response.

    Returns:
        The un-awaited `aclose()` calls for the clients whose replica URL is
        no longer ready. The list is empty when the sync request failed
        with an aiohttp client error or a timeout (the error is logged).
    """
    pending_closes = []
    async with aiohttp.ClientSession() as session:
        try:
            # Send request information
            async with session.post(
                    self._controller_url + '/controller/load_balancer_sync',
                    json={
                        'request_aggregator':
                            self._request_aggregator.to_dict()
                    },
                    timeout=aiohttp.ClientTimeout(5),
            ) as response:
                # Clean up after reporting request info to avoid OOM.
                self._request_aggregator.clear()
                response.raise_for_status()
                payload = await response.json()
                ready_replica_urls = payload.get('ready_replica_urls', [])
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            logger.error(f'An error occurred when syncing with '
                         f'the controller: {e}'
                         f'\nTraceback: {traceback.format_exc()}')
            # Nothing was synced, so there is nothing to close.
            return pending_closes

        logger.info(f'Available Replica URLs: {ready_replica_urls}')
        with self._client_pool_lock:
            self._load_balancing_policy.set_ready_replicas(ready_replica_urls)
            # Open a client for every ready replica that has none yet.
            for url in ready_replica_urls:
                if url in self._client_pool:
                    continue
                self._client_pool[url] = httpx.AsyncClient(base_url=url)
            # Drop the clients of replicas that are no longer ready.
            stale_urls = set(self._client_pool.keys()) - set(ready_replica_urls)
            retired_clients = [
                self._client_pool.pop(url) for url in stale_urls
            ]
        # Only create the close calls here; they are returned un-awaited.
        pending_closes.extend(
            client.aclose() for client in retired_clients)
    return pending_closes
114
+
72
115
  async def _sync_with_controller(self):
73
116
  """Sync with controller periodically.
74
117
 
@@ -82,49 +125,16 @@ class SkyServeLoadBalancer:
82
125
  await asyncio.sleep(5)
83
126
 
84
127
  while True:
85
- close_client_tasks = []
86
- async with aiohttp.ClientSession() as session:
87
- try:
88
- # Send request information
89
- async with session.post(
90
- self._controller_url +
91
- '/controller/load_balancer_sync',
92
- json={
93
- 'request_aggregator':
94
- self._request_aggregator.to_dict()
95
- },
96
- timeout=aiohttp.ClientTimeout(5),
97
- ) as response:
98
- # Clean up after reporting request info to avoid OOM.
99
- self._request_aggregator.clear()
100
- response.raise_for_status()
101
- response_json = await response.json()
102
- ready_replica_urls = response_json.get(
103
- 'ready_replica_urls', [])
104
- except aiohttp.ClientError as e:
105
- logger.error('An error occurred when syncing with '
106
- f'the controller: {e}')
107
- else:
108
- logger.info(f'Available Replica URLs: {ready_replica_urls}')
109
- with self._client_pool_lock:
110
- self._load_balancing_policy.set_ready_replicas(
111
- ready_replica_urls)
112
- for replica_url in ready_replica_urls:
113
- if replica_url not in self._client_pool:
114
- self._client_pool[replica_url] = (
115
- httpx.AsyncClient(base_url=replica_url))
116
- urls_to_close = set(
117
- self._client_pool.keys()) - set(ready_replica_urls)
118
- client_to_close = []
119
- for replica_url in urls_to_close:
120
- client_to_close.append(
121
- self._client_pool.pop(replica_url))
122
- for client in client_to_close:
123
- close_client_tasks.append(client.aclose())
124
-
125
- await asyncio.sleep(constants.LB_CONTROLLER_SYNC_INTERVAL_SECONDS)
126
- # Await those tasks after the interval to avoid blocking.
127
- await asyncio.gather(*close_client_tasks)
128
+ try:
129
+ close_client_tasks = await self._sync_with_controller_once()
130
+ await asyncio.sleep(
131
+ constants.LB_CONTROLLER_SYNC_INTERVAL_SECONDS)
132
+ # Await those tasks after the interval to avoid blocking.
133
+ await asyncio.gather(*close_client_tasks)
134
+ except Exception as e: # pylint: disable=broad-except
135
+ logger.error(f'An error occurred when syncing with '
136
+ f'the controller: {e}'
137
+ f'\nTraceback: {traceback.format_exc()}')
128
138
 
129
139
  async def _proxy_request_to(
130
140
  self, url: str, request: fastapi.Request
@@ -168,7 +178,8 @@ class SkyServeLoadBalancer:
168
178
  background=background.BackgroundTask(background_func))
169
179
  except (httpx.RequestError, httpx.HTTPStatusError) as e:
170
180
  logger.error(f'Error when proxy request to {url}: '
171
- f'{common_utils.format_exception(e)}')
181
+ f'{common_utils.format_exception(e)}'
182
+ f'\nTraceback: {traceback.format_exc()}')
172
183
  return e
173
184
 
174
185
  async def _proxy_with_retries(
sky/skylet/constants.py CHANGED
@@ -367,6 +367,7 @@ RCLONE_CACHE_REFRESH_INTERVAL = 10
367
367
  OVERRIDEABLE_CONFIG_KEYS_IN_TASK: List[Tuple[str, ...]] = [
368
368
  ('docker', 'run_options'),
369
369
  ('nvidia_gpus', 'disable_ecc'),
370
+ ('ssh', 'pod_config'),
370
371
  ('kubernetes', 'pod_config'),
371
372
  ('kubernetes', 'provision_timeout'),
372
373
  ('gcp', 'managed_instance_group'),
sky/skypilot_config.py CHANGED
@@ -167,7 +167,10 @@ def _get_loaded_config_path() -> List[Optional[str]]:
167
167
  serialized = _get_config_context().config_path
168
168
  if not serialized:
169
169
  return []
170
- return json.loads(serialized)
170
+ config_paths = json.loads(serialized)
171
+ if config_paths is None:
172
+ return []
173
+ return config_paths
171
174
 
172
175
 
173
176
  def _set_loaded_config_path(
@@ -3,6 +3,7 @@
3
3
  name: {{dag_name}}
4
4
 
5
5
  file_mounts:
6
+ {{remote_original_user_yaml_path}}: {{original_user_dag_path}}
6
7
  {{remote_user_yaml_path}}: {{user_yaml_path}}
7
8
  {%- if local_user_config_path is not none %}
8
9
  {{remote_user_config_path}}: {{local_user_config_path}}
@@ -28,7 +29,7 @@ setup: |
28
29
  grep -q 'export SKYPILOT_DEV=' ~/.bashrc || echo 'export SKYPILOT_DEV=1' >> ~/.bashrc
29
30
  grep -q 'alias sky-env=' ~/.bashrc || echo 'alias sky-env="{{ sky_activate_python_env }}"' >> ~/.bashrc
30
31
  {% endif %}
31
-
32
+
32
33
  # Create systemd service file
33
34
  mkdir -p ~/.config/systemd/user/
34
35
 
@@ -65,6 +66,7 @@ run: |
65
66
  # CloudVmRayBackend._exec_code_on_head() calls
66
67
  # managed_job_codegen.set_pending() before we get here.
67
68
  python -u -m sky.jobs.scheduler {{remote_user_yaml_path}} \
69
+ --user-yaml-path {{remote_original_user_yaml_path}} \
68
70
  --job-id $SKYPILOT_INTERNAL_JOB_ID \
69
71
  --env-file {{remote_env_file_path}} \
70
72
  --priority {{priority}}
@@ -46,6 +46,7 @@ available_node_types:
46
46
  InstanceType: {{instance_type}}
47
47
  ImageId: {{image_id}}
48
48
  DiskSize: {{disk_size}}
49
+ network_tier: {{network_tier}}
49
50
  filesystems:
50
51
  {%- for fs in filesystems %}
51
52
  - filesystem_id: {{ fs.filesystem_id }}
@@ -152,6 +153,11 @@ setup_commands:
152
153
  mkdir -p ~/.ssh; touch ~/.ssh/config;
153
154
  {{ conda_installation_commands }}
154
155
  {{ ray_skypilot_installation_commands }}
156
+ {%- if env_vars is defined %}
157
+ {%- for env_var, env_value in env_vars.items() %}
158
+ echo '{{env_var}}={{env_value}}' | sudo tee -a /etc/environment;
159
+ {%- endfor %}
160
+ {%- endif %}
155
161
  sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf';
156
162
  sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
157
163
  mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no\n IdentityFile ~/.ssh/sky-cluster-key\n IdentityFile ~/.ssh/id_rsa" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n IdentityFile ~/.ssh/sky-cluster-key\n IdentityFile ~/.ssh/id_rsa\n" >> ~/.ssh/config;
@@ -723,14 +723,16 @@ def main():
723
723
  # Do not support changing anything besides hosts for now
724
724
  if history is not None:
725
725
  for key in ['user', 'identity_file', 'password']:
726
- if history.get(key) != cluster_config.get(key):
726
+ if not args.cleanup and history.get(
727
+ key) != cluster_config.get(key):
727
728
  raise ValueError(
728
729
  f'Cluster configuration has changed for field {key!r}. '
729
730
  f'Previous value: {history.get(key)}, '
730
731
  f'Current value: {cluster_config.get(key)}')
731
732
  history_hosts_info = prepare_hosts_info(
732
733
  cluster_name, history)
733
- if history_hosts_info[0] != hosts_info[0]:
734
+ if not args.cleanup and history_hosts_info[0] != hosts_info[
735
+ 0]:
734
736
  raise ValueError(
735
737
  f'Cluster configuration has changed for master node. '
736
738
  f'Previous value: {history_hosts_info[0]}, '
@@ -860,7 +862,7 @@ def deploy_cluster(head_node,
860
862
  use_ssh_config=head_use_ssh_config,
861
863
  # For SkySSHUpLineProcessor
862
864
  print_output=True)
863
- if result is None:
865
+ if not cleanup and result is None:
864
866
  with ux_utils.print_exception_no_traceback():
865
867
  raise RuntimeError(
866
868
  f'Failed to SSH to head node ({head_node}). '
@@ -63,7 +63,9 @@ class NetworkTier(enum.Enum):
63
63
  def cli_help_message(cls) -> str:
64
64
  return (
65
65
  f'Network tier. Could be one of {", ".join(cls.supported_tiers())}'
66
- f'. Default: {cls.STANDARD.value}')
66
+ f'. If {cls.BEST.value} is specified, use the best network tier '
67
+ 'available on the specified instance. '
68
+ f'Default: {cls.STANDARD.value}')
67
69
 
68
70
  @classmethod
69
71
  def from_str(cls, tier: str) -> 'NetworkTier':