skypilot-nightly 1.0.0.dev20250606__py3-none-any.whl → 1.0.0.dev20250607__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +3 -1
- sky/check.py +14 -19
- sky/cli.py +0 -2
- sky/client/cli.py +0 -2
- sky/client/sdk.py +2 -1
- sky/clouds/cloud.py +4 -0
- sky/clouds/nebius.py +44 -4
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/1qG0HTmVilJPxQdBk0fX5/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/236-619ed0248fb6fdd9.js +6 -0
- sky/dashboard/out/_next/static/chunks/{470-9e7a479cc8303baa.js → 470-ad1e0db3afcbd9c9.js} +1 -1
- sky/dashboard/out/_next/static/chunks/969-2c584e28e6b4b106.js +1 -0
- sky/dashboard/out/_next/static/chunks/973-6d78a0814682d771.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-35cbeb5214fd4036.js → [cluster]-b919a73aecdfa78f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{clusters-5549a350f97d7ef3.js → clusters-4f6b9dd9abcb33ad.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-b68ddeed712d45b5.js → [context]-3a18d0eeb5119fe4.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-13b117a831702196.js → infra-a1a6abeeb58c1051.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1354e28c81eeb686.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/{jobs-a76b2700eca236f7.js → jobs-23bfc8bf373423db.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{users-262aab38b9baaf3a.js → users-5800045bd04e69c2.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspace/{new-c7516f2b4c3727c0.js → new-e1f9c0c3ff7ac4bd.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-7799de9e691e35d8.js → [name]-686590e0ee4b2412.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-384ea5fa0cea8f28.js → workspaces-76b07aa5da91b0df.js} +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/jobs/scheduler.py +9 -4
- sky/jobs/server/core.py +14 -1
- sky/jobs/state.py +18 -15
- sky/provision/kubernetes/utils.py +12 -5
- sky/provision/nebius/constants.py +47 -0
- sky/provision/nebius/instance.py +2 -1
- sky/provision/nebius/utils.py +28 -7
- sky/skylet/constants.py +1 -0
- sky/skypilot_config.py +4 -1
- sky/templates/jobs-controller.yaml.j2 +3 -1
- sky/templates/nebius-ray.yml.j2 +6 -0
- sky/utils/kubernetes/deploy_remote_cluster.py +5 -3
- sky/utils/resources_utils.py +3 -1
- {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250607.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250607.dist-info}/RECORD +61 -60
- sky/dashboard/out/_next/static/99m-BAySO8Q7J-ul1jZVL/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-a90f0a9753a10420.js +0 -6
- sky/dashboard/out/_next/static/chunks/969-c7abda31c10440ac.js +0 -1
- sky/dashboard/out/_next/static/chunks/973-1a09cac61cfcc1e1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-2d23a9c7571e6320.js +0 -16
- /sky/dashboard/out/_next/static/{99m-BAySO8Q7J-ul1jZVL → 1qG0HTmVilJPxQdBk0fX5}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{37-beedd583fea84cc8.js → 37-600191c5804dcae2.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/{682-6647f0417d5662f0.js → 682-b60cfdacc15202e8.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-65d04d5d77cbb6b6.js → [job]-18aed9b56247d074.js} +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/{config-1a1eeb949dab8897.js → config-fe375a56342cf609.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250607.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250607.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250607.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250606.dist-info → skypilot_nightly-1.0.0.dev20250607.dist-info}/top_level.txt +0 -0
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><title>Workspaces | SkyPilot Dashboard</title><link rel="preload" href="/dashboard/skypilot.svg" as="image" fetchpriority="high"/><meta name="next-head-count" content="4"/><link rel="preload" href="/dashboard/_next/static/css/667d941a2888ce6e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/667d941a2888ce6e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-65d465f948974c0d.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-cb81dc4d27f4d009.js" defer=""></script><script src="/dashboard/_next/static/chunks/614-635a84e87800f99e.js" defer=""></script><script src="/dashboard/_next/static/chunks/798-c0525dc3f21e488d.js" defer=""></script><script src="/dashboard/_next/static/chunks/121-865d2bf8a3b84c6a.js" defer=""></script><script src="/dashboard/_next/static/chunks/470-9e7a479cc8303baa.js" defer=""></script><script src="/dashboard/_next/static/chunks/293-351268365226d251.js" defer=""></script><script src="/dashboard/_next/static/chunks/969-c7abda31c10440ac.js" defer=""></script><script src="/dashboard/_next/static/chunks/856-3a32da4b84176f6d.js" defer=""></script><script src="/dashboard/_next/static/chunks/973-1a09cac61cfcc1e1.js" defer=""></script><script src="/dashboard/_next/static/chunks/236-a90f0a9753a10420.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-384ea5fa0cea8f28.js" defer=""></script><script src="/dashboard/_next/static/99m-BAySO8Q7J-ul1jZVL/_buildManifest.js" defer=""></script><script 
src="/dashboard/_next/static/99m-BAySO8Q7J-ul1jZVL/_ssgManifest.js" defer=""></script></head><body><div id="__next"><div class="min-h-screen bg-gray-50"><div class="fixed top-0 left-0 right-0 z-50 shadow-sm"><div class="fixed top-0 left-0 right-0 bg-white z-30 h-14 px-4 border-b border-gray-200 shadow-sm"><div class="flex items-center h-full"><div class="flex items-center space-x-4 mr-6"><a class="flex items-center px-1 pt-1 h-full" href="/dashboard"><div class="h-20 w-20 flex items-center justify-center"><img alt="SkyPilot Logo" fetchpriority="high" width="80" height="80" decoding="async" data-nimg="1" class="w-full h-full object-contain" style="color:transparent" src="/dashboard/skypilot.svg"/></div></a></div><div class="flex items-center space-x-2 md:space-x-4 mr-6"><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/clusters"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="20" height="8" x="2" y="2" rx="2" ry="2"></rect><rect width="20" height="8" x="2" y="14" rx="2" ry="2"></rect><line x1="6" x2="6.01" y1="6" y2="6"></line><line x1="6" x2="6.01" y1="18" y2="18"></line></svg><span>Clusters</span></a><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/jobs"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M16 20V4a2 2 0 0 0-2-2h-4a2 2 0 0 0-2 2v16"></path><rect width="20" height="14" x="2" y="6" rx="2"></rect></svg><span>Jobs</span></a><div class="border-l border-gray-200 h-6 mx-1"></div><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" 
href="/dashboard/infra"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="4" y="4" width="16" height="16" rx="2" ry="2"></rect><rect x="9" y="9" width="6" height="6"></rect><line x1="9" y1="1" x2="9" y2="4"></line><line x1="15" y1="1" x2="15" y2="4"></line><line x1="9" y1="20" x2="9" y2="23"></line><line x1="15" y1="20" x2="15" y2="23"></line><line x1="20" y1="9" x2="23" y2="9"></line><line x1="20" y1="14" x2="23" y2="14"></line><line x1="1" y1="9" x2="4" y2="9"></line><line x1="1" y1="14" x2="4" y2="14"></line></svg><span>Infra</span></a><a class="inline-flex items-center border-b-2 border-transparent text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/workspaces"><svg class="w-4 h-4" stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><g><path fill="none" d="M0 0h24v24H0z"></path><path d="M3 18.5V5a3 3 0 0 1 3-3h14a1 1 0 0 1 1 1v18a1 1 0 0 1-1 1H6.5A3.5 3.5 0 0 1 3 18.5zM19 20v-3H6.5a1.5 1.5 0 0 0 0 3H19zM10 4H6a1 1 0 0 0-1 1v10.337A3.486 3.486 0 0 1 6.5 15H19V4h-2v8l-3.5-2-3.5 2V4z"></path></g></svg><span>Workspaces</span></a><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/users"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-users w-4 h-4"><path d="M16 21v-2a4 4 0 0 0-4-4H6a4 4 0 0 0-4 4v2"></path><circle cx="9" cy="7" r="4"></circle><path d="M22 21v-2a4 4 0 0 0-3-3.87"></path><path d="M16 3.13a4 4 0 0 1 0 7.75"></path></svg><span>Users</span></a></div><div class="flex items-center space-x-1 ml-auto"><a href="https://skypilot.readthedocs.io/en/latest/" target="_blank" rel="noopener noreferrer" class="inline-flex 
items-center px-2 py-1 text-gray-600 hover:text-blue-600 transition-colors duration-150 cursor-pointer" title="Docs" tabindex="0"><span class="mr-1">Docs</span><svg class="w-3.5 h-3.5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"></path><polyline points="15 3 21 3 21 9"></polyline><line x1="10" y1="14" x2="21" y2="3"></line></svg></a><a href="https://github.com/skypilot-org/skypilot" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="GitHub" tabindex="0"><svg class="w-5 h-5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"></path></svg></a><a href="https://slack.skypilot.co/" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="Slack" tabindex="0"><svg class="w-5 h-5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 
24 24" fill="currentColor"><path transform="scale(0.85) translate(1.8, 1.8)" d="M5.042 15.165a2.528 2.528 0 0 1-2.52 2.523A2.528 2.528 0 0 1 0 15.165a2.527 2.527 0 0 1 2.522-2.52h2.52v2.52zM6.313 15.165a2.527 2.527 0 0 1 2.521-2.52 2.527 2.527 0 0 1 2.521 2.52v6.313A2.528 2.528 0 0 1 8.834 24a2.528 2.528 0 0 1-2.521-2.522v-6.313zM8.834 5.042a2.528 2.528 0 0 1-2.521-2.52A2.528 2.528 0 0 1 8.834 0a2.528 2.528 0 0 1 2.521 2.522v2.52H8.834zM8.834 6.313a2.528 2.528 0 0 1 2.521 2.521 2.528 2.528 0 0 1-2.521 2.521H2.522A2.528 2.528 0 0 1 0 8.834a2.528 2.528 0 0 1 2.522-2.521h6.312zM18.956 8.834a2.528 2.528 0 0 1 2.522-2.521A2.528 2.528 0 0 1 24 8.834a2.528 2.528 0 0 1-2.522 2.521h-2.522V8.834zM17.688 8.834a2.528 2.528 0 0 1-2.523 2.521 2.527 2.527 0 0 1-2.52-2.521V2.522A2.527 2.527 0 0 1 15.165 0a2.528 2.528 0 0 1 2.523 2.522v6.312zM15.165 18.956a2.528 2.528 0 0 1 2.523 2.522A2.528 2.528 0 0 1 15.165 24a2.527 2.527 0 0 1-2.52-2.522v-2.522h2.52zM15.165 17.688a2.527 2.527 0 0 1-2.52-2.523 2.526 2.526 0 0 1 2.52-2.52h6.313A2.527 2.527 0 0 1 24 15.165a2.528 2.528 0 0 1-2.522 2.523h-6.313z"></path></svg></a><a href="https://github.com/skypilot-org/skypilot/issues/new" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="Leave Feedback" tabindex="0"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-message-square w-5 h-5"><path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"></path></svg></a><div class="border-l border-gray-200 h-6"></div><a class="inline-flex items-center justify-center p-2 rounded-full transition-colors duration-150 cursor-pointer text-gray-600 hover:bg-gray-100" title="Configuration" tabindex="0" href="/dashboard/config"><svg 
xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-settings w-5 h-5"><path d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.39a2 2 0 0 0-.73-2.73l-.15-.08a2 2 0 0 1-1-1.74v-.5a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z"></path><circle cx="12" cy="12" r="3"></circle></svg></a></div></div></div></div><div class="transition-all duration-200 ease-in-out min-h-screen" style="padding-top:56px"><main class="p-6"><div class="flex justify-center items-center h-64"><style data-emotion="css z01bqi animation-61bdi0">.css-z01bqi{display:inline-block;color:#1976d2;-webkit-animation:animation-61bdi0 1.4s linear infinite;animation:animation-61bdi0 1.4s linear infinite;}@-webkit-keyframes animation-61bdi0{0%{-webkit-transform:rotate(0deg);-moz-transform:rotate(0deg);-ms-transform:rotate(0deg);transform:rotate(0deg);}100%{-webkit-transform:rotate(360deg);-moz-transform:rotate(360deg);-ms-transform:rotate(360deg);transform:rotate(360deg);}}@keyframes animation-61bdi0{0%{-webkit-transform:rotate(0deg);-moz-transform:rotate(0deg);-ms-transform:rotate(0deg);transform:rotate(0deg);}100%{-webkit-transform:rotate(360deg);-moz-transform:rotate(360deg);-ms-transform:rotate(360deg);transform:rotate(360deg);}}</style><span class="MuiCircularProgress-root MuiCircularProgress-indeterminate MuiCircularProgress-colorPrimary css-z01bqi" style="width:40px;height:40px" role="progressbar"><style data-emotion="css 
13o7eu2">.css-13o7eu2{display:block;}</style><svg class="MuiCircularProgress-svg css-13o7eu2" viewBox="22 22 44 44"><style data-emotion="css 14891ef animation-1p2h4ri">.css-14891ef{stroke:currentColor;stroke-dasharray:80px,200px;stroke-dashoffset:0;-webkit-animation:animation-1p2h4ri 1.4s ease-in-out infinite;animation:animation-1p2h4ri 1.4s ease-in-out infinite;}@-webkit-keyframes animation-1p2h4ri{0%{stroke-dasharray:1px,200px;stroke-dashoffset:0;}50%{stroke-dasharray:100px,200px;stroke-dashoffset:-15px;}100%{stroke-dasharray:100px,200px;stroke-dashoffset:-125px;}}@keyframes animation-1p2h4ri{0%{stroke-dasharray:1px,200px;stroke-dashoffset:0;}50%{stroke-dasharray:100px,200px;stroke-dashoffset:-15px;}100%{stroke-dasharray:100px,200px;stroke-dashoffset:-125px;}}</style><circle class="MuiCircularProgress-circle MuiCircularProgress-circleIndeterminate css-14891ef" cx="44" cy="44" r="20.2" fill="none" stroke-width="3.6"></circle></svg></span><span class="ml-2 text-gray-500">Loading workspaces...</span></div></main></div></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"99m-BAySO8Q7J-ul1jZVL","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><title>Workspaces | SkyPilot Dashboard</title><link rel="preload" href="/dashboard/skypilot.svg" as="image" fetchpriority="high"/><meta name="next-head-count" content="4"/><link rel="preload" href="/dashboard/_next/static/css/667d941a2888ce6e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/667d941a2888ce6e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-65d465f948974c0d.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-cb81dc4d27f4d009.js" defer=""></script><script src="/dashboard/_next/static/chunks/614-635a84e87800f99e.js" defer=""></script><script src="/dashboard/_next/static/chunks/798-c0525dc3f21e488d.js" defer=""></script><script src="/dashboard/_next/static/chunks/121-865d2bf8a3b84c6a.js" defer=""></script><script src="/dashboard/_next/static/chunks/470-ad1e0db3afcbd9c9.js" defer=""></script><script src="/dashboard/_next/static/chunks/969-2c584e28e6b4b106.js" defer=""></script><script src="/dashboard/_next/static/chunks/293-351268365226d251.js" defer=""></script><script src="/dashboard/_next/static/chunks/856-3a32da4b84176f6d.js" defer=""></script><script src="/dashboard/_next/static/chunks/973-6d78a0814682d771.js" defer=""></script><script src="/dashboard/_next/static/chunks/236-619ed0248fb6fdd9.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-76b07aa5da91b0df.js" defer=""></script><script src="/dashboard/_next/static/1qG0HTmVilJPxQdBk0fX5/_buildManifest.js" defer=""></script><script 
src="/dashboard/_next/static/1qG0HTmVilJPxQdBk0fX5/_ssgManifest.js" defer=""></script></head><body><div id="__next"><div class="min-h-screen bg-gray-50"><div class="fixed top-0 left-0 right-0 z-50 shadow-sm"><div class="fixed top-0 left-0 right-0 bg-white z-30 h-14 px-4 border-b border-gray-200 shadow-sm"><div class="flex items-center h-full"><div class="flex items-center space-x-4 mr-6"><a class="flex items-center px-1 pt-1 h-full" href="/dashboard"><div class="h-20 w-20 flex items-center justify-center"><img alt="SkyPilot Logo" fetchpriority="high" width="80" height="80" decoding="async" data-nimg="1" class="w-full h-full object-contain" style="color:transparent" src="/dashboard/skypilot.svg"/></div></a></div><div class="flex items-center space-x-2 md:space-x-4 mr-6"><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/clusters"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="20" height="8" x="2" y="2" rx="2" ry="2"></rect><rect width="20" height="8" x="2" y="14" rx="2" ry="2"></rect><line x1="6" x2="6.01" y1="6" y2="6"></line><line x1="6" x2="6.01" y1="18" y2="18"></line></svg><span>Clusters</span></a><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/jobs"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M16 20V4a2 2 0 0 0-2-2h-4a2 2 0 0 0-2 2v16"></path><rect width="20" height="14" x="2" y="6" rx="2"></rect></svg><span>Jobs</span></a><div class="border-l border-gray-200 h-6 mx-1"></div><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" 
href="/dashboard/infra"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="4" y="4" width="16" height="16" rx="2" ry="2"></rect><rect x="9" y="9" width="6" height="6"></rect><line x1="9" y1="1" x2="9" y2="4"></line><line x1="15" y1="1" x2="15" y2="4"></line><line x1="9" y1="20" x2="9" y2="23"></line><line x1="15" y1="20" x2="15" y2="23"></line><line x1="20" y1="9" x2="23" y2="9"></line><line x1="20" y1="14" x2="23" y2="14"></line><line x1="1" y1="9" x2="4" y2="9"></line><line x1="1" y1="14" x2="4" y2="14"></line></svg><span>Infra</span></a><a class="inline-flex items-center border-b-2 border-transparent text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/workspaces"><svg class="w-4 h-4" stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><g><path fill="none" d="M0 0h24v24H0z"></path><path d="M3 18.5V5a3 3 0 0 1 3-3h14a1 1 0 0 1 1 1v18a1 1 0 0 1-1 1H6.5A3.5 3.5 0 0 1 3 18.5zM19 20v-3H6.5a1.5 1.5 0 0 0 0 3H19zM10 4H6a1 1 0 0 0-1 1v10.337A3.486 3.486 0 0 1 6.5 15H19V4h-2v8l-3.5-2-3.5 2V4z"></path></g></svg><span>Workspaces</span></a><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/users"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-users w-4 h-4"><path d="M16 21v-2a4 4 0 0 0-4-4H6a4 4 0 0 0-4 4v2"></path><circle cx="9" cy="7" r="4"></circle><path d="M22 21v-2a4 4 0 0 0-3-3.87"></path><path d="M16 3.13a4 4 0 0 1 0 7.75"></path></svg><span>Users</span></a></div><div class="flex items-center space-x-1 ml-auto"><a href="https://skypilot.readthedocs.io/en/latest/" target="_blank" rel="noopener noreferrer" class="inline-flex 
items-center px-2 py-1 text-gray-600 hover:text-blue-600 transition-colors duration-150 cursor-pointer" title="Docs" tabindex="0"><span class="mr-1">Docs</span><svg class="w-3.5 h-3.5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"></path><polyline points="15 3 21 3 21 9"></polyline><line x1="10" y1="14" x2="21" y2="3"></line></svg></a><a href="https://github.com/skypilot-org/skypilot" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="GitHub" tabindex="0"><svg class="w-5 h-5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"></path></svg></a><a href="https://slack.skypilot.co/" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="Slack" tabindex="0"><svg class="w-5 h-5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 
24 24" fill="currentColor"><path transform="scale(0.85) translate(1.8, 1.8)" d="M5.042 15.165a2.528 2.528 0 0 1-2.52 2.523A2.528 2.528 0 0 1 0 15.165a2.527 2.527 0 0 1 2.522-2.52h2.52v2.52zM6.313 15.165a2.527 2.527 0 0 1 2.521-2.52 2.527 2.527 0 0 1 2.521 2.52v6.313A2.528 2.528 0 0 1 8.834 24a2.528 2.528 0 0 1-2.521-2.522v-6.313zM8.834 5.042a2.528 2.528 0 0 1-2.521-2.52A2.528 2.528 0 0 1 8.834 0a2.528 2.528 0 0 1 2.521 2.522v2.52H8.834zM8.834 6.313a2.528 2.528 0 0 1 2.521 2.521 2.528 2.528 0 0 1-2.521 2.521H2.522A2.528 2.528 0 0 1 0 8.834a2.528 2.528 0 0 1 2.522-2.521h6.312zM18.956 8.834a2.528 2.528 0 0 1 2.522-2.521A2.528 2.528 0 0 1 24 8.834a2.528 2.528 0 0 1-2.522 2.521h-2.522V8.834zM17.688 8.834a2.528 2.528 0 0 1-2.523 2.521 2.527 2.527 0 0 1-2.52-2.521V2.522A2.527 2.527 0 0 1 15.165 0a2.528 2.528 0 0 1 2.523 2.522v6.312zM15.165 18.956a2.528 2.528 0 0 1 2.523 2.522A2.528 2.528 0 0 1 15.165 24a2.527 2.527 0 0 1-2.52-2.522v-2.522h2.52zM15.165 17.688a2.527 2.527 0 0 1-2.52-2.523 2.526 2.526 0 0 1 2.52-2.52h6.313A2.527 2.527 0 0 1 24 15.165a2.528 2.528 0 0 1-2.522 2.523h-6.313z"></path></svg></a><a href="https://github.com/skypilot-org/skypilot/issues/new" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="Leave Feedback" tabindex="0"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-message-square w-5 h-5"><path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"></path></svg></a><div class="border-l border-gray-200 h-6"></div><a class="inline-flex items-center justify-center p-2 rounded-full transition-colors duration-150 cursor-pointer text-gray-600 hover:bg-gray-100" title="Configuration" tabindex="0" href="/dashboard/config"><svg 
xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-settings w-5 h-5"><path d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.39a2 2 0 0 0-.73-2.73l-.15-.08a2 2 0 0 1-1-1.74v-.5a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z"></path><circle cx="12" cy="12" r="3"></circle></svg></a></div></div></div></div><div class="transition-all duration-200 ease-in-out min-h-screen" style="padding-top:56px"><main class="p-6"><div class="flex justify-center items-center h-64"><style data-emotion="css z01bqi animation-61bdi0">.css-z01bqi{display:inline-block;color:#1976d2;-webkit-animation:animation-61bdi0 1.4s linear infinite;animation:animation-61bdi0 1.4s linear infinite;}@-webkit-keyframes animation-61bdi0{0%{-webkit-transform:rotate(0deg);-moz-transform:rotate(0deg);-ms-transform:rotate(0deg);transform:rotate(0deg);}100%{-webkit-transform:rotate(360deg);-moz-transform:rotate(360deg);-ms-transform:rotate(360deg);transform:rotate(360deg);}}@keyframes animation-61bdi0{0%{-webkit-transform:rotate(0deg);-moz-transform:rotate(0deg);-ms-transform:rotate(0deg);transform:rotate(0deg);}100%{-webkit-transform:rotate(360deg);-moz-transform:rotate(360deg);-ms-transform:rotate(360deg);transform:rotate(360deg);}}</style><span class="MuiCircularProgress-root MuiCircularProgress-indeterminate MuiCircularProgress-colorPrimary css-z01bqi" style="width:40px;height:40px" role="progressbar"><style data-emotion="css 
13o7eu2">.css-13o7eu2{display:block;}</style><svg class="MuiCircularProgress-svg css-13o7eu2" viewBox="22 22 44 44"><style data-emotion="css 14891ef animation-1p2h4ri">.css-14891ef{stroke:currentColor;stroke-dasharray:80px,200px;stroke-dashoffset:0;-webkit-animation:animation-1p2h4ri 1.4s ease-in-out infinite;animation:animation-1p2h4ri 1.4s ease-in-out infinite;}@-webkit-keyframes animation-1p2h4ri{0%{stroke-dasharray:1px,200px;stroke-dashoffset:0;}50%{stroke-dasharray:100px,200px;stroke-dashoffset:-15px;}100%{stroke-dasharray:100px,200px;stroke-dashoffset:-125px;}}@keyframes animation-1p2h4ri{0%{stroke-dasharray:1px,200px;stroke-dashoffset:0;}50%{stroke-dasharray:100px,200px;stroke-dashoffset:-15px;}100%{stroke-dasharray:100px,200px;stroke-dashoffset:-125px;}}</style><circle class="MuiCircularProgress-circle MuiCircularProgress-circleIndeterminate css-14891ef" cx="44" cy="44" r="20.2" fill="none" stroke-width="3.6"></circle></svg></span><span class="ml-2 text-gray-500">Loading workspaces...</span></div></main></div></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"1qG0HTmVilJPxQdBk0fX5","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/jobs/scheduler.py
CHANGED
@@ -191,8 +191,8 @@ def maybe_schedule_next_jobs() -> None:
|
|
191
191
|
pass
|
192
192
|
|
193
193
|
|
194
|
-
def submit_job(job_id: int, dag_yaml_path: str, env_file_path: str,
|
195
|
-
priority: int) -> None:
|
194
|
+
def submit_job(job_id: int, dag_yaml_path: str, original_user_yaml_path: str,
|
195
|
+
env_file_path: str, priority: int) -> None:
|
196
196
|
"""Submit an existing job to the scheduler.
|
197
197
|
|
198
198
|
This should be called after a job is created in the `spot` table as
|
@@ -203,7 +203,8 @@ def submit_job(job_id: int, dag_yaml_path: str, env_file_path: str,
|
|
203
203
|
The user hash should be set (e.g. via SKYPILOT_USER_ID) before calling this.
|
204
204
|
"""
|
205
205
|
with filelock.FileLock(_get_lock_path()):
|
206
|
-
state.scheduler_set_waiting(job_id, dag_yaml_path,
|
206
|
+
state.scheduler_set_waiting(job_id, dag_yaml_path,
|
207
|
+
original_user_yaml_path, env_file_path,
|
207
208
|
common_utils.get_user_hash(), priority)
|
208
209
|
maybe_schedule_next_jobs()
|
209
210
|
|
@@ -312,6 +313,9 @@ if __name__ == '__main__':
|
|
312
313
|
parser.add_argument('dag_yaml',
|
313
314
|
type=str,
|
314
315
|
help='The path to the user job yaml file.')
|
316
|
+
parser.add_argument('--user-yaml-path',
|
317
|
+
type=str,
|
318
|
+
help='The path to the original user job yaml file.')
|
315
319
|
parser.add_argument('--job-id',
|
316
320
|
required=True,
|
317
321
|
type=int,
|
@@ -325,4 +329,5 @@ if __name__ == '__main__':
|
|
325
329
|
default=500,
|
326
330
|
help='Job priority (0-1000, lower is higher). Default: 500.')
|
327
331
|
args = parser.parse_args()
|
328
|
-
submit_job(args.job_id, args.dag_yaml, args.
|
332
|
+
submit_job(args.job_id, args.dag_yaml, args.user_yaml_path, args.env_file,
|
333
|
+
args.priority)
|
sky/jobs/server/core.py
CHANGED
@@ -88,6 +88,9 @@ def launch(
|
|
88
88
|
raise ValueError('Only single-task or chain DAG is '
|
89
89
|
f'allowed for job_launch. Dag: {dag}')
|
90
90
|
dag.validate()
|
91
|
+
|
92
|
+
user_dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
|
93
|
+
|
91
94
|
dag_utils.maybe_infer_and_fill_dag_and_task_names(dag)
|
92
95
|
|
93
96
|
task_names = set()
|
@@ -175,12 +178,20 @@ def launch(
|
|
175
178
|
controller_utils.translate_local_file_mounts_to_two_hop(
|
176
179
|
task_))
|
177
180
|
|
181
|
+
# Has to use `\` to avoid yapf issue.
|
178
182
|
with tempfile.NamedTemporaryFile(prefix=f'managed-dag-{dag.name}-',
|
179
|
-
mode='w') as f
|
183
|
+
mode='w') as f, \
|
184
|
+
tempfile.NamedTemporaryFile(prefix=f'managed-user-dag-{dag.name}-',
|
185
|
+
mode='w') as original_user_yaml_path:
|
186
|
+
original_user_yaml_path.write(user_dag_str)
|
187
|
+
original_user_yaml_path.flush()
|
188
|
+
|
180
189
|
dag_utils.dump_chain_dag_to_yaml(dag, f.name)
|
181
190
|
controller = controller_utils.Controllers.JOBS_CONTROLLER
|
182
191
|
controller_name = controller.value.cluster_name
|
183
192
|
prefix = managed_job_constants.JOBS_TASK_YAML_PREFIX
|
193
|
+
remote_original_user_yaml_path = (
|
194
|
+
f'{prefix}/{dag.name}-{dag_uuid}.original_user_yaml')
|
184
195
|
remote_user_yaml_path = f'{prefix}/{dag.name}-{dag_uuid}.yaml'
|
185
196
|
remote_user_config_path = f'{prefix}/{dag.name}-{dag_uuid}.config_yaml'
|
186
197
|
remote_env_file_path = f'{prefix}/{dag.name}-{dag_uuid}.env'
|
@@ -189,6 +200,8 @@ def launch(
|
|
189
200
|
task_resources=sum([list(t.resources) for t in dag.tasks], []))
|
190
201
|
|
191
202
|
vars_to_fill = {
|
203
|
+
'remote_original_user_yaml_path': remote_original_user_yaml_path,
|
204
|
+
'original_user_dag_path': original_user_yaml_path.name,
|
192
205
|
'remote_user_yaml_path': remote_user_yaml_path,
|
193
206
|
'user_yaml_path': f.name,
|
194
207
|
'local_to_controller_file_mounts': local_to_controller_file_mounts,
|
sky/jobs/state.py
CHANGED
@@ -122,7 +122,8 @@ def create_table(cursor, conn):
|
|
122
122
|
user_hash TEXT,
|
123
123
|
workspace TEXT DEFAULT NULL,
|
124
124
|
priority INTEGER DEFAULT 500,
|
125
|
-
entrypoint TEXT DEFAULT NULL
|
125
|
+
entrypoint TEXT DEFAULT NULL,
|
126
|
+
original_user_yaml_path TEXT DEFAULT NULL)""")
|
126
127
|
|
127
128
|
db_utils.add_column_to_table(cursor, conn, 'job_info', 'schedule_state',
|
128
129
|
'TEXT')
|
@@ -153,6 +154,8 @@ def create_table(cursor, conn):
|
|
153
154
|
value_to_replace_existing_entries=500)
|
154
155
|
|
155
156
|
db_utils.add_column_to_table(cursor, conn, 'job_info', 'entrypoint', 'TEXT')
|
157
|
+
db_utils.add_column_to_table(cursor, conn, 'job_info',
|
158
|
+
'original_user_yaml_path', 'TEXT')
|
156
159
|
conn.commit()
|
157
160
|
|
158
161
|
|
@@ -212,6 +215,7 @@ columns = [
|
|
212
215
|
'workspace',
|
213
216
|
'priority',
|
214
217
|
'entrypoint',
|
218
|
+
'original_user_yaml_path',
|
215
219
|
]
|
216
220
|
|
217
221
|
|
@@ -1013,19 +1017,16 @@ def get_managed_jobs(job_id: Optional[int] = None) -> List[Dict[str, Any]]:
|
|
1013
1017
|
if job_dict['job_name'] is None:
|
1014
1018
|
job_dict['job_name'] = job_dict['task_name']
|
1015
1019
|
|
1016
|
-
# Add YAML content
|
1017
|
-
|
1018
|
-
if
|
1020
|
+
# Add user YAML content for managed jobs.
|
1021
|
+
yaml_path = job_dict.get('original_user_yaml_path')
|
1022
|
+
if yaml_path:
|
1019
1023
|
try:
|
1020
|
-
with open(
|
1021
|
-
job_dict['
|
1024
|
+
with open(yaml_path, 'r', encoding='utf-8') as f:
|
1025
|
+
job_dict['user_yaml'] = f.read()
|
1022
1026
|
except (FileNotFoundError, IOError, OSError):
|
1023
|
-
job_dict['
|
1024
|
-
|
1025
|
-
# Generate a command that could be used to launch this job
|
1026
|
-
# Format: sky jobs launch <yaml_path>
|
1027
|
+
job_dict['user_yaml'] = None
|
1027
1028
|
else:
|
1028
|
-
job_dict['
|
1029
|
+
job_dict['user_yaml'] = None
|
1029
1030
|
|
1030
1031
|
jobs.append(job_dict)
|
1031
1032
|
return jobs
|
@@ -1083,18 +1084,20 @@ def get_local_log_file(job_id: int, task_id: Optional[int]) -> Optional[str]:
|
|
1083
1084
|
# scheduler lock to work correctly.
|
1084
1085
|
|
1085
1086
|
|
1086
|
-
def scheduler_set_waiting(job_id: int, dag_yaml_path: str,
|
1087
|
+
def scheduler_set_waiting(job_id: int, dag_yaml_path: str,
|
1088
|
+
original_user_yaml_path: str, env_file_path: str,
|
1087
1089
|
user_hash: str, priority: int) -> None:
|
1088
1090
|
"""Do not call without holding the scheduler lock."""
|
1089
1091
|
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
1090
1092
|
updated_count = cursor.execute(
|
1091
1093
|
'UPDATE job_info SET '
|
1092
|
-
'schedule_state = (?), dag_yaml_path = (?),
|
1094
|
+
'schedule_state = (?), dag_yaml_path = (?), '
|
1095
|
+
'original_user_yaml_path = (?), env_file_path = (?), '
|
1093
1096
|
' user_hash = (?), priority = (?) '
|
1094
1097
|
'WHERE spot_job_id = (?) AND schedule_state = (?)',
|
1095
1098
|
(ManagedJobScheduleState.WAITING.value, dag_yaml_path,
|
1096
|
-
env_file_path, user_hash, priority,
|
1097
|
-
ManagedJobScheduleState.INACTIVE.value)).rowcount
|
1099
|
+
original_user_yaml_path, env_file_path, user_hash, priority,
|
1100
|
+
job_id, ManagedJobScheduleState.INACTIVE.value)).rowcount
|
1098
1101
|
assert updated_count == 1, (job_id, updated_count)
|
1099
1102
|
|
1100
1103
|
|
@@ -2342,6 +2342,7 @@ def get_endpoint_debug_message() -> str:
|
|
2342
2342
|
def combine_pod_config_fields(
|
2343
2343
|
cluster_yaml_path: str,
|
2344
2344
|
cluster_config_overrides: Dict[str, Any],
|
2345
|
+
cloud: Optional[clouds.Cloud] = None,
|
2345
2346
|
) -> None:
|
2346
2347
|
"""Adds or updates fields in the YAML with fields from the
|
2347
2348
|
~/.sky/config.yaml's kubernetes.pod_spec dict.
|
@@ -2386,11 +2387,17 @@ def combine_pod_config_fields(
|
|
2386
2387
|
yaml_obj = yaml.safe_load(yaml_content)
|
2387
2388
|
# We don't use override_configs in `skypilot_config.get_nested`, as merging
|
2388
2389
|
# the pod config requires special handling.
|
2389
|
-
|
2390
|
-
|
2391
|
-
|
2392
|
-
|
2393
|
-
'
|
2390
|
+
if isinstance(cloud, clouds.SSH):
|
2391
|
+
kubernetes_config = skypilot_config.get_nested(('ssh', 'pod_config'),
|
2392
|
+
default_value={},
|
2393
|
+
override_configs={})
|
2394
|
+
override_pod_config = (cluster_config_overrides.get('ssh', {}).get(
|
2395
|
+
'pod_config', {}))
|
2396
|
+
else:
|
2397
|
+
kubernetes_config = skypilot_config.get_nested(
|
2398
|
+
('kubernetes', 'pod_config'), default_value={}, override_configs={})
|
2399
|
+
override_pod_config = (cluster_config_overrides.get(
|
2400
|
+
'kubernetes', {}).get('pod_config', {}))
|
2394
2401
|
config_utils.merge_k8s_configs(kubernetes_config, override_pod_config)
|
2395
2402
|
|
2396
2403
|
# Merge the kubernetes config into the YAML for both head and worker nodes.
|
@@ -0,0 +1,47 @@
|
|
1
|
+
"""Constants used by the Nebius provisioner."""
|
2
|
+
|
3
|
+
VERSION = 'v1'
|
4
|
+
|
5
|
+
# InfiniBand-capable instance platforms
|
6
|
+
INFINIBAND_INSTANCE_PLATFORMS = [
|
7
|
+
'gpu-h100-sxm',
|
8
|
+
'gpu-h200-sxm',
|
9
|
+
]
|
10
|
+
|
11
|
+
# InfiniBand environment variables for NCCL and UCX
|
12
|
+
INFINIBAND_ENV_VARS = {
|
13
|
+
'NCCL_IB_HCA': 'mlx5',
|
14
|
+
'UCX_NET_DEVICES': ('mlx5_0:1,mlx5_1:1,mlx5_2:1,mlx5_3:1,'
|
15
|
+
'mlx5_4:1,mlx5_5:1,mlx5_6:1,mlx5_7:1')
|
16
|
+
}
|
17
|
+
|
18
|
+
# Docker run options for InfiniBand support
|
19
|
+
INFINIBAND_DOCKER_OPTIONS = ['--device=/dev/infiniband', '--cap-add=IPC_LOCK']
|
20
|
+
|
21
|
+
# InfiniBand fabric mapping by platform and region
|
22
|
+
# Based on Nebius documentation
|
23
|
+
INFINIBAND_FABRIC_MAPPING = {
|
24
|
+
# H100 platforms
|
25
|
+
('gpu-h100-sxm', 'eu-north1'): [
|
26
|
+
'fabric-2', 'fabric-3', 'fabric-4', 'fabric-6'
|
27
|
+
],
|
28
|
+
|
29
|
+
# H200 platforms
|
30
|
+
('gpu-h200-sxm', 'eu-north1'): ['fabric-7'],
|
31
|
+
('gpu-h200-sxm', 'eu-west1'): ['fabric-5'],
|
32
|
+
('gpu-h200-sxm', 'us-central1'): ['us-central1-a'],
|
33
|
+
}
|
34
|
+
|
35
|
+
|
36
|
+
def get_default_fabric(platform: str, region: str) -> str:
|
37
|
+
"""Get the default (first) fabric for a given platform and region."""
|
38
|
+
fabrics = INFINIBAND_FABRIC_MAPPING.get((platform, region), [])
|
39
|
+
if not fabrics:
|
40
|
+
# Select north europe region as default
|
41
|
+
fabrics = INFINIBAND_FABRIC_MAPPING.get(('gpu-h100-sxm', 'eu-north1'),
|
42
|
+
[])
|
43
|
+
if not fabrics:
|
44
|
+
raise ValueError(
|
45
|
+
f'No InfiniBand fabric available for platform {platform} '
|
46
|
+
f'in region {region}')
|
47
|
+
return fabrics[0]
|
sky/provision/nebius/instance.py
CHANGED
@@ -124,6 +124,7 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
124
124
|
node_type = 'head' if head_instance_id is None else 'worker'
|
125
125
|
try:
|
126
126
|
platform, preset = config.node_config['InstanceType'].split('_')
|
127
|
+
|
127
128
|
instance_id = utils.launch(
|
128
129
|
cluster_name_on_cloud=cluster_name_on_cloud,
|
129
130
|
node_type=node_type,
|
@@ -136,7 +137,7 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
136
137
|
associate_public_ip_address=(
|
137
138
|
not config.provider_config['use_internal_ips']),
|
138
139
|
filesystems=config.node_config.get('filesystems', []),
|
139
|
-
|
140
|
+
network_tier=config.node_config.get('network_tier'))
|
140
141
|
except Exception as e: # pylint: disable=broad-except
|
141
142
|
logger.warning(f'run_instances error: {e}')
|
142
143
|
raise
|
sky/provision/nebius/utils.py
CHANGED
@@ -1,12 +1,14 @@
|
|
1
1
|
"""Nebius library wrapper for SkyPilot."""
|
2
2
|
import time
|
3
|
-
from typing import Any, Dict, List
|
3
|
+
from typing import Any, Dict, List, Optional
|
4
4
|
import uuid
|
5
5
|
|
6
6
|
from sky import sky_logging
|
7
7
|
from sky import skypilot_config
|
8
8
|
from sky.adaptors import nebius
|
9
|
+
from sky.provision.nebius import constants as nebius_constants
|
9
10
|
from sky.utils import common_utils
|
11
|
+
from sky.utils import resources_utils
|
10
12
|
|
11
13
|
logger = sky_logging.init_logger(__name__)
|
12
14
|
|
@@ -156,10 +158,17 @@ def start(instance_id: str) -> None:
|
|
156
158
|
f' to be ready.')
|
157
159
|
|
158
160
|
|
159
|
-
def launch(cluster_name_on_cloud: str,
|
160
|
-
|
161
|
-
|
162
|
-
|
161
|
+
def launch(cluster_name_on_cloud: str,
|
162
|
+
node_type: str,
|
163
|
+
platform: str,
|
164
|
+
preset: str,
|
165
|
+
region: str,
|
166
|
+
image_family: str,
|
167
|
+
disk_size: int,
|
168
|
+
user_data: str,
|
169
|
+
associate_public_ip_address: bool,
|
170
|
+
filesystems: List[Dict[str, Any]],
|
171
|
+
network_tier: Optional[resources_utils.NetworkTier] = None) -> str:
|
163
172
|
# Each node must have a unique name to avoid conflicts between
|
164
173
|
# multiple worker VMs. To ensure uniqueness,a UUID is appended
|
165
174
|
# to the node name.
|
@@ -173,11 +182,23 @@ def launch(cluster_name_on_cloud: str, node_type: str, platform: str,
|
|
173
182
|
# 8 GPU virtual machines can be grouped into a GPU cluster.
|
174
183
|
# The GPU clusters are built with InfiniBand secure high-speed networking.
|
175
184
|
# https://docs.nebius.com/compute/clusters/gpu
|
176
|
-
if platform in
|
185
|
+
if platform in nebius_constants.INFINIBAND_INSTANCE_PLATFORMS:
|
177
186
|
if preset == '8gpu-128vcpu-1600gb':
|
178
|
-
# Check is there fabric in config
|
179
187
|
fabric = skypilot_config.get_nested(('nebius', region, 'fabric'),
|
180
188
|
None)
|
189
|
+
|
190
|
+
# Auto-select fabric if network_tier=best and no fabric configured
|
191
|
+
if (fabric is None and
|
192
|
+
str(network_tier) == str(resources_utils.NetworkTier.BEST)):
|
193
|
+
try:
|
194
|
+
fabric = nebius_constants.get_default_fabric(
|
195
|
+
platform, region)
|
196
|
+
logger.info(f'Auto-selected InfiniBand fabric {fabric} '
|
197
|
+
f'for {platform} in {region}')
|
198
|
+
except ValueError as e:
|
199
|
+
logger.warning(
|
200
|
+
f'InfiniBand fabric auto-selection failed: {e}')
|
201
|
+
|
181
202
|
if fabric is None:
|
182
203
|
logger.warning(
|
183
204
|
f'Set up fabric for region {region} in ~/.sky/config.yaml '
|
sky/skylet/constants.py
CHANGED
@@ -367,6 +367,7 @@ RCLONE_CACHE_REFRESH_INTERVAL = 10
|
|
367
367
|
OVERRIDEABLE_CONFIG_KEYS_IN_TASK: List[Tuple[str, ...]] = [
|
368
368
|
('docker', 'run_options'),
|
369
369
|
('nvidia_gpus', 'disable_ecc'),
|
370
|
+
('ssh', 'pod_config'),
|
370
371
|
('kubernetes', 'pod_config'),
|
371
372
|
('kubernetes', 'provision_timeout'),
|
372
373
|
('gcp', 'managed_instance_group'),
|
sky/skypilot_config.py
CHANGED
@@ -167,7 +167,10 @@ def _get_loaded_config_path() -> List[Optional[str]]:
|
|
167
167
|
serialized = _get_config_context().config_path
|
168
168
|
if not serialized:
|
169
169
|
return []
|
170
|
-
|
170
|
+
config_paths = json.loads(serialized)
|
171
|
+
if config_paths is None:
|
172
|
+
return []
|
173
|
+
return config_paths
|
171
174
|
|
172
175
|
|
173
176
|
def _set_loaded_config_path(
|
@@ -3,6 +3,7 @@
|
|
3
3
|
name: {{dag_name}}
|
4
4
|
|
5
5
|
file_mounts:
|
6
|
+
{{remote_original_user_yaml_path}}: {{original_user_dag_path}}
|
6
7
|
{{remote_user_yaml_path}}: {{user_yaml_path}}
|
7
8
|
{%- if local_user_config_path is not none %}
|
8
9
|
{{remote_user_config_path}}: {{local_user_config_path}}
|
@@ -28,7 +29,7 @@ setup: |
|
|
28
29
|
grep -q 'export SKYPILOT_DEV=' ~/.bashrc || echo 'export SKYPILOT_DEV=1' >> ~/.bashrc
|
29
30
|
grep -q 'alias sky-env=' ~/.bashrc || echo 'alias sky-env="{{ sky_activate_python_env }}"' >> ~/.bashrc
|
30
31
|
{% endif %}
|
31
|
-
|
32
|
+
|
32
33
|
# Create systemd service file
|
33
34
|
mkdir -p ~/.config/systemd/user/
|
34
35
|
|
@@ -65,6 +66,7 @@ run: |
|
|
65
66
|
# CloudVmRayBackend._exec_code_on_head() calls
|
66
67
|
# managed_job_codegen.set_pending() before we get here.
|
67
68
|
python -u -m sky.jobs.scheduler {{remote_user_yaml_path}} \
|
69
|
+
--user-yaml-path {{remote_original_user_yaml_path}} \
|
68
70
|
--job-id $SKYPILOT_INTERNAL_JOB_ID \
|
69
71
|
--env-file {{remote_env_file_path}} \
|
70
72
|
--priority {{priority}}
|
sky/templates/nebius-ray.yml.j2
CHANGED
@@ -46,6 +46,7 @@ available_node_types:
|
|
46
46
|
InstanceType: {{instance_type}}
|
47
47
|
ImageId: {{image_id}}
|
48
48
|
DiskSize: {{disk_size}}
|
49
|
+
network_tier: {{network_tier}}
|
49
50
|
filesystems:
|
50
51
|
{%- for fs in filesystems %}
|
51
52
|
- filesystem_id: {{ fs.filesystem_id }}
|
@@ -152,6 +153,11 @@ setup_commands:
|
|
152
153
|
mkdir -p ~/.ssh; touch ~/.ssh/config;
|
153
154
|
{{ conda_installation_commands }}
|
154
155
|
{{ ray_skypilot_installation_commands }}
|
156
|
+
{%- if env_vars is defined %}
|
157
|
+
{%- for env_var, env_value in env_vars.items() %}
|
158
|
+
echo '{{env_var}}={{env_value}}' | sudo tee -a /etc/environment;
|
159
|
+
{%- endfor %}
|
160
|
+
{%- endif %}
|
155
161
|
sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf';
|
156
162
|
sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
|
157
163
|
mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no\n IdentityFile ~/.ssh/sky-cluster-key\n IdentityFile ~/.ssh/id_rsa" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n IdentityFile ~/.ssh/sky-cluster-key\n IdentityFile ~/.ssh/id_rsa\n" >> ~/.ssh/config;
|
@@ -723,14 +723,16 @@ def main():
|
|
723
723
|
# Do not support changing anything besides hosts for now
|
724
724
|
if history is not None:
|
725
725
|
for key in ['user', 'identity_file', 'password']:
|
726
|
-
if
|
726
|
+
if not args.cleanup and history.get(
|
727
|
+
key) != cluster_config.get(key):
|
727
728
|
raise ValueError(
|
728
729
|
f'Cluster configuration has changed for field {key!r}. '
|
729
730
|
f'Previous value: {history.get(key)}, '
|
730
731
|
f'Current value: {cluster_config.get(key)}')
|
731
732
|
history_hosts_info = prepare_hosts_info(
|
732
733
|
cluster_name, history)
|
733
|
-
if history_hosts_info[0] != hosts_info[
|
734
|
+
if not args.cleanup and history_hosts_info[0] != hosts_info[
|
735
|
+
0]:
|
734
736
|
raise ValueError(
|
735
737
|
f'Cluster configuration has changed for master node. '
|
736
738
|
f'Previous value: {history_hosts_info[0]}, '
|
@@ -860,7 +862,7 @@ def deploy_cluster(head_node,
|
|
860
862
|
use_ssh_config=head_use_ssh_config,
|
861
863
|
# For SkySSHUpLineProcessor
|
862
864
|
print_output=True)
|
863
|
-
if result is None:
|
865
|
+
if not cleanup and result is None:
|
864
866
|
with ux_utils.print_exception_no_traceback():
|
865
867
|
raise RuntimeError(
|
866
868
|
f'Failed to SSH to head node ({head_node}). '
|
sky/utils/resources_utils.py
CHANGED
@@ -63,7 +63,9 @@ class NetworkTier(enum.Enum):
|
|
63
63
|
def cli_help_message(cls) -> str:
|
64
64
|
return (
|
65
65
|
f'Network tier. Could be one of {", ".join(cls.supported_tiers())}'
|
66
|
-
f'.
|
66
|
+
f'. If {cls.BEST.value} is specified, use the best network tier '
|
67
|
+
'available on the specified instance. '
|
68
|
+
f'Default: {cls.STANDARD.value}')
|
67
69
|
|
68
70
|
@classmethod
|
69
71
|
def from_str(cls, tier: str) -> 'NetworkTier':
|