skypilot-nightly 1.0.0.dev20250424__py3-none-any.whl → 1.0.0.dev20250426__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/aws.py +57 -4
- sky/adaptors/common.py +2 -1
- sky/adaptors/kubernetes.py +14 -9
- sky/cli.py +1 -1
- sky/client/cli.py +1 -1
- sky/client/sdk.py +7 -4
- sky/clouds/aws.py +3 -4
- sky/clouds/gcp.py +28 -12
- sky/clouds/runpod.py +14 -5
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +3 -2
- sky/clouds/service_catalog/runpod_catalog.py +0 -10
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/data/storage.py +4 -4
- sky/exceptions.py +5 -0
- sky/jobs/server/core.py +31 -3
- sky/provision/aws/config.py +30 -11
- sky/provision/aws/instance.py +24 -10
- sky/provision/runpod/instance.py +17 -14
- sky/provision/runpod/utils.py +4 -2
- sky/server/common.py +25 -4
- sky/templates/runpod-ray.yml.j2 +1 -0
- {skypilot_nightly-1.0.0.dev20250424.dist-info → skypilot_nightly-1.0.0.dev20250426.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250424.dist-info → skypilot_nightly-1.0.0.dev20250426.dist-info}/RECORD +36 -36
- /sky/dashboard/out/_next/static/{MqmRhu1oPMgLy6v25hibm → WO8lTFPfj-lO3_gDGEiN8}/_buildManifest.js +0 -0
- /sky/dashboard/out/_next/static/{MqmRhu1oPMgLy6v25hibm → WO8lTFPfj-lO3_gDGEiN8}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250424.dist-info → skypilot_nightly-1.0.0.dev20250426.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250424.dist-info → skypilot_nightly-1.0.0.dev20250426.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250424.dist-info → skypilot_nightly-1.0.0.dev20250426.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250424.dist-info → skypilot_nightly-1.0.0.dev20250426.dist-info}/top_level.txt +0 -0
sky/dashboard/out/jobs.html
CHANGED
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><link rel="preload" href="/dashboard/skypilot.svg" as="image" fetchpriority="high"/><meta name="next-head-count" content="3"/><link rel="preload" href="/dashboard/_next/static/css/c6933bbb2ce7f4dd.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/c6933bbb2ce7f4dd.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-830f59b8404e96b8.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js" defer=""></script><script src="/dashboard/_next/static/chunks/678-206dddca808e6d16.js" defer=""></script><script src="/dashboard/_next/static/chunks/312-c3c8845990db8ffc.js" defer=""></script><script src="/dashboard/_next/static/chunks/979-7bf73a4c7cea0f5c.js" defer=""></script><script src="/dashboard/_next/static/chunks/845-9e60713e0c441abc.js" defer=""></script><script src="/dashboard/_next/static/chunks/236-2db3ee3fba33dd9e.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js" defer=""></script><script src="/dashboard/_next/static/MqmRhu1oPMgLy6v25hibm/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/MqmRhu1oPMgLy6v25hibm/_ssgManifest.js" defer=""></script></head><body><div id="__next"><div class="min-h-screen bg-gray-50"><div class="fixed top-0 left-0 right-0 z-50 shadow-sm"><div class="fixed top-0 left-0 right-0 bg-white z-30 h-14 px-4 border-b border-gray-200 shadow-sm"><div class="flex items-center h-full"><div class="flex items-center space-x-4 mr-6"><a class="flex items-center px-1 pt-1 h-full" href="/dashboard"><div class="h-20 w-20 flex items-center justify-center"><img alt="SkyPilot Logo" fetchpriority="high" width="80" height="80" decoding="async" data-nimg="1" class="w-full h-full object-contain" style="color:transparent" src="/dashboard/skypilot.svg"/></div></a></div><div class="flex items-center space-x-2 md:space-x-6 mr-6"><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/clusters"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="20" height="8" x="2" y="2" rx="2" ry="2"></rect><rect width="20" height="8" x="2" y="14" rx="2" ry="2"></rect><line x1="6" x2="6.01" y1="6" y2="6"></line><line x1="6" x2="6.01" y1="18" y2="18"></line></svg><span>Clusters</span></a><a class="inline-flex items-center border-b-2 border-transparent text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/jobs"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M16 20V4a2 2 0 0 0-2-2h-4a2 2 0 0 0-2 2v16"></path><rect width="20" height="14" x="2" y="6" rx="2"></rect></svg><span>Jobs</span></a><div class="inline-flex items-center px-1 pt-1 text-gray-400"><svg class="w-4 h-4" viewBox="0 0 423.683 423.683" width="24" height="24" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" fill="currentColor" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"><g><path d="M54.376,287.577h310.459c26.48,0,48.02-13.979,48.02-40.453c0-17.916-10.001-34.07-25.559-42.292 c-19.021-72.951-86.061-125.196-162.002-125.223v-3.431h-3.854V61.814h3.854v-9.569h-31.38v9.569h3.854v14.363h-3.854v3.431 c-75.941,0.026-142.97,52.272-161.988,125.217c-15.56,8.216-25.573,24.376-25.573,42.291 C6.36,273.597,27.896,287.577,54.376,287.577z M47.676,227.145l7.214-2.424l1.617-7.447 c13.884-64.232,71.707-110.862,137.467-110.862h31.274c65.763,0,123.582,46.63,137.473,110.862l1.607,7.447l7.223,2.424 c8.678,2.92,14.506,10.946,14.506,19.979c0,11.703-9.517,13.647-21.221,13.647H54.376c-11.7,0-21.22-1.944-21.22-13.647 C33.162,238.091,38.984,230.065,47.676,227.145z M423.683,334.602v36.836H0v-36.836h25.348v-18.418h372.99v18.418H423.683z"></path></g></g></svg><span class="ml-2">Services</span><span class="text-xs ml-2 px-1.5 py-0.5 bg-gray-100 text-gray-500 rounded">Soon</span></div></div><div class="flex items-center space-x-1 ml-auto"><a href="https://skypilot.readthedocs.io/en/latest/" target="_blank" rel="noopener noreferrer" class="inline-flex items-center px-2 py-1 text-gray-600 hover:text-blue-600 transition-colors duration-150 cursor-pointer" title="Docs"><span class="mr-1">Docs</span><svg class="w-3.5 h-3.5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"></path><polyline points="15 3 21 3 21 9"></polyline><line x1="10" y1="14" x2="21" y2="3"></line></svg></a><div class="border-l border-gray-200 h-6 mx-1"></div><a href="https://github.com/skypilot-org/skypilot" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="GitHub"><svg class="w-5 h-5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"></path></svg></a><a href="https://slack.skypilot.co/" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="Slack"><svg class="w-5 h-5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="currentColor"><path transform="scale(0.85) translate(1.8, 1.8)" d="M5.042 15.165a2.528 2.528 0 0 1-2.52 2.523A2.528 2.528 0 0 1 0 15.165a2.527 2.527 0 0 1 2.522-2.52h2.52v2.52zM6.313 15.165a2.527 2.527 0 0 1 2.521-2.52 2.527 2.527 0 0 1 2.521 2.52v6.313A2.528 2.528 0 0 1 8.834 24a2.528 2.528 0 0 1-2.521-2.522v-6.313zM8.834 5.042a2.528 2.528 0 0 1-2.521-2.52A2.528 2.528 0 0 1 8.834 0a2.528 2.528 0 0 1 2.521 2.522v2.52H8.834zM8.834 6.313a2.528 2.528 0 0 1 2.521 2.521 2.528 2.528 0 0 1-2.521 2.521H2.522A2.528 2.528 0 0 1 0 8.834a2.528 2.528 0 0 1 2.522-2.521h6.312zM18.956 8.834a2.528 2.528 0 0 1 2.522-2.521A2.528 2.528 0 0 1 24 8.834a2.528 2.528 0 0 1-2.522 2.521h-2.522V8.834zM17.688 8.834a2.528 2.528 0 0 1-2.523 2.521 2.527 2.527 0 0 1-2.52-2.521V2.522A2.527 2.527 0 0 1 15.165 0a2.528 2.528 0 0 1 2.523 2.522v6.312zM15.165 18.956a2.528 2.528 0 0 1 2.523 2.522A2.528 2.528 0 0 1 15.165 24a2.527 2.527 0 0 1-2.52-2.522v-2.522h2.52zM15.165 17.688a2.527 2.527 0 0 1-2.52-2.523 2.526 2.526 0 0 1 2.52-2.52h6.313A2.527 2.527 0 0 1 24 15.165a2.528 2.528 0 0 1-2.522 2.523h-6.313z"></path></svg></a><a href="https://github.com/skypilot-org/skypilot/issues/new" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="Leave Feedback"><svg class="w-5 h-5" stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><g><path fill="none" d="M0 0h24v24H0z"></path><path d="M6.455 19L2 22.5V4a1 1 0 0 1 1-1h18a1 1 0 0 1 1 1v14a1 1 0 0 1-1 1H6.455zM4 18.385L5.763 17H20V5H4v13.385zM11 13h2v2h-2v-2zm0-6h2v5h-2V7z"></path></g></svg></a></div></div></div></div><div class="transition-all duration-200 ease-in-out min-h-screen" style="padding-top:56px"><main class="p-6"><div class="flex items-center justify-between mb-4 h-5"><div class="text-base"><a class="text-sky-blue hover:underline leading-none" href="/dashboard/jobs">Managed Jobs</a></div><div class="flex items-center space-x-2"><button class="inline-flex items-center justify-center whitespace-nowrap text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 hover:bg-accent h-9 rounded-md px-3 text-sky-blue hover:text-sky-blue-bright" title="Refresh"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-rotate-cw h-4 w-4 mr-1.5"><path d="M21 12a9 9 0 1 1-9-9c2.52 0 4.93 1 6.74 2.74L21 8"></path><path d="M21 3v5h-5"></path></svg><span>Refresh</span></button></div></div><div class="relative"><div class="flex flex-col space-y-1 mb-1"><div class="flex flex-wrap items-center text-sm mb-1"><span class="mr-2 text-sm font-medium">Statuses:</span><div class="flex flex-wrap gap-2 items-center"></div></div></div><div class="rounded-lg border bg-card text-card-foreground shadow-sm"><div class="relative w-full overflow-auto"><table class="w-full caption-bottom text-base"><thead class="[&_tr]:border-b"><tr class="border-b transition-colors hover:bg-muted/50 data-[state=selected]:bg-muted"><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">ID</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Name</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">User</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Submitted</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Duration</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Status</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Resources</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Cluster</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Region</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Recoveries</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0">Details</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0">Logs</th></tr></thead><tbody class="[&_tr:last-child]:border-0"><tr class="border-b transition-colors hover:bg-muted/50 data-[state=selected]:bg-muted"><td class="p-4 align-middle [&:has([role=checkbox])]:pr-0 text-center py-6" colSpan="12"><div class="flex flex-col items-center space-y-4"><p class="text-gray-500">No active jobs</p></div></td></tr></tbody></table></div></div></div></main></div></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs","query":{},"buildId":"MqmRhu1oPMgLy6v25hibm","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><link rel="preload" href="/dashboard/skypilot.svg" as="image" fetchpriority="high"/><meta name="next-head-count" content="3"/><link rel="preload" href="/dashboard/_next/static/css/c6933bbb2ce7f4dd.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/c6933bbb2ce7f4dd.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-830f59b8404e96b8.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-87d061ee6ed71b28.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-e0e2335212e72357.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js" defer=""></script><script src="/dashboard/_next/static/chunks/678-206dddca808e6d16.js" defer=""></script><script src="/dashboard/_next/static/chunks/312-c3c8845990db8ffc.js" defer=""></script><script src="/dashboard/_next/static/chunks/979-7bf73a4c7cea0f5c.js" defer=""></script><script src="/dashboard/_next/static/chunks/845-9e60713e0c441abc.js" defer=""></script><script src="/dashboard/_next/static/chunks/236-2db3ee3fba33dd9e.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js" defer=""></script><script src="/dashboard/_next/static/WO8lTFPfj-lO3_gDGEiN8/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/WO8lTFPfj-lO3_gDGEiN8/_ssgManifest.js" defer=""></script></head><body><div id="__next"><div class="min-h-screen bg-gray-50"><div class="fixed top-0 left-0 right-0 z-50 shadow-sm"><div class="fixed top-0 left-0 right-0 bg-white z-30 h-14 px-4 border-b border-gray-200 shadow-sm"><div class="flex items-center h-full"><div class="flex items-center space-x-4 mr-6"><a class="flex items-center px-1 pt-1 h-full" href="/dashboard"><div class="h-20 w-20 flex items-center justify-center"><img alt="SkyPilot Logo" fetchpriority="high" width="80" height="80" decoding="async" data-nimg="1" class="w-full h-full object-contain" style="color:transparent" src="/dashboard/skypilot.svg"/></div></a></div><div class="flex items-center space-x-2 md:space-x-6 mr-6"><a class="inline-flex items-center border-b-2 border-transparent hover:text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/clusters"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="20" height="8" x="2" y="2" rx="2" ry="2"></rect><rect width="20" height="8" x="2" y="14" rx="2" ry="2"></rect><line x1="6" x2="6.01" y1="6" y2="6"></line><line x1="6" x2="6.01" y1="18" y2="18"></line></svg><span>Clusters</span></a><a class="inline-flex items-center border-b-2 border-transparent text-blue-600 px-1 pt-1 space-x-2" href="/dashboard/jobs"><svg class="w-4 h-4" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M16 20V4a2 2 0 0 0-2-2h-4a2 2 0 0 0-2 2v16"></path><rect width="20" height="14" x="2" y="6" rx="2"></rect></svg><span>Jobs</span></a><div class="inline-flex items-center px-1 pt-1 text-gray-400"><svg class="w-4 h-4" viewBox="0 0 423.683 423.683" width="24" height="24" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" fill="currentColor" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"><g><path d="M54.376,287.577h310.459c26.48,0,48.02-13.979,48.02-40.453c0-17.916-10.001-34.07-25.559-42.292 c-19.021-72.951-86.061-125.196-162.002-125.223v-3.431h-3.854V61.814h3.854v-9.569h-31.38v9.569h3.854v14.363h-3.854v3.431 c-75.941,0.026-142.97,52.272-161.988,125.217c-15.56,8.216-25.573,24.376-25.573,42.291 C6.36,273.597,27.896,287.577,54.376,287.577z M47.676,227.145l7.214-2.424l1.617-7.447 c13.884-64.232,71.707-110.862,137.467-110.862h31.274c65.763,0,123.582,46.63,137.473,110.862l1.607,7.447l7.223,2.424 c8.678,2.92,14.506,10.946,14.506,19.979c0,11.703-9.517,13.647-21.221,13.647H54.376c-11.7,0-21.22-1.944-21.22-13.647 C33.162,238.091,38.984,230.065,47.676,227.145z M423.683,334.602v36.836H0v-36.836h25.348v-18.418h372.99v18.418H423.683z"></path></g></g></svg><span class="ml-2">Services</span><span class="text-xs ml-2 px-1.5 py-0.5 bg-gray-100 text-gray-500 rounded">Soon</span></div></div><div class="flex items-center space-x-1 ml-auto"><a href="https://skypilot.readthedocs.io/en/latest/" target="_blank" rel="noopener noreferrer" class="inline-flex items-center px-2 py-1 text-gray-600 hover:text-blue-600 transition-colors duration-150 cursor-pointer" title="Docs"><span class="mr-1">Docs</span><svg class="w-3.5 h-3.5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"></path><polyline points="15 3 21 3 21 9"></polyline><line x1="10" y1="14" x2="21" y2="3"></line></svg></a><div class="border-l border-gray-200 h-6 mx-1"></div><a href="https://github.com/skypilot-org/skypilot" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="GitHub"><svg class="w-5 h-5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"></path></svg></a><a href="https://slack.skypilot.co/" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="Slack"><svg class="w-5 h-5" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="currentColor"><path transform="scale(0.85) translate(1.8, 1.8)" d="M5.042 15.165a2.528 2.528 0 0 1-2.52 2.523A2.528 2.528 0 0 1 0 15.165a2.527 2.527 0 0 1 2.522-2.52h2.52v2.52zM6.313 15.165a2.527 2.527 0 0 1 2.521-2.52 2.527 2.527 0 0 1 2.521 2.52v6.313A2.528 2.528 0 0 1 8.834 24a2.528 2.528 0 0 1-2.521-2.522v-6.313zM8.834 5.042a2.528 2.528 0 0 1-2.521-2.52A2.528 2.528 0 0 1 8.834 0a2.528 2.528 0 0 1 2.521 2.522v2.52H8.834zM8.834 6.313a2.528 2.528 0 0 1 2.521 2.521 2.528 2.528 0 0 1-2.521 2.521H2.522A2.528 2.528 0 0 1 0 8.834a2.528 2.528 0 0 1 2.522-2.521h6.312zM18.956 8.834a2.528 2.528 0 0 1 2.522-2.521A2.528 2.528 0 0 1 24 8.834a2.528 2.528 0 0 1-2.522 2.521h-2.522V8.834zM17.688 8.834a2.528 2.528 0 0 1-2.523 2.521 2.527 2.527 0 0 1-2.52-2.521V2.522A2.527 2.527 0 0 1 15.165 0a2.528 2.528 0 0 1 2.523 2.522v6.312zM15.165 18.956a2.528 2.528 0 0 1 2.523 2.522A2.528 2.528 0 0 1 15.165 24a2.527 2.527 0 0 1-2.52-2.522v-2.522h2.52zM15.165 17.688a2.527 2.527 0 0 1-2.52-2.523 2.526 2.526 0 0 1 2.52-2.52h6.313A2.527 2.527 0 0 1 24 15.165a2.528 2.528 0 0 1-2.522 2.523h-6.313z"></path></svg></a><a href="https://github.com/skypilot-org/skypilot/issues/new" target="_blank" rel="noopener noreferrer" class="inline-flex items-center justify-center p-2 rounded-full text-gray-600 hover:bg-gray-100 transition-colors duration-150 cursor-pointer" title="Leave Feedback"><svg class="w-5 h-5" stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><g><path fill="none" d="M0 0h24v24H0z"></path><path d="M6.455 19L2 22.5V4a1 1 0 0 1 1-1h18a1 1 0 0 1 1 1v14a1 1 0 0 1-1 1H6.455zM4 18.385L5.763 17H20V5H4v13.385zM11 13h2v2h-2v-2zm0-6h2v5h-2V7z"></path></g></svg></a></div></div></div></div><div class="transition-all duration-200 ease-in-out min-h-screen" style="padding-top:56px"><main class="p-6"><div class="flex items-center justify-between mb-4 h-5"><div class="text-base"><a class="text-sky-blue hover:underline leading-none" href="/dashboard/jobs">Managed Jobs</a></div><div class="flex items-center space-x-2"><button class="inline-flex items-center justify-center whitespace-nowrap text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 hover:bg-accent h-9 rounded-md px-3 text-sky-blue hover:text-sky-blue-bright" title="Refresh"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-rotate-cw h-4 w-4 mr-1.5"><path d="M21 12a9 9 0 1 1-9-9c2.52 0 4.93 1 6.74 2.74L21 8"></path><path d="M21 3v5h-5"></path></svg><span>Refresh</span></button></div></div><div class="relative"><div class="flex flex-col space-y-1 mb-1"><div class="flex flex-wrap items-center text-sm mb-1"><span class="mr-2 text-sm font-medium">Statuses:</span><div class="flex flex-wrap gap-2 items-center"></div></div></div><div class="rounded-lg border bg-card text-card-foreground shadow-sm"><div class="relative w-full overflow-auto"><table class="w-full caption-bottom text-base"><thead class="[&_tr]:border-b"><tr class="border-b transition-colors hover:bg-muted/50 data-[state=selected]:bg-muted"><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">ID</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Name</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">User</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Submitted</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Duration</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Status</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Resources</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Cluster</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Region</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0 sortable whitespace-nowrap">Recoveries</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0">Details</th><th class="h-12 px-4 text-left align-middle font-medium text-[hsl(var(--text-strong))] [&:has([role=checkbox])]:pr-0">Logs</th></tr></thead><tbody class="[&_tr:last-child]:border-0"><tr class="border-b transition-colors hover:bg-muted/50 data-[state=selected]:bg-muted"><td class="p-4 align-middle [&:has([role=checkbox])]:pr-0 text-center py-6" colSpan="12"><div class="flex flex-col items-center space-y-4"><p class="text-gray-500">No active jobs</p></div></td></tr></tbody></table></div></div></div></main></div></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs","query":{},"buildId":"WO8lTFPfj-lO3_gDGEiN8","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/data/storage.py
CHANGED
@@ -38,8 +38,8 @@ from sky.utils import status_lib
|
|
38
38
|
from sky.utils import ux_utils
|
39
39
|
|
40
40
|
if typing.TYPE_CHECKING:
|
41
|
-
import boto3 # type: ignore
|
42
41
|
from google.cloud import storage # type: ignore
|
42
|
+
import mypy_boto3_s3
|
43
43
|
|
44
44
|
logger = sky_logging.init_logger(__name__)
|
45
45
|
|
@@ -1363,7 +1363,7 @@ class S3Store(AbstractStore):
|
|
1363
1363
|
is_sky_managed: Optional[bool] = None,
|
1364
1364
|
sync_on_reconstruction: bool = True,
|
1365
1365
|
_bucket_sub_path: Optional[str] = None):
|
1366
|
-
self.client: '
|
1366
|
+
self.client: 'mypy_boto3_s3.Client'
|
1367
1367
|
self.bucket: 'StorageHandle'
|
1368
1368
|
# TODO(romilb): This is purely a stopgap fix for
|
1369
1369
|
# https://github.com/skypilot-org/skypilot/issues/3405
|
@@ -3295,7 +3295,7 @@ class R2Store(AbstractStore):
|
|
3295
3295
|
is_sky_managed: Optional[bool] = None,
|
3296
3296
|
sync_on_reconstruction: Optional[bool] = True,
|
3297
3297
|
_bucket_sub_path: Optional[str] = None):
|
3298
|
-
self.client: '
|
3298
|
+
self.client: 'mypy_boto3_s3.Client'
|
3299
3299
|
self.bucket: 'StorageHandle'
|
3300
3300
|
super().__init__(name, source, region, is_sky_managed,
|
3301
3301
|
sync_on_reconstruction, _bucket_sub_path)
|
@@ -4700,7 +4700,7 @@ class NebiusStore(AbstractStore):
|
|
4700
4700
|
is_sky_managed: Optional[bool] = None,
|
4701
4701
|
sync_on_reconstruction: bool = True,
|
4702
4702
|
_bucket_sub_path: Optional[str] = None):
|
4703
|
-
self.client: '
|
4703
|
+
self.client: 'mypy_boto3_s3.Client'
|
4704
4704
|
self.bucket: 'StorageHandle'
|
4705
4705
|
super().__init__(name, source, region, is_sky_managed,
|
4706
4706
|
sync_on_reconstruction, _bucket_sub_path)
|
sky/exceptions.py
CHANGED
@@ -293,6 +293,11 @@ class ClusterDoesNotExist(ValueError):
|
|
293
293
|
pass
|
294
294
|
|
295
295
|
|
296
|
+
class CachedClusterUnavailable(Exception):
|
297
|
+
"""Raised when a cached cluster record is unavailable."""
|
298
|
+
pass
|
299
|
+
|
300
|
+
|
296
301
|
class NotSupportedError(Exception):
|
297
302
|
"""Raised when a feature is not supported."""
|
298
303
|
pass
|
sky/jobs/server/core.py
CHANGED
@@ -14,6 +14,7 @@ from sky import backends
|
|
14
14
|
from sky import core
|
15
15
|
from sky import exceptions
|
16
16
|
from sky import execution
|
17
|
+
from sky import global_user_state
|
17
18
|
from sky import provision as provision_lib
|
18
19
|
from sky import sky_logging
|
19
20
|
from sky import task as task_lib
|
@@ -64,6 +65,8 @@ def launch(
|
|
64
65
|
ValueError: cluster does not exist. Or, the entrypoint is not a valid
|
65
66
|
chain dag.
|
66
67
|
sky.exceptions.NotSupportedError: the feature is not supported.
|
68
|
+
sky.exceptions.CachedClusterUnavailable: cached jobs controller cluster
|
69
|
+
is unavailable
|
67
70
|
|
68
71
|
Returns:
|
69
72
|
job_id: Optional[int]; the job ID of the submitted job. None if the
|
@@ -103,6 +106,31 @@ def launch(
|
|
103
106
|
with rich_utils.safe_status(
|
104
107
|
ux_utils.spinner_message('Initializing managed job')):
|
105
108
|
|
109
|
+
# Check whether cached jobs controller cluster is accessible
|
110
|
+
cluster_name = (
|
111
|
+
controller_utils.Controllers.JOBS_CONTROLLER.value.cluster_name)
|
112
|
+
record = global_user_state.get_cluster_from_name(cluster_name)
|
113
|
+
if record is not None:
|
114
|
+
# there is a cached jobs controller cluster
|
115
|
+
try:
|
116
|
+
# TODO: do something with returned status?
|
117
|
+
_, _ = backend_utils.refresh_cluster_status_handle(
|
118
|
+
cluster_name=cluster_name,
|
119
|
+
force_refresh_statuses=set(status_lib.ClusterStatus),
|
120
|
+
acquire_per_cluster_status_lock=False)
|
121
|
+
except (exceptions.ClusterOwnerIdentityMismatchError,
|
122
|
+
exceptions.CloudUserIdentityError,
|
123
|
+
exceptions.ClusterStatusFetchingError) as e:
|
124
|
+
# we weren't able to refresh the cluster for its status.
|
125
|
+
with ux_utils.print_exception_no_traceback():
|
126
|
+
raise exceptions.CachedClusterUnavailable(
|
127
|
+
f'Cached jobs controller cluster '
|
128
|
+
f'{cluster_name} cannot be refreshed. Please check if '
|
129
|
+
'the cluster is accessible. If the cluster was '
|
130
|
+
'removed, consider removing the cluster from SkyPilot '
|
131
|
+
f'with:\n\n`sky down {cluster_name} --purge`\n\n'
|
132
|
+
f'Reason: {common_utils.format_exception(e)}')
|
133
|
+
|
106
134
|
local_to_controller_file_mounts = {}
|
107
135
|
|
108
136
|
if storage_lib.get_cached_enabled_storage_cloud_names_or_refresh():
|
@@ -142,11 +170,11 @@ def launch(
|
|
142
170
|
remote_user_config_path = f'{prefix}/{dag.name}-{dag_uuid}.config_yaml'
|
143
171
|
remote_env_file_path = f'{prefix}/{dag.name}-{dag_uuid}.env'
|
144
172
|
controller_resources = controller_utils.get_controller_resources(
|
145
|
-
controller=
|
173
|
+
controller=controller,
|
146
174
|
task_resources=sum([list(t.resources) for t in dag.tasks], []))
|
147
175
|
controller_idle_minutes_to_autostop, controller_down = (
|
148
176
|
controller_utils.get_controller_autostop_config(
|
149
|
-
controller=
|
177
|
+
controller=controller))
|
150
178
|
|
151
179
|
vars_to_fill = {
|
152
180
|
'remote_user_yaml_path': remote_user_yaml_path,
|
@@ -162,7 +190,7 @@ def launch(
|
|
162
190
|
'dashboard_setup_cmd': managed_job_constants.DASHBOARD_SETUP_CMD,
|
163
191
|
'dashboard_user_id': common.SERVER_ID,
|
164
192
|
**controller_utils.shared_controller_vars_to_fill(
|
165
|
-
|
193
|
+
controller,
|
166
194
|
remote_user_config_path=remote_user_config_path,
|
167
195
|
local_user_config=mutated_user_config,
|
168
196
|
),
|
sky/provision/aws/config.py
CHANGED
@@ -11,6 +11,7 @@ import copy
|
|
11
11
|
import json
|
12
12
|
import logging
|
13
13
|
import time
|
14
|
+
import typing
|
14
15
|
from typing import Any, Dict, List, Optional, Set, Tuple
|
15
16
|
|
16
17
|
import colorama
|
@@ -23,6 +24,10 @@ from sky.provision.aws import utils
|
|
23
24
|
from sky.utils import annotations
|
24
25
|
from sky.utils import common_utils
|
25
26
|
|
27
|
+
if typing.TYPE_CHECKING:
|
28
|
+
import mypy_boto3_ec2
|
29
|
+
from mypy_boto3_ec2 import type_defs as ec2_type_defs
|
30
|
+
|
26
31
|
logger = sky_logging.init_logger(__name__)
|
27
32
|
|
28
33
|
RAY = 'ray-autoscaler'
|
@@ -223,7 +228,8 @@ def _configure_iam_role(iam) -> Dict[str, Any]:
|
|
223
228
|
|
224
229
|
|
225
230
|
@annotations.lru_cache(scope='request', maxsize=128) # Keep bounded.
|
226
|
-
def _get_route_tables(ec2
|
231
|
+
def _get_route_tables(ec2: 'mypy_boto3_ec2.ServiceResource',
|
232
|
+
vpc_id: Optional[str], region: str,
|
227
233
|
main: bool) -> List[Any]:
|
228
234
|
"""Get route tables associated with a VPC and region
|
229
235
|
|
@@ -248,7 +254,8 @@ def _get_route_tables(ec2, vpc_id: Optional[str], region: str,
|
|
248
254
|
'RouteTables', [])
|
249
255
|
|
250
256
|
|
251
|
-
def _is_subnet_public(ec2, subnet_id,
|
257
|
+
def _is_subnet_public(ec2: 'mypy_boto3_ec2.ServiceResource', subnet_id,
|
258
|
+
vpc_id: Optional[str]) -> bool:
|
252
259
|
"""Checks if a subnet is public by existence of a route to an IGW.
|
253
260
|
|
254
261
|
Conventionally, public subnets connect to a IGW, and private subnets to a
|
@@ -441,10 +448,14 @@ def _usable_subnets(
|
|
441
448
|
return subnets, first_subnet_vpc_id
|
442
449
|
|
443
450
|
|
444
|
-
def _vpc_id_from_security_group_ids(ec2
|
451
|
+
def _vpc_id_from_security_group_ids(ec2: 'mypy_boto3_ec2.ServiceResource',
|
452
|
+
sg_ids: List[str]) -> Any:
|
445
453
|
# sort security group IDs to support deterministic unit test stubbing
|
446
454
|
sg_ids = sorted(set(sg_ids))
|
447
|
-
filters
|
455
|
+
filters: List['ec2_type_defs.FilterTypeDef'] = [{
|
456
|
+
'Name': 'group-id',
|
457
|
+
'Values': sg_ids
|
458
|
+
}]
|
448
459
|
security_groups = ec2.security_groups.filter(Filters=filters)
|
449
460
|
vpc_ids = [sg.vpc_id for sg in security_groups]
|
450
461
|
vpc_ids = list(set(vpc_ids))
|
@@ -462,7 +473,8 @@ def _vpc_id_from_security_group_ids(ec2, sg_ids: List[str]) -> Any:
|
|
462
473
|
return vpc_ids[0]
|
463
474
|
|
464
475
|
|
465
|
-
def _get_vpc_id_by_name(ec2, vpc_name: str,
|
476
|
+
def _get_vpc_id_by_name(ec2: 'mypy_boto3_ec2.ServiceResource', vpc_name: str,
|
477
|
+
region: str) -> str:
|
466
478
|
"""Returns the VPC ID of the unique VPC with a given name.
|
467
479
|
|
468
480
|
Exits with code 1 if:
|
@@ -470,7 +482,10 @@ def _get_vpc_id_by_name(ec2, vpc_name: str, region: str) -> str:
|
|
470
482
|
- More than 1 VPC with the given name are found in the current region.
|
471
483
|
"""
|
472
484
|
# Look in the 'Name' tag (shown as Name column in console).
|
473
|
-
filters
|
485
|
+
filters: List['ec2_type_defs.FilterTypeDef'] = [{
|
486
|
+
'Name': 'tag:Name',
|
487
|
+
'Values': [vpc_name]
|
488
|
+
}]
|
474
489
|
vpcs = list(ec2.vpcs.filter(Filters=filters))
|
475
490
|
if not vpcs:
|
476
491
|
_skypilot_log_error_and_exit_for_failover(
|
@@ -486,8 +501,9 @@ def _get_vpc_id_by_name(ec2, vpc_name: str, region: str) -> str:
|
|
486
501
|
return vpcs[0].id
|
487
502
|
|
488
503
|
|
489
|
-
def _get_subnet_and_vpc_id(ec2
|
490
|
-
|
504
|
+
def _get_subnet_and_vpc_id(ec2: 'mypy_boto3_ec2.ServiceResource',
|
505
|
+
security_group_ids: Optional[List[str]], region: str,
|
506
|
+
availability_zone: Optional[str],
|
491
507
|
use_internal_ips: bool,
|
492
508
|
vpc_name: Optional[str]) -> Tuple[Any, str]:
|
493
509
|
if vpc_name is not None:
|
@@ -514,7 +530,8 @@ def _get_subnet_and_vpc_id(ec2, security_group_ids: Optional[List[str]],
|
|
514
530
|
return subnets, vpc_id
|
515
531
|
|
516
532
|
|
517
|
-
def _configure_security_group(ec2
|
533
|
+
def _configure_security_group(ec2: 'mypy_boto3_ec2.ServiceResource',
|
534
|
+
vpc_id: str, expected_sg_name: str,
|
518
535
|
extended_ip_rules: List) -> List[str]:
|
519
536
|
security_group = _get_or_create_vpc_security_group(ec2, vpc_id,
|
520
537
|
expected_sg_name)
|
@@ -551,7 +568,8 @@ def _configure_security_group(ec2, vpc_id: str, expected_sg_name: str,
|
|
551
568
|
return sg_ids
|
552
569
|
|
553
570
|
|
554
|
-
def _get_or_create_vpc_security_group(ec2
|
571
|
+
def _get_or_create_vpc_security_group(ec2: 'mypy_boto3_ec2.ServiceResource',
|
572
|
+
vpc_id: str,
|
555
573
|
expected_sg_name: str) -> Any:
|
556
574
|
"""Find or create a security group in the specified VPC.
|
557
575
|
|
@@ -612,7 +630,8 @@ def _get_or_create_vpc_security_group(ec2, vpc_id: str,
|
|
612
630
|
return security_group
|
613
631
|
|
614
632
|
|
615
|
-
def _get_security_group_from_vpc_id(ec2
|
633
|
+
def _get_security_group_from_vpc_id(ec2: 'mypy_boto3_ec2.ServiceResource',
|
634
|
+
vpc_id: str,
|
616
635
|
group_name: str) -> Optional[Any]:
|
617
636
|
"""Get security group by VPC ID and group name."""
|
618
637
|
existing_groups = list(
|
sky/provision/aws/instance.py
CHANGED
@@ -9,6 +9,7 @@ import logging
|
|
9
9
|
from multiprocessing import pool
|
10
10
|
import re
|
11
11
|
import time
|
12
|
+
import typing
|
12
13
|
from typing import Any, Callable, Dict, List, Optional, Set, TypeVar
|
13
14
|
|
14
15
|
from sky import sky_logging
|
@@ -23,6 +24,11 @@ from sky.utils import resources_utils
|
|
23
24
|
from sky.utils import status_lib
|
24
25
|
from sky.utils import ux_utils
|
25
26
|
|
27
|
+
if typing.TYPE_CHECKING:
|
28
|
+
from botocore import waiter as botowaiter
|
29
|
+
import mypy_boto3_ec2
|
30
|
+
from mypy_boto3_ec2 import type_defs as ec2_type_defs
|
31
|
+
|
26
32
|
logger = sky_logging.init_logger(__name__)
|
27
33
|
|
28
34
|
_T = TypeVar('_T')
|
@@ -55,7 +61,9 @@ _RESUME_PER_INSTANCE_TIMEOUT = 120 # 2 minutes
|
|
55
61
|
# https://aws.amazon.com/ec2/pricing/on-demand/#Data_Transfer_within_the_same_AWS_Region
|
56
62
|
|
57
63
|
|
58
|
-
def _default_ec2_resource(
|
64
|
+
def _default_ec2_resource(
|
65
|
+
region: str,
|
66
|
+
check_credentials: bool = True) -> 'mypy_boto3_ec2.ServiceResource':
|
59
67
|
if not hasattr(aws, 'version'):
|
60
68
|
# For backward compatibility, reload the module if the aws module was
|
61
69
|
# imported before and stale. Used for, e.g., a live jobs controller
|
@@ -99,7 +107,8 @@ def _default_ec2_resource(region: str, check_credentials: bool = True) -> Any:
|
|
99
107
|
check_credentials=check_credentials)
|
100
108
|
|
101
109
|
|
102
|
-
def _cluster_name_filter(
|
110
|
+
def _cluster_name_filter(
|
111
|
+
cluster_name_on_cloud: str) -> List['ec2_type_defs.FilterTypeDef']:
|
103
112
|
return [{
|
104
113
|
'Name': f'tag:{constants.TAG_RAY_CLUSTER_NAME}',
|
105
114
|
'Values': [cluster_name_on_cloud],
|
@@ -282,7 +291,7 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
282
291
|
|
283
292
|
# sort tags by key to support deterministic unit test stubbing
|
284
293
|
tags = dict(sorted(copy.deepcopy(config.tags).items()))
|
285
|
-
filters = [{
|
294
|
+
filters: List['ec2_type_defs.FilterTypeDef'] = [{
|
286
295
|
'Name': 'instance-state-name',
|
287
296
|
'Values': ['pending', 'running', 'stopping', 'stopped'],
|
288
297
|
}, {
|
@@ -551,7 +560,8 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
551
560
|
created_instance_ids=created_instance_ids)
|
552
561
|
|
553
562
|
|
554
|
-
def _filter_instances(ec2
|
563
|
+
def _filter_instances(ec2: 'mypy_boto3_ec2.ServiceResource',
|
564
|
+
filters: List['ec2_type_defs.FilterTypeDef'],
|
555
565
|
included_instances: Optional[List[str]],
|
556
566
|
excluded_instances: Optional[List[str]]):
|
557
567
|
instances = ec2.instances.filter(Filters=filters)
|
@@ -616,7 +626,7 @@ def stop_instances(
|
|
616
626
|
assert provider_config is not None, (cluster_name_on_cloud, provider_config)
|
617
627
|
region = provider_config['region']
|
618
628
|
ec2 = _default_ec2_resource(region)
|
619
|
-
filters: List[
|
629
|
+
filters: List['ec2_type_defs.FilterTypeDef'] = [
|
620
630
|
{
|
621
631
|
'Name': 'instance-state-name',
|
622
632
|
'Values': ['pending', 'running'],
|
@@ -653,7 +663,7 @@ def terminate_instances(
|
|
653
663
|
managed_by_skypilot = provider_config['security_group'].get(
|
654
664
|
'ManagedBySkyPilot', True)
|
655
665
|
ec2 = _default_ec2_resource(region)
|
656
|
-
filters = [
|
666
|
+
filters: List['ec2_type_defs.FilterTypeDef'] = [
|
657
667
|
{
|
658
668
|
'Name': 'instance-state-name',
|
659
669
|
# exclude 'shutting-down' or 'terminated' states
|
@@ -751,7 +761,7 @@ def open_ports(
|
|
751
761
|
region = provider_config['region']
|
752
762
|
ec2 = _default_ec2_resource(region)
|
753
763
|
sg_name = provider_config['security_group']['GroupName']
|
754
|
-
filters = [
|
764
|
+
filters: List['ec2_type_defs.FilterTypeDef'] = [
|
755
765
|
{
|
756
766
|
'Name': 'instance-state-name',
|
757
767
|
# exclude 'shutting-down' or 'terminated' states
|
@@ -789,6 +799,7 @@ def open_ports(
|
|
789
799
|
range(existing_rule['FromPort'], existing_rule['ToPort'] + 1))
|
790
800
|
elif existing_rule['IpProtocol'] == '-1':
|
791
801
|
# For AWS, IpProtocol = -1 means all traffic
|
802
|
+
all_traffic_allowed: bool = False
|
792
803
|
for group_pairs in existing_rule['UserIdGroupPairs']:
|
793
804
|
if group_pairs['GroupId'] != sg.id:
|
794
805
|
# We skip the port opening when the rule allows access from
|
@@ -797,8 +808,10 @@ def open_ports(
|
|
797
808
|
# The security group created by SkyPilot allows all traffic
|
798
809
|
# from the same security group, which should not be skipped.
|
799
810
|
existing_ports.add(-1)
|
811
|
+
all_traffic_allowed = True
|
800
812
|
break
|
801
|
-
|
813
|
+
if all_traffic_allowed:
|
814
|
+
break
|
802
815
|
|
803
816
|
ports_to_open = []
|
804
817
|
# Do not need to open any ports when all traffic is already allowed.
|
@@ -875,7 +888,7 @@ def wait_instances(region: str, cluster_name_on_cloud: str,
|
|
875
888
|
ec2 = _default_ec2_resource(region)
|
876
889
|
client = ec2.meta.client
|
877
890
|
|
878
|
-
filters = [
|
891
|
+
filters: List['ec2_type_defs.FilterTypeDef'] = [
|
879
892
|
{
|
880
893
|
'Name': f'tag:{constants.TAG_RAY_CLUSTER_NAME}',
|
881
894
|
'Values': [cluster_name_on_cloud],
|
@@ -904,6 +917,7 @@ def wait_instances(region: str, cluster_name_on_cloud: str,
|
|
904
917
|
raise RuntimeError(
|
905
918
|
f'No instances found for cluster {cluster_name_on_cloud}.')
|
906
919
|
|
920
|
+
waiter: 'botowaiter.Waiter'
|
907
921
|
if state == status_lib.ClusterStatus.UP:
|
908
922
|
waiter = client.get_waiter('instance_running')
|
909
923
|
elif state == status_lib.ClusterStatus.STOPPED:
|
@@ -922,7 +936,7 @@ def get_cluster_info(
|
|
922
936
|
provider_config: Optional[Dict[str, Any]] = None) -> common.ClusterInfo:
|
923
937
|
"""See sky/provision/__init__.py"""
|
924
938
|
ec2 = _default_ec2_resource(region)
|
925
|
-
filters = [
|
939
|
+
filters: List['ec2_type_defs.FilterTypeDef'] = [
|
926
940
|
{
|
927
941
|
'Name': 'instance-state-name',
|
928
942
|
'Values': ['running'],
|
sky/provision/runpod/instance.py
CHANGED
@@ -70,13 +70,14 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
70
70
|
f'Cluster {cluster_name_on_cloud} has no head node.')
|
71
71
|
logger.info(f'Cluster {cluster_name_on_cloud} already has '
|
72
72
|
f'{len(exist_instances)} nodes, no need to start more.')
|
73
|
-
return common.ProvisionRecord(
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
73
|
+
return common.ProvisionRecord(
|
74
|
+
provider_name='runpod',
|
75
|
+
cluster_name=cluster_name_on_cloud,
|
76
|
+
region=region,
|
77
|
+
zone=config.provider_config['availability_zone'],
|
78
|
+
head_instance_id=head_instance_id,
|
79
|
+
resumed_instance_ids=[],
|
80
|
+
created_instance_ids=[])
|
80
81
|
|
81
82
|
created_instance_ids = []
|
82
83
|
for _ in range(to_start_count):
|
@@ -87,6 +88,7 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
87
88
|
node_type=node_type,
|
88
89
|
instance_type=config.node_config['InstanceType'],
|
89
90
|
region=region,
|
91
|
+
zone=config.provider_config['availability_zone'],
|
90
92
|
disk_size=config.node_config['DiskSize'],
|
91
93
|
image_name=config.node_config['ImageId'],
|
92
94
|
ports=config.ports_to_open_on_launch,
|
@@ -118,13 +120,14 @@ def run_instances(region: str, cluster_name_on_cloud: str,
|
|
118
120
|
|
119
121
|
time.sleep(POLL_INTERVAL)
|
120
122
|
assert head_instance_id is not None, 'head_instance_id should not be None'
|
121
|
-
return common.ProvisionRecord(
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
123
|
+
return common.ProvisionRecord(
|
124
|
+
provider_name='runpod',
|
125
|
+
cluster_name=cluster_name_on_cloud,
|
126
|
+
region=region,
|
127
|
+
zone=config.provider_config['availability_zone'],
|
128
|
+
head_instance_id=head_instance_id,
|
129
|
+
resumed_instance_ids=[],
|
130
|
+
created_instance_ids=created_instance_ids)
|
128
131
|
|
129
132
|
|
130
133
|
def wait_instances(region: str, cluster_name_on_cloud: str,
|
sky/provision/runpod/utils.py
CHANGED
@@ -264,8 +264,9 @@ def _create_template_for_docker_login(
|
|
264
264
|
|
265
265
|
|
266
266
|
def launch(cluster_name: str, node_type: str, instance_type: str, region: str,
|
267
|
-
disk_size: int, image_name: str,
|
268
|
-
|
267
|
+
zone: str, disk_size: int, image_name: str,
|
268
|
+
ports: Optional[List[int]], public_key: str,
|
269
|
+
preemptible: Optional[bool], bid_per_gpu: float,
|
269
270
|
docker_login_config: Optional[Dict[str, str]]) -> str:
|
270
271
|
"""Launches an instance with the given parameters.
|
271
272
|
|
@@ -332,6 +333,7 @@ def launch(cluster_name: str, node_type: str, instance_type: str, region: str,
|
|
332
333
|
'min_memory_in_gb': gpu_specs['memoryInGb'] * gpu_quantity,
|
333
334
|
'gpu_count': gpu_quantity,
|
334
335
|
'country_code': region,
|
336
|
+
'data_center_id': zone,
|
335
337
|
'ports': ports_str,
|
336
338
|
'support_public_ip': True,
|
337
339
|
'docker_args': docker_args,
|
sky/server/common.py
CHANGED
@@ -13,6 +13,7 @@ import sys
|
|
13
13
|
import time
|
14
14
|
import typing
|
15
15
|
from typing import Any, Dict, Optional
|
16
|
+
from urllib import parse
|
16
17
|
import uuid
|
17
18
|
|
18
19
|
import colorama
|
@@ -150,6 +151,23 @@ def get_server_url(host: Optional[str] = None) -> str:
|
|
150
151
|
return url.rstrip('/')
|
151
152
|
|
152
153
|
|
154
|
+
@annotations.lru_cache(scope='global')
|
155
|
+
def get_dashboard_url(server_url: str) -> str:
|
156
|
+
# The server_url may include username or password with the
|
157
|
+
# format of https://username:password@example.com:8080/path
|
158
|
+
# We need to remove the username and password and only
|
159
|
+
# return `https://example.com:8080/path`
|
160
|
+
parsed = parse.urlparse(server_url)
|
161
|
+
# Reconstruct the URL without credentials but keeping the scheme
|
162
|
+
dashboard_url = f'{parsed.scheme}://{parsed.hostname}'
|
163
|
+
if parsed.port:
|
164
|
+
dashboard_url = f'{dashboard_url}:{parsed.port}'
|
165
|
+
if parsed.path:
|
166
|
+
dashboard_url = f'{dashboard_url}{parsed.path}'
|
167
|
+
dashboard_url = dashboard_url.rstrip('/')
|
168
|
+
return f'{dashboard_url}/dashboard'
|
169
|
+
|
170
|
+
|
153
171
|
@annotations.lru_cache(scope='global')
|
154
172
|
def is_api_server_local():
|
155
173
|
return get_server_url() in AVAILABLE_LOCAL_API_SERVER_URLS
|
@@ -314,8 +332,9 @@ def _start_api_server(deploy: bool = False,
|
|
314
332
|
else:
|
315
333
|
break
|
316
334
|
|
317
|
-
|
318
|
-
|
335
|
+
server_url = get_server_url(host)
|
336
|
+
dashboard_msg = (f'Dashboard: {get_dashboard_url(server_url)}')
|
337
|
+
api_server_info = get_api_server_status(server_url)
|
319
338
|
if api_server_info.version == _DEV_VERSION:
|
320
339
|
dashboard_msg += (
|
321
340
|
f'\n{colorama.Style.RESET_ALL}{ux_utils.INDENT_SYMBOL}'
|
@@ -323,11 +342,13 @@ def _start_api_server(deploy: bool = False,
|
|
323
342
|
if not os.path.isdir(server_constants.DASHBOARD_DIR):
|
324
343
|
dashboard_msg += (
|
325
344
|
'Dashboard is not built, '
|
326
|
-
'to build: npm --prefix sky/dashboard
|
345
|
+
'to build: npm --prefix sky/dashboard install '
|
346
|
+
'&& npm --prefix sky/dashboard run build')
|
327
347
|
else:
|
328
348
|
dashboard_msg += (
|
329
349
|
'Dashboard may be stale when installed from source, '
|
330
|
-
'to rebuild: npm --prefix sky/dashboard
|
350
|
+
'to rebuild: npm --prefix sky/dashboard install '
|
351
|
+
'&& npm --prefix sky/dashboard run build')
|
331
352
|
dashboard_msg += f'{colorama.Style.RESET_ALL}'
|
332
353
|
logger.info(
|
333
354
|
ux_utils.finishing_message(
|
sky/templates/runpod-ray.yml.j2
CHANGED
@@ -9,6 +9,7 @@ provider:
|
|
9
9
|
type: external
|
10
10
|
module: sky.provision.runpod
|
11
11
|
region: "{{region}}"
|
12
|
+
availability_zone: "{{availability_zone}}"
|
12
13
|
disable_launch_config_check: true
|
13
14
|
# For RunPod, we directly set the image id for the docker as runtime environment
|
14
15
|
# support, thus we need to avoid the DockerInitializer detects the docker field
|