skypilot-nightly 1.0.0.dev20250707__py3-none-any.whl → 1.0.0.dev20250709__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +52 -8
- sky/client/common.py +6 -1
- sky/clouds/aws.py +5 -0
- sky/clouds/azure.py +3 -0
- sky/clouds/cloud.py +3 -0
- sky/clouds/cudo.py +3 -0
- sky/clouds/do.py +3 -0
- sky/clouds/fluidstack.py +3 -0
- sky/clouds/gcp.py +3 -2
- sky/clouds/ibm.py +3 -0
- sky/clouds/kubernetes.py +63 -24
- sky/clouds/lambda_cloud.py +3 -0
- sky/clouds/nebius.py +3 -0
- sky/clouds/oci.py +3 -0
- sky/clouds/paperspace.py +3 -0
- sky/clouds/runpod.py +3 -0
- sky/clouds/scp.py +3 -0
- sky/clouds/utils/gcp_utils.py +61 -1
- sky/clouds/vast.py +3 -0
- sky/clouds/vsphere.py +3 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/global_user_state.py +6 -3
- sky/jobs/state.py +6 -3
- sky/provision/__init__.py +11 -0
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +106 -13
- sky/provision/gcp/constants.py +0 -3
- sky/provision/gcp/instance.py +21 -0
- sky/provision/kubernetes/instance.py +16 -0
- sky/provision/kubernetes/utils.py +9 -2
- sky/resources.py +1 -30
- sky/server/metrics.py +2 -3
- sky/server/requests/executor.py +2 -5
- sky/server/requests/payloads.py +1 -0
- sky/server/requests/requests.py +94 -4
- sky/server/server.py +20 -6
- sky/server/uvicorn.py +4 -1
- sky/skylet/constants.py +6 -2
- sky/templates/gcp-ray.yml.j2 +3 -0
- sky/templates/kubernetes-ray.yml.j2 +8 -2
- sky/utils/resources_utils.py +30 -0
- sky/utils/schemas.py +22 -0
- {skypilot_nightly-1.0.0.dev20250707.dist-info → skypilot_nightly-1.0.0.dev20250709.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250707.dist-info → skypilot_nightly-1.0.0.dev20250709.dist-info}/RECORD +65 -65
- /sky/dashboard/out/_next/static/{wEkAg9F21A-COXJLf20VU → EqELoF4IXcALfWVihInou}/_buildManifest.js +0 -0
- /sky/dashboard/out/_next/static/{wEkAg9F21A-COXJLf20VU → EqELoF4IXcALfWVihInou}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250707.dist-info → skypilot_nightly-1.0.0.dev20250709.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250707.dist-info → skypilot_nightly-1.0.0.dev20250709.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250707.dist-info → skypilot_nightly-1.0.0.dev20250709.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250707.dist-info → skypilot_nightly-1.0.0.dev20250709.dist-info}/top_level.txt +0 -0
sky/dashboard/out/config.html
CHANGED
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/config-a2673b256b6d416f.js" defer=""></script><script src="/dashboard/_next/static/
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/config-a2673b256b6d416f.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/config","query":{},"buildId":"EqELoF4IXcALfWVihInou","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/index.html
CHANGED
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/index-927ddeebe57a8ac3.js" defer=""></script><script src="/dashboard/_next/static/
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/index-927ddeebe57a8ac3.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/","query":{},"buildId":"EqELoF4IXcALfWVihInou","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/infra/%5Bcontext%5D-8b0809f59034d509.js" defer=""></script><script src="/dashboard/_next/static/
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/infra/%5Bcontext%5D-8b0809f59034d509.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/infra/[context]","query":{},"buildId":"EqELoF4IXcALfWVihInou","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/infra.html
CHANGED
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/infra-ae9d2f705ce582c9.js" defer=""></script><script src="/dashboard/_next/static/
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/infra-ae9d2f705ce582c9.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/infra","query":{},"buildId":"EqELoF4IXcALfWVihInou","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-162f3033ffcd3d31.js" defer=""></script><script src="/dashboard/_next/static/chunks/5230-df791914b54d91d9.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-5ea3ffa10fc884f2.js" defer=""></script><script src="/dashboard/_next/static/chunks/1664-d65361e92b85e786.js" defer=""></script><script src="/dashboard/_next/static/chunks/804-9f5e98ce84d46bdd.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-6ff4e45dfb49d11d.js" defer=""></script><script src="/dashboard/_next/static/chunks/3698-52ad1ca228faa776.js" defer=""></script><script src="/dashboard/_next/static/chunks/9470-21d059a1dfa03f61.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-909d53833da080cb.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs/%5Bjob%5D-c4d5cfac7fbc0668.js" defer=""></script><script src="/dashboard/_next/static/
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-162f3033ffcd3d31.js" defer=""></script><script src="/dashboard/_next/static/chunks/5230-df791914b54d91d9.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-5ea3ffa10fc884f2.js" defer=""></script><script src="/dashboard/_next/static/chunks/1664-d65361e92b85e786.js" defer=""></script><script src="/dashboard/_next/static/chunks/804-9f5e98ce84d46bdd.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-6ff4e45dfb49d11d.js" defer=""></script><script src="/dashboard/_next/static/chunks/3698-52ad1ca228faa776.js" defer=""></script><script src="/dashboard/_next/static/chunks/9470-21d059a1dfa03f61.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-909d53833da080cb.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs/%5Bjob%5D-c4d5cfac7fbc0668.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs/[job]","query":{},"buildId":"EqELoF4IXcALfWVihInou","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/jobs.html
CHANGED
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs-5bbdc71878f0a068.js" defer=""></script><script src="/dashboard/_next/static/
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs-5bbdc71878f0a068.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs","query":{},"buildId":"EqELoF4IXcALfWVihInou","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/users.html
CHANGED
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/users-cd43fb3c122eedde.js" defer=""></script><script src="/dashboard/_next/static/
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/users-cd43fb3c122eedde.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/users","query":{},"buildId":"EqELoF4IXcALfWVihInou","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/volumes.html
CHANGED
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/volumes-4ebf6484f7216387.js" defer=""></script><script src="/dashboard/_next/static/
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/volumes-4ebf6484f7216387.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/volumes","query":{},"buildId":"EqELoF4IXcALfWVihInou","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspace/new-5629d4e551dba1ee.js" defer=""></script><script src="/dashboard/_next/static/
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspace/new-5629d4e551dba1ee.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspace/new","query":{},"buildId":"EqELoF4IXcALfWVihInou","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-162f3033ffcd3d31.js" defer=""></script><script src="/dashboard/_next/static/chunks/5230-df791914b54d91d9.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-5ea3ffa10fc884f2.js" defer=""></script><script src="/dashboard/_next/static/chunks/1664-d65361e92b85e786.js" defer=""></script><script src="/dashboard/_next/static/chunks/804-9f5e98ce84d46bdd.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/3947-b059261d6fa88a1f.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-6ff4e45dfb49d11d.js" defer=""></script><script src="/dashboard/_next/static/chunks/3698-52ad1ca228faa776.js" defer=""></script><script src="/dashboard/_next/static/chunks/9470-21d059a1dfa03f61.js" defer=""></script><script src="/dashboard/_next/static/chunks/6990-d0dc765474fa0eca.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-909d53833da080cb.js" defer=""></script><script src="/dashboard/_next/static/chunks/1043-1b39779691bb4030.js" defer=""></script><script src="/dashboard/_next/static/chunks/6601-fcfad0ddf92ec7ab.js" defer=""></script><script src="/dashboard/_next/static/chunks/938-044ad21de8b4626b.js" defer=""></script><script src="/dashboard/_next/static/chunks/1141-726e5a3f00b67185.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-7c0187f43757a548.js" defer=""></script><script src="/dashboard/_next/static/
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-162f3033ffcd3d31.js" defer=""></script><script src="/dashboard/_next/static/chunks/5230-df791914b54d91d9.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-5ea3ffa10fc884f2.js" defer=""></script><script src="/dashboard/_next/static/chunks/1664-d65361e92b85e786.js" defer=""></script><script src="/dashboard/_next/static/chunks/804-9f5e98ce84d46bdd.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/3947-b059261d6fa88a1f.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-6ff4e45dfb49d11d.js" defer=""></script><script src="/dashboard/_next/static/chunks/3698-52ad1ca228faa776.js" defer=""></script><script src="/dashboard/_next/static/chunks/9470-21d059a1dfa03f61.js" defer=""></script><script src="/dashboard/_next/static/chunks/6990-d0dc765474fa0eca.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-909d53833da080cb.js" defer=""></script><script src="/dashboard/_next/static/chunks/1043-1b39779691bb4030.js" defer=""></script><script src="/dashboard/_next/static/chunks/6601-fcfad0ddf92ec7ab.js" defer=""></script><script src="/dashboard/_next/static/chunks/938-044ad21de8b4626b.js" defer=""></script><script src="/dashboard/_next/static/chunks/1141-726e5a3f00b67185.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-7c0187f43757a548.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces/[name]","query":{},"buildId":"EqELoF4IXcALfWVihInou","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
@@ -1 +1 @@
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-06bde99155fa6292.js" defer=""></script><script src="/dashboard/_next/static/
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0da6afe66176678a.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0da6afe66176678a.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-9a81ea998672c303.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-a37b06ddb64521fd.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-06bde99155fa6292.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/EqELoF4IXcALfWVihInou/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"EqELoF4IXcALfWVihInou","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/global_user_state.py
CHANGED
@@ -391,7 +391,8 @@ def initialize_and_get_db() -> sqlalchemy.engine.Engine:
|
|
391
391
|
conn_string = skypilot_config.get_nested(('db',), None)
|
392
392
|
if conn_string:
|
393
393
|
logger.debug(f'using db URI from {conn_string}')
|
394
|
-
_SQLALCHEMY_ENGINE = sqlalchemy.create_engine(
|
394
|
+
_SQLALCHEMY_ENGINE = sqlalchemy.create_engine(
|
395
|
+
conn_string, poolclass=sqlalchemy.NullPool)
|
395
396
|
else:
|
396
397
|
db_path = os.path.expanduser('~/.sky/state.db')
|
397
398
|
pathlib.Path(db_path).parents[0].mkdir(parents=True,
|
@@ -497,10 +498,12 @@ def add_or_update_user(user: models.User,
|
|
497
498
|
))
|
498
499
|
|
499
500
|
result = session.execute(upsert_stmnt)
|
501
|
+
row = result.fetchone()
|
502
|
+
|
503
|
+
ret = bool(row.was_inserted) if row else False
|
500
504
|
session.commit()
|
501
505
|
|
502
|
-
|
503
|
-
return bool(row.was_inserted) if row else False
|
506
|
+
return ret
|
504
507
|
else:
|
505
508
|
raise ValueError('Unsupported database dialect')
|
506
509
|
|
sky/jobs/state.py
CHANGED
@@ -219,7 +219,8 @@ def initialize_and_get_db() -> sqlalchemy.engine.Engine:
|
|
219
219
|
conn_string = skypilot_config.get_nested(('db',), None)
|
220
220
|
if conn_string:
|
221
221
|
logger.debug(f'using db URI from {conn_string}')
|
222
|
-
_SQLALCHEMY_ENGINE = sqlalchemy.create_engine(
|
222
|
+
_SQLALCHEMY_ENGINE = sqlalchemy.create_engine(
|
223
|
+
conn_string, poolclass=sqlalchemy.NullPool)
|
223
224
|
else:
|
224
225
|
db_path = os.path.expanduser('~/.sky/spot_jobs.db')
|
225
226
|
pathlib.Path(db_path).parents[0].mkdir(parents=True,
|
@@ -544,14 +545,16 @@ def set_job_info_without_job_id(name: str, workspace: str,
|
|
544
545
|
if (_SQLALCHEMY_ENGINE.dialect.name ==
|
545
546
|
db_utils.SQLAlchemyDialect.SQLITE.value):
|
546
547
|
result = session.execute(insert_stmt)
|
548
|
+
ret = result.lastrowid
|
547
549
|
session.commit()
|
548
|
-
return
|
550
|
+
return ret
|
549
551
|
elif (_SQLALCHEMY_ENGINE.dialect.name ==
|
550
552
|
db_utils.SQLAlchemyDialect.POSTGRESQL.value):
|
551
553
|
result = session.execute(
|
552
554
|
insert_stmt.returning(job_info_table.c.spot_job_id))
|
555
|
+
ret = result.scalar()
|
553
556
|
session.commit()
|
554
|
-
return
|
557
|
+
return ret
|
555
558
|
else:
|
556
559
|
raise ValueError('Unsupported database dialect')
|
557
560
|
|
sky/provision/__init__.py
CHANGED
@@ -166,6 +166,17 @@ def terminate_instances(
|
|
166
166
|
raise NotImplementedError
|
167
167
|
|
168
168
|
|
169
|
+
@_route_to_cloud_impl
|
170
|
+
def cleanup_custom_multi_network(
|
171
|
+
provider_name: str,
|
172
|
+
cluster_name_on_cloud: str,
|
173
|
+
provider_config: Dict[str, Any],
|
174
|
+
failover: bool = False,
|
175
|
+
) -> None:
|
176
|
+
"""Cleanup custom multi-network."""
|
177
|
+
raise NotImplementedError
|
178
|
+
|
179
|
+
|
169
180
|
@_route_to_cloud_impl
|
170
181
|
def open_ports(
|
171
182
|
provider_name: str,
|
sky/provision/gcp/__init__.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
"""GCP provisioner for SkyPilot."""
|
2
2
|
|
3
3
|
from sky.provision.gcp.config import bootstrap_instances
|
4
|
+
from sky.provision.gcp.instance import cleanup_custom_multi_network
|
4
5
|
from sky.provision.gcp.instance import cleanup_ports
|
5
6
|
from sky.provision.gcp.instance import get_cluster_info
|
6
7
|
from sky.provision.gcp.instance import open_ports
|
sky/provision/gcp/config.py
CHANGED
@@ -532,7 +532,23 @@ def _check_firewall_rules(cluster_name: str, vpc_name: str, project_id: str,
|
|
532
532
|
return True
|
533
533
|
|
534
534
|
|
535
|
-
def
|
535
|
+
def _delete_rules(project_id: str, compute, rules, vpc_name: str):
|
536
|
+
for rule_ori in rules:
|
537
|
+
# Query firewall rule by its name (unique in a project).
|
538
|
+
rule_name = rule_ori['name'].format(VPC_NAME=vpc_name)
|
539
|
+
rule_list = _list_firewall_rules(project_id,
|
540
|
+
compute,
|
541
|
+
filter=f'(name={rule_name})')
|
542
|
+
for rule in rule_list:
|
543
|
+
logger.info(f'Deleting firewall rule {rule["name"]}')
|
544
|
+
_delete_firewall_rule(project_id, compute, rule['name'])
|
545
|
+
|
546
|
+
|
547
|
+
def _create_rules(project_id: str,
|
548
|
+
compute,
|
549
|
+
rules,
|
550
|
+
vpc_name,
|
551
|
+
recreate: bool = True):
|
536
552
|
opertaions = []
|
537
553
|
for rule in rules:
|
538
554
|
# Query firewall rule by its name (unique in a project).
|
@@ -542,7 +558,11 @@ def _create_rules(project_id: str, compute, rules, vpc_name):
|
|
542
558
|
compute,
|
543
559
|
filter=f'(name={rule_name})')
|
544
560
|
if rule_list:
|
545
|
-
|
561
|
+
if recreate:
|
562
|
+
_delete_firewall_rule(project_id, compute, rule_name)
|
563
|
+
else:
|
564
|
+
logger.info(f'Rule {rule_name} already exists')
|
565
|
+
continue
|
546
566
|
|
547
567
|
body = rule.copy()
|
548
568
|
body['name'] = body['name'].format(VPC_NAME=vpc_name)
|
@@ -694,8 +714,6 @@ def get_gpu_direct_usable_vpcs_and_subnets(
|
|
694
714
|
) -> List[Tuple[str, 'google.cloud.compute_v1.types.compute.Subnetwork']]:
|
695
715
|
"""Return a list of usable VPCs and subnets for GPU Direct."""
|
696
716
|
project_id = config.provider_config['project_id']
|
697
|
-
vpc_prefix = constants.SKYPILOT
|
698
|
-
cluster_prefix = cluster_name[:constants.CLUSTER_PREFIX_LENGTH]
|
699
717
|
vpc_subnet_pairs = []
|
700
718
|
|
701
719
|
# TODO(hailong): Determine the num_vpcs per different GPU Direct types
|
@@ -703,10 +721,7 @@ def get_gpu_direct_usable_vpcs_and_subnets(
|
|
703
721
|
|
704
722
|
cidr_prefix = constants.SKYPILOT_GPU_DIRECT_VPC_CIDR_PREFIX
|
705
723
|
for i in range(num_vpcs):
|
706
|
-
|
707
|
-
vpc_name = f'{vpc_prefix}-{cluster_prefix}-mgmt-net'
|
708
|
-
else:
|
709
|
-
vpc_name = f'{vpc_prefix}-{cluster_prefix}-data-net-{i}'
|
724
|
+
vpc_name = get_gpu_direct_vpc_name(cluster_name, i)
|
710
725
|
subnet_name = f'{vpc_name}-sub'
|
711
726
|
subnet_cidr_range = f'{cidr_prefix}.{i}.0/24'
|
712
727
|
# Check if VPC exists
|
@@ -729,12 +744,72 @@ def get_gpu_direct_usable_vpcs_and_subnets(
|
|
729
744
|
compute,
|
730
745
|
network=vpc_name)
|
731
746
|
# Apply firewall rules
|
732
|
-
|
733
|
-
|
747
|
+
# No need to recreate the rules if exist,
|
748
|
+
# as they are totally managed by SkyPilot,
|
749
|
+
# in this case, we can skip the rules creation during failover
|
750
|
+
_create_rules(project_id,
|
751
|
+
compute,
|
752
|
+
constants.FIREWALL_RULES_TEMPLATE,
|
753
|
+
vpc_name,
|
754
|
+
recreate=False)
|
734
755
|
vpc_subnet_pairs.append((vpc_name, subnets[0]))
|
735
756
|
return vpc_subnet_pairs
|
736
757
|
|
737
758
|
|
759
|
+
def get_gpu_direct_vpc_name(cluster_name: str, i: int) -> str:
|
760
|
+
"""Get the name of the GPU Direct VPC."""
|
761
|
+
if i == 0:
|
762
|
+
return f'{cluster_name}-mgmt-net'
|
763
|
+
else:
|
764
|
+
return f'{cluster_name}-data-net-{i}'
|
765
|
+
|
766
|
+
|
767
|
+
def delete_gpu_direct_vpcs_and_subnets(
|
768
|
+
cluster_name: str,
|
769
|
+
project_id: str,
|
770
|
+
region: str,
|
771
|
+
keep_global_resources: bool = False,
|
772
|
+
):
|
773
|
+
"""Delete GPU Direct subnets, firewalls, and VPCs.
|
774
|
+
|
775
|
+
Args:
|
776
|
+
cluster_name: The name of the cluster.
|
777
|
+
project_id: The ID of the project.
|
778
|
+
region: The region of the cluster.
|
779
|
+
keep_global_resources: Whether to keep the global resources. If True,
|
780
|
+
only delete the subnets. Otherwise, delete all the firewalls,
|
781
|
+
subnets, and VPCs.
|
782
|
+
"""
|
783
|
+
compute = _create_compute()
|
784
|
+
|
785
|
+
# TODO(hailong): Determine the num_vpcs per different GPU Direct types
|
786
|
+
num_vpcs = constants.SKYPILOT_GPU_DIRECT_VPC_NUM
|
787
|
+
|
788
|
+
for i in range(num_vpcs):
|
789
|
+
vpc_name = get_gpu_direct_vpc_name(cluster_name, i)
|
790
|
+
# Check if VPC exists
|
791
|
+
vpc_list = _list_vpcnets(project_id, compute, filter=f'name={vpc_name}')
|
792
|
+
if not vpc_list:
|
793
|
+
continue
|
794
|
+
for vpc in vpc_list:
|
795
|
+
subnets = _list_subnets(project_id,
|
796
|
+
region,
|
797
|
+
compute,
|
798
|
+
network=vpc['name'])
|
799
|
+
for subnet in subnets:
|
800
|
+
logger.info(f'Deleting subnet {subnet["name"]}')
|
801
|
+
_delete_subnet(project_id, region, compute, subnet['name'])
|
802
|
+
|
803
|
+
if not keep_global_resources:
|
804
|
+
# For failover, keep_global_resources would be true,
|
805
|
+
# we don't delete the rules and VPCs,
|
806
|
+
# which are global resources and can be reused.
|
807
|
+
_delete_rules(project_id, compute,
|
808
|
+
constants.FIREWALL_RULES_TEMPLATE, vpc['name'])
|
809
|
+
logger.info(f'Deleting VPC {vpc["name"]}')
|
810
|
+
_delete_vpcnet(project_id, compute, vpc['name'])
|
811
|
+
|
812
|
+
|
738
813
|
def _configure_placement_policy(region: str, cluster_name: str,
|
739
814
|
config: common.ProvisionConfig, compute):
|
740
815
|
"""Configure placement group for GPU Direct."""
|
@@ -756,8 +831,7 @@ def _configure_placement_policy(region: str, cluster_name: str,
|
|
756
831
|
constants.COMPACT_GROUP_PLACEMENT_POLICY or mig_configuration):
|
757
832
|
return config
|
758
833
|
|
759
|
-
|
760
|
-
policy_name = f'{cluster_prefix}-placement-policy'
|
834
|
+
policy_name = f'{cluster_name}-placement-policy'
|
761
835
|
resource_policy = {
|
762
836
|
'name': policy_name,
|
763
837
|
'groupPlacementPolicy': {
|
@@ -790,7 +864,8 @@ def _configure_subnet(region: str, cluster_name: str,
|
|
790
864
|
enable_gpu_direct = config.provider_config.get('enable_gpu_direct', False)
|
791
865
|
enable_gvnic = config.provider_config.get('enable_gvnic', False)
|
792
866
|
network_tier = config.provider_config.get('network_tier', 'standard')
|
793
|
-
if enable_gpu_direct or
|
867
|
+
if (enable_gpu_direct or
|
868
|
+
network_tier == resources_utils.NetworkTier.BEST.value):
|
794
869
|
if not enable_gvnic:
|
795
870
|
logger.warning(
|
796
871
|
'Enable GPU Direct requires gvnic to be enabled, enabling gvnic'
|
@@ -892,6 +967,14 @@ def _list_vpcnets(project_id: str, compute, filter=None): # pylint: disable=red
|
|
892
967
|
if 'items' in response else [])
|
893
968
|
|
894
969
|
|
970
|
+
def _delete_vpcnet(project_id: str, compute, vpcnet_name: str):
|
971
|
+
operation = compute.networks().delete(
|
972
|
+
project=project_id,
|
973
|
+
network=vpcnet_name,
|
974
|
+
).execute()
|
975
|
+
return wait_for_compute_global_operation(project_id, operation, compute)
|
976
|
+
|
977
|
+
|
895
978
|
def _list_subnets(
|
896
979
|
project_id: str,
|
897
980
|
region: str,
|
@@ -1003,6 +1086,16 @@ def _create_subnet(project_id: str, region: str, compute, vpc_name: str,
|
|
1003
1086
|
return response
|
1004
1087
|
|
1005
1088
|
|
1089
|
+
def _delete_subnet(project_id: str, region: str, compute, subnet_name: str):
|
1090
|
+
operation = compute.subnetworks().delete(
|
1091
|
+
project=project_id,
|
1092
|
+
region=region,
|
1093
|
+
subnetwork=subnet_name,
|
1094
|
+
).execute()
|
1095
|
+
return wait_for_compute_region_operation(project_id, region, operation,
|
1096
|
+
compute)
|
1097
|
+
|
1098
|
+
|
1006
1099
|
def _create_placement_policy(project_id: str, region: str, compute,
|
1007
1100
|
placement_policy: dict):
|
1008
1101
|
operation = compute.resourcePolicies().insert(
|
sky/provision/gcp/constants.py
CHANGED
@@ -48,9 +48,6 @@ GPU_DIRECT_TCPX_INSTANCE_TYPES = [
|
|
48
48
|
'a3-edgegpu-8g',
|
49
49
|
'a3-highgpu-8g',
|
50
50
|
]
|
51
|
-
# The prefix length of the cluster name.
|
52
|
-
# To make sure the VPC and subnet names are within the GCP limits.
|
53
|
-
CLUSTER_PREFIX_LENGTH = 10
|
54
51
|
|
55
52
|
COMPACT_GROUP_PLACEMENT_POLICY = 'compact'
|
56
53
|
COLLOCATED_COLLOCATION = 'COLLOCATED'
|
sky/provision/gcp/instance.py
CHANGED
@@ -10,9 +10,11 @@ from sky import sky_logging
|
|
10
10
|
from sky.adaptors import gcp
|
11
11
|
from sky.provision import common
|
12
12
|
from sky.provision import constants as provision_constants
|
13
|
+
from sky.provision.gcp import config as gcp_config
|
13
14
|
from sky.provision.gcp import constants
|
14
15
|
from sky.provision.gcp import instance_utils
|
15
16
|
from sky.utils import common_utils
|
17
|
+
from sky.utils import resources_utils
|
16
18
|
from sky.utils import status_lib
|
17
19
|
|
18
20
|
logger = sky_logging.init_logger(__name__)
|
@@ -572,6 +574,25 @@ def terminate_instances(
|
|
572
574
|
# time (same as what we did in ray's node_provider).
|
573
575
|
|
574
576
|
|
577
|
+
def cleanup_custom_multi_network(
|
578
|
+
cluster_name_on_cloud: str,
|
579
|
+
provider_config: Optional[Dict[str, Any]] = None,
|
580
|
+
failover: bool = False,
|
581
|
+
) -> None:
|
582
|
+
"""See sky/provision/__init__.py"""
|
583
|
+
assert provider_config is not None, cluster_name_on_cloud
|
584
|
+
project_id = provider_config['project_id']
|
585
|
+
region = provider_config['region']
|
586
|
+
enable_gpu_direct = provider_config.get('enable_gpu_direct', False)
|
587
|
+
network_tier = provider_config.get('network_tier', 'standard')
|
588
|
+
|
589
|
+
if (enable_gpu_direct or
|
590
|
+
network_tier == resources_utils.NetworkTier.BEST.value):
|
591
|
+
gcp_config.delete_gpu_direct_vpcs_and_subnets(cluster_name_on_cloud,
|
592
|
+
project_id, region,
|
593
|
+
failover)
|
594
|
+
|
595
|
+
|
575
596
|
def open_ports(
|
576
597
|
cluster_name_on_cloud: str,
|
577
598
|
ports: List[str],
|
@@ -876,6 +876,22 @@ def _create_pods(region: str, cluster_name_on_cloud: str,
|
|
876
876
|
pod_spec_copy['spec']['tolerations'] = existing_tolerations + [
|
877
877
|
tpu_toleration
|
878
878
|
]
|
879
|
+
# Add GPU toleration if GPU is requested.
|
880
|
+
# The nodes provisioned by DWS with flex start with queued provisioning
|
881
|
+
# mode have the GPU taint, so we have to add the GPU toleration.
|
882
|
+
# No need to check if DWS is enabled here since this has no side effect
|
883
|
+
# to the non-DWS case.
|
884
|
+
if needs_gpus:
|
885
|
+
gpu_toleration = {
|
886
|
+
'key': kubernetes_utils.get_gpu_resource_key(),
|
887
|
+
'operator': 'Exists',
|
888
|
+
'effect': 'NoSchedule'
|
889
|
+
}
|
890
|
+
# Preserve existing tolerations if any
|
891
|
+
existing_tolerations = pod_spec_copy['spec'].get('tolerations', [])
|
892
|
+
pod_spec_copy['spec']['tolerations'] = existing_tolerations + [
|
893
|
+
gpu_toleration
|
894
|
+
]
|
879
895
|
|
880
896
|
if to_create_deployment:
|
881
897
|
volume.create_persistent_volume_claim(namespace, context, pvc_spec)
|
@@ -313,6 +313,9 @@ def get_gke_accelerator_name(accelerator: str) -> str:
|
|
313
313
|
# A100-80GB, L4, H100-80GB and H100-MEGA-80GB
|
314
314
|
# have a different name pattern.
|
315
315
|
return 'nvidia-{}'.format(accelerator.lower())
|
316
|
+
elif accelerator == 'H200':
|
317
|
+
# H200s on GCP use this label format
|
318
|
+
return 'nvidia-h200-141gb'
|
316
319
|
elif accelerator.startswith('tpu-'):
|
317
320
|
return accelerator
|
318
321
|
else:
|
@@ -451,7 +454,10 @@ class GKELabelFormatter(GPULabelFormatter):
|
|
451
454
|
|
452
455
|
e.g. tpu-v5-lite-podslice:8 -> '2x4'
|
453
456
|
"""
|
454
|
-
|
457
|
+
# If the TPU type is in the GKE_TPU_ACCELERATOR_TO_GENERATION, it means
|
458
|
+
# that it has been normalized before, no need to normalize again.
|
459
|
+
if acc_type not in GKE_TPU_ACCELERATOR_TO_GENERATION:
|
460
|
+
acc_type, acc_count = normalize_tpu_accelerator_name(acc_type)
|
455
461
|
count_to_topology = cls.GKE_TPU_TOPOLOGIES.get(acc_type,
|
456
462
|
{}).get(acc_count, None)
|
457
463
|
if count_to_topology is None:
|
@@ -1203,7 +1209,8 @@ def get_accelerator_label_key_values(
|
|
1203
1209
|
# early since we assume the cluster autoscaler will handle GPU
|
1204
1210
|
# node provisioning.
|
1205
1211
|
return None, None, None, None
|
1206
|
-
autoscaler = AUTOSCALER_TYPE_TO_AUTOSCALER.get(
|
1212
|
+
autoscaler = AUTOSCALER_TYPE_TO_AUTOSCALER.get(
|
1213
|
+
kubernetes_enums.KubernetesAutoscalerType(autoscaler_type))
|
1207
1214
|
assert autoscaler is not None, ('Unsupported autoscaler type:'
|
1208
1215
|
f' {autoscaler_type}')
|
1209
1216
|
formatter = autoscaler.label_formatter
|
sky/resources.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
"""Resources: compute requirements of Tasks."""
|
2
2
|
import collections
|
3
3
|
import dataclasses
|
4
|
-
import math
|
5
4
|
import re
|
6
5
|
import textwrap
|
7
6
|
import typing
|
@@ -92,7 +91,7 @@ class AutostopConfig:
|
|
92
91
|
return cls(idle_minutes=config, down=False, enabled=True)
|
93
92
|
|
94
93
|
if isinstance(config, str):
|
95
|
-
return cls(idle_minutes=parse_time_minutes(config),
|
94
|
+
return cls(idle_minutes=resources_utils.parse_time_minutes(config),
|
96
95
|
down=False,
|
97
96
|
enabled=True)
|
98
97
|
|
@@ -2415,31 +2414,3 @@ def _maybe_add_docker_prefix_to_image_id(
|
|
2415
2414
|
for k, v in image_id_dict.items():
|
2416
2415
|
if not v.startswith('docker:'):
|
2417
2416
|
image_id_dict[k] = f'docker:{v}'
|
2418
|
-
|
2419
|
-
|
2420
|
-
def parse_time_minutes(time: str) -> int:
|
2421
|
-
"""Convert a time string to minutes.
|
2422
|
-
|
2423
|
-
Args:
|
2424
|
-
time: Time string with optional unit suffix (e.g., '30m', '2h', '1d')
|
2425
|
-
|
2426
|
-
Returns:
|
2427
|
-
Time in minutes as an integer
|
2428
|
-
"""
|
2429
|
-
time_str = str(time)
|
2430
|
-
|
2431
|
-
if time_str.isdecimal():
|
2432
|
-
# We assume it is already in minutes to maintain backwards
|
2433
|
-
# compatibility
|
2434
|
-
return int(time_str)
|
2435
|
-
|
2436
|
-
time_str = time_str.lower()
|
2437
|
-
for unit, multiplier in constants.TIME_UNITS.items():
|
2438
|
-
if time_str.endswith(unit):
|
2439
|
-
try:
|
2440
|
-
value = int(time_str[:-len(unit)])
|
2441
|
-
return math.ceil(value * multiplier)
|
2442
|
-
except ValueError:
|
2443
|
-
continue
|
2444
|
-
|
2445
|
-
raise ValueError(f'Invalid time format: {time}')
|
sky/server/metrics.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
"""Instrumentation for the API server."""
|
2
2
|
|
3
|
-
import asyncio
|
4
3
|
import os
|
5
4
|
import time
|
6
5
|
|
@@ -50,7 +49,7 @@ async def metrics() -> fastapi.Response:
|
|
50
49
|
headers={'Cache-Control': 'no-cache'})
|
51
50
|
|
52
51
|
|
53
|
-
def
|
52
|
+
def build_metrics_server(host: str, port: int) -> uvicorn.Server:
|
54
53
|
metrics_config = uvicorn.Config(
|
55
54
|
'sky.server.metrics:metrics_app',
|
56
55
|
host=host,
|
@@ -58,7 +57,7 @@ def run_metrics_server(host: str, port: int):
|
|
58
57
|
workers=1,
|
59
58
|
)
|
60
59
|
metrics_server_instance = uvicorn.Server(metrics_config)
|
61
|
-
|
60
|
+
return metrics_server_instance
|
62
61
|
|
63
62
|
|
64
63
|
def _get_status_code_group(status_code: int) -> str:
|
sky/server/requests/executor.py
CHANGED
@@ -399,11 +399,8 @@ def _request_execution_wrapper(request_id: str,
|
|
399
399
|
f'{common_utils.format_exception(e)}')
|
400
400
|
return
|
401
401
|
else:
|
402
|
-
|
403
|
-
|
404
|
-
request_task.status = api_requests.RequestStatus.SUCCEEDED
|
405
|
-
if not ignore_return_value:
|
406
|
-
request_task.set_return_value(return_value)
|
402
|
+
api_requests.set_request_succeeded(
|
403
|
+
request_id, return_value if not ignore_return_value else None)
|
407
404
|
_restore_output(original_stdout, original_stderr)
|
408
405
|
logger.info(f'Request {request_id} finished')
|
409
406
|
|