skypilot-nightly 1.0.0.dev20250808__py3-none-any.whl → 1.0.0.dev20250812__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +5 -2
- sky/backends/backend_utils.py +37 -6
- sky/backends/cloud_vm_ray_backend.py +41 -6
- sky/client/cli/command.py +22 -2
- sky/core.py +5 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/{-DXZksWqf2waNHeU9YTQe → Fuy7OzApYTUMz2QgoP7dP}/_buildManifest.js +1 -1
- sky/dashboard/out/_next/static/chunks/8056-5bdeda81199c0def.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-078751bad714c017.js +11 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-da9cc0901349c2e9.js +1 -0
- sky/dashboard/out/_next/static/chunks/{webpack-339efec49c0cc7d0.js → webpack-7fd0cf9dbecff10f.js} +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/execution.py +15 -0
- sky/global_user_state.py +102 -0
- sky/jobs/recovery_strategy.py +3 -0
- sky/jobs/server/core.py +4 -0
- sky/jobs/utils.py +9 -2
- sky/provision/__init__.py +3 -2
- sky/provision/aws/instance.py +5 -4
- sky/provision/azure/instance.py +5 -4
- sky/provision/cudo/instance.py +5 -4
- sky/provision/do/instance.py +5 -4
- sky/provision/fluidstack/instance.py +5 -4
- sky/provision/gcp/instance.py +5 -4
- sky/provision/hyperbolic/instance.py +5 -4
- sky/provision/kubernetes/instance.py +36 -6
- sky/provision/lambda_cloud/instance.py +5 -4
- sky/provision/nebius/instance.py +5 -4
- sky/provision/oci/instance.py +5 -4
- sky/provision/paperspace/instance.py +5 -4
- sky/provision/provisioner.py +6 -0
- sky/provision/runpod/instance.py +5 -4
- sky/provision/scp/instance.py +5 -5
- sky/provision/vast/instance.py +5 -5
- sky/provision/vsphere/instance.py +5 -4
- sky/schemas/db/global_user_state/001_initial_schema.py +1 -1
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/serve_state/001_initial_schema.py +1 -1
- sky/schemas/db/spot_jobs/001_initial_schema.py +1 -1
- sky/serve/serve_utils.py +37 -3
- sky/skypilot_config.py +4 -4
- sky/users/permission.py +1 -1
- sky/utils/cli_utils/status_utils.py +9 -0
- sky/utils/db/db_utils.py +22 -1
- sky/utils/db/migration_utils.py +1 -1
- {skypilot_nightly-1.0.0.dev20250808.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250808.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/RECORD +67 -66
- sky/dashboard/out/_next/static/chunks/8056-34d27f51e6d1c631.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-ae17cec0fc6483d9.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-155d477a6c3e04e2.js +0 -1
- /sky/dashboard/out/_next/static/{-DXZksWqf2waNHeU9YTQe → Fuy7OzApYTUMz2QgoP7dP}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250808.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250808.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250808.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250808.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/top_level.txt +0 -0
sky/dashboard/out/index.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/index-444f1804401f04ea.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/infra/%5Bcontext%5D-13d53fffc03ccb52.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/infra/[context]","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/infra.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/infra-fc9222e26c8e2f0d.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/infra","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-b15471acd2cba716.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/6212-7bd06f60ba693125.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-6129c1cfbcf51063.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-c9686994ddafcf01.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-85426374db04811e.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs/%5Bjob%5D-154f55cf8af55be5.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" 
defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs/[job]","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/6212-7bd06f60ba693125.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-6129c1cfbcf51063.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-c9686994ddafcf01.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs/pools/%5Bpool%5D-f5ccf5d39d87aebe.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" 
type="application/json">{"props":{"pageProps":{}},"page":"/jobs/pools/[pool]","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/jobs.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs-cdc60fb5d371e16a.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/users.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/users-7ed36e44e779d5c7.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/users","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/volumes.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/volumes-c9695d657f78b5dc.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/volumes","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspace/new","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-b15471acd2cba716.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/1559-6c00e20454194859.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-6129c1cfbcf51063.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-c9686994ddafcf01.js" defer=""></script><script src="/dashboard/_next/static/chunks/6990-0f886f16e0d55ff8.js" defer=""></script><script src="/dashboard/_next/static/chunks/8056-5bdeda81199c0def.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-85426374db04811e.js" defer=""></script><script src="/dashboard/_next/static/chunks/6601-06114c982db410b6.js" defer=""></script><script 
src="/dashboard/_next/static/chunks/9159-11421c0f2909236f.js" defer=""></script><script src="/dashboard/_next/static/chunks/1141-a8a8f1adba34c892.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-f72f73bcef9541dc.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces/[name]","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-8f67be60165724cc.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/execution.py
CHANGED
|
@@ -436,9 +436,19 @@ def _execute_dag(
|
|
|
436
436
|
logger.info(ux_utils.starting_message('Syncing files.'))
|
|
437
437
|
|
|
438
438
|
if do_workdir:
|
|
439
|
+
if cluster_name is not None:
|
|
440
|
+
global_user_state.add_cluster_event(
|
|
441
|
+
cluster_name, status_lib.ClusterStatus.INIT,
|
|
442
|
+
'Syncing files to cluster',
|
|
443
|
+
global_user_state.ClusterEventType.STATUS_CHANGE)
|
|
439
444
|
backend.sync_workdir(handle, task.workdir, task.envs_and_secrets)
|
|
440
445
|
|
|
441
446
|
if do_file_mounts:
|
|
447
|
+
if cluster_name is not None:
|
|
448
|
+
global_user_state.add_cluster_event(
|
|
449
|
+
cluster_name, status_lib.ClusterStatus.UP,
|
|
450
|
+
'Syncing file mounts',
|
|
451
|
+
global_user_state.ClusterEventType.STATUS_CHANGE)
|
|
442
452
|
backend.sync_file_mounts(handle, task.file_mounts,
|
|
443
453
|
task.storage_mounts)
|
|
444
454
|
|
|
@@ -449,6 +459,11 @@ def _execute_dag(
|
|
|
449
459
|
logger.debug('Unnecessary provisioning was skipped, so '
|
|
450
460
|
'skipping setup as well.')
|
|
451
461
|
else:
|
|
462
|
+
if cluster_name is not None:
|
|
463
|
+
global_user_state.add_cluster_event(
|
|
464
|
+
cluster_name, status_lib.ClusterStatus.UP,
|
|
465
|
+
'Running setup commands to install dependencies',
|
|
466
|
+
global_user_state.ClusterEventType.STATUS_CHANGE)
|
|
452
467
|
backend.setup(handle, task, detach_setup=detach_setup)
|
|
453
468
|
|
|
454
469
|
if Stage.PRE_EXEC in stages and not dryrun:
|
sky/global_user_state.py
CHANGED
|
@@ -6,6 +6,7 @@ Concepts:
|
|
|
6
6
|
- Cluster handle: (non-user facing) an opaque backend handle for us to
|
|
7
7
|
interact with a cluster.
|
|
8
8
|
"""
|
|
9
|
+
import enum
|
|
9
10
|
import functools
|
|
10
11
|
import json
|
|
11
12
|
import os
|
|
@@ -162,6 +163,33 @@ cluster_history_table = sqlalchemy.Table(
|
|
|
162
163
|
sqlalchemy.Column('workspace', sqlalchemy.Text, server_default=None),
|
|
163
164
|
)
|
|
164
165
|
|
|
166
|
+
|
|
167
|
+
class ClusterEventType(enum.Enum):
|
|
168
|
+
"""Type of cluster event."""
|
|
169
|
+
DEBUG = 'DEBUG'
|
|
170
|
+
"""Used to denote events that are not related to cluster status."""
|
|
171
|
+
|
|
172
|
+
STATUS_CHANGE = 'STATUS_CHANGE'
|
|
173
|
+
"""Used to denote events that modify cluster status."""
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# Table for cluster status change events.
|
|
177
|
+
# starting_status: Status of the cluster at the start of the event.
|
|
178
|
+
# ending_status: Status of the cluster at the end of the event.
|
|
179
|
+
# reason: Reason for the transition.
|
|
180
|
+
# transitioned_at: Timestamp of the transition.
|
|
181
|
+
cluster_event_table = sqlalchemy.Table(
|
|
182
|
+
'cluster_events',
|
|
183
|
+
Base.metadata,
|
|
184
|
+
sqlalchemy.Column('cluster_hash', sqlalchemy.Text, primary_key=True),
|
|
185
|
+
sqlalchemy.Column('name', sqlalchemy.Text),
|
|
186
|
+
sqlalchemy.Column('starting_status', sqlalchemy.Text),
|
|
187
|
+
sqlalchemy.Column('ending_status', sqlalchemy.Text),
|
|
188
|
+
sqlalchemy.Column('reason', sqlalchemy.Text, primary_key=True),
|
|
189
|
+
sqlalchemy.Column('transitioned_at', sqlalchemy.Integer, primary_key=True),
|
|
190
|
+
sqlalchemy.Column('type', sqlalchemy.Text),
|
|
191
|
+
)
|
|
192
|
+
|
|
165
193
|
ssh_key_table = sqlalchemy.Table(
|
|
166
194
|
'ssh_key',
|
|
167
195
|
Base.metadata,
|
|
@@ -612,6 +640,70 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
612
640
|
session.commit()
|
|
613
641
|
|
|
614
642
|
|
|
643
|
+
@_init_db
def add_cluster_event(cluster_name: str,
                      new_status: Optional[status_lib.ClusterStatus],
                      reason: str,
                      event_type: ClusterEventType,
                      nop_if_duplicate: bool = False) -> None:
    """Records an event for an existing cluster in the cluster_events table.

    The cluster's status currently stored in the cluster table is recorded as
    the event's starting status. An event whose
    (cluster_hash, reason, transitioned_at) key already exists is silently
    skipped.

    Args:
        cluster_name: Name of the cluster the event belongs to. If no hash is
            found for this cluster, the event is skipped (debug log only).
        new_status: Status stored as the event's ending status; None is
            stored as NULL.
        reason: Description of the event.
        event_type: Type of the event.
        nop_if_duplicate: If True, do nothing when the most recent event of
            the same type for this cluster already has the same reason.

    Raises:
        ValueError: If the database dialect is neither SQLite nor PostgreSQL.
    """
    assert _SQLALCHEMY_ENGINE is not None
    cluster_hash = _get_hash_for_existing_cluster(cluster_name)
    if cluster_hash is None:
        logger.debug(f'Hash for cluster {cluster_name} not found. '
                     'Skipping event.')
        return

    dialect_name = _SQLALCHEMY_ENGINE.dialect.name
    if dialect_name == db_utils.SQLAlchemyDialect.SQLITE.value:
        insert_func = sqlite.insert
    elif dialect_name == db_utils.SQLAlchemyDialect.POSTGRESQL.value:
        insert_func = postgresql.insert
    else:
        raise ValueError('Unsupported database dialect')

    # Check for a repeated reason before opening the write session so that a
    # no-op call does not issue any further queries.
    if nop_if_duplicate:
        last_event = get_last_cluster_event(cluster_hash,
                                            event_type=event_type)
        if last_event == reason:
            return

    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        # Single lookup of the cluster's current status.
        cluster_row = session.query(cluster_table).filter_by(
            name=cluster_name).first()
        last_status = cluster_row.status if cluster_row is not None else None

        # transitioned_at has one-second resolution, so the same event added
        # twice within a second collides on the primary key. Let the database
        # drop such duplicates via ON CONFLICT DO NOTHING instead of matching
        # a backend-specific error message, so that SQLite and PostgreSQL
        # behave the same and the session never ends up in a failed state.
        session.execute(
            insert_func(cluster_event_table).values(
                cluster_hash=cluster_hash,
                name=cluster_name,
                starting_status=last_status,
                ending_status=(new_status.value
                               if new_status is not None else None),
                reason=reason,
                transitioned_at=int(time.time()),
                type=event_type.value,
            ).on_conflict_do_nothing())
        session.commit()
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
def get_last_cluster_event(cluster_hash: str,
                           event_type: ClusterEventType) -> Optional[str]:
    """Returns the reason of the latest event of a given type for a cluster.

    Args:
        cluster_hash: Hash of the cluster whose events are searched.
        event_type: Only events of this type are considered.

    Returns:
        The `reason` of the matching event with the largest
        `transitioned_at`, or None if there is no matching event.
    """
    # NOTE(review): unlike add_cluster_event, this function is not wrapped in
    # @_init_db -- confirm that callers always initialize the engine first.
    assert _SQLALCHEMY_ENGINE is not None
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        matching_events = session.query(cluster_event_table).filter_by(
            cluster_hash=cluster_hash, type=event_type.value)
        newest_first = matching_events.order_by(
            cluster_event_table.c.transitioned_at.desc())
        latest = newest_first.first()
        return None if latest is None else latest.reason
|
|
705
|
+
|
|
706
|
+
|
|
615
707
|
def _get_user_hash_or_current_user(user_hash: Optional[str]) -> str:
|
|
616
708
|
"""Returns the user hash or the current user hash, if user_hash is None.
|
|
617
709
|
|
|
@@ -662,6 +754,8 @@ def remove_cluster(cluster_name: str, terminate: bool) -> None:
|
|
|
662
754
|
|
|
663
755
|
if terminate:
|
|
664
756
|
session.query(cluster_table).filter_by(name=cluster_name).delete()
|
|
757
|
+
session.query(cluster_event_table).filter_by(
|
|
758
|
+
cluster_hash=cluster_hash).delete()
|
|
665
759
|
else:
|
|
666
760
|
handle = get_handle_from_cluster_name(cluster_name)
|
|
667
761
|
if handle is None:
|
|
@@ -948,6 +1042,8 @@ def get_cluster_from_name(
|
|
|
948
1042
|
user_hash = _get_user_hash_or_current_user(row.user_hash)
|
|
949
1043
|
user = get_user(user_hash)
|
|
950
1044
|
user_name = user.name if user is not None else None
|
|
1045
|
+
last_event = get_last_cluster_event(
|
|
1046
|
+
row.cluster_hash, event_type=ClusterEventType.STATUS_CHANGE)
|
|
951
1047
|
# TODO: use namedtuple instead of dict
|
|
952
1048
|
record = {
|
|
953
1049
|
'name': row.name,
|
|
@@ -971,6 +1067,7 @@ def get_cluster_from_name(
|
|
|
971
1067
|
'last_creation_yaml': row.last_creation_yaml,
|
|
972
1068
|
'last_creation_command': row.last_creation_command,
|
|
973
1069
|
'is_managed': bool(row.is_managed),
|
|
1070
|
+
'last_event': last_event,
|
|
974
1071
|
}
|
|
975
1072
|
|
|
976
1073
|
return record
|
|
@@ -987,6 +1084,8 @@ def get_clusters() -> List[Dict[str, Any]]:
|
|
|
987
1084
|
user_hash = _get_user_hash_or_current_user(row.user_hash)
|
|
988
1085
|
user = get_user(user_hash)
|
|
989
1086
|
user_name = user.name if user is not None else None
|
|
1087
|
+
last_event = get_last_cluster_event(
|
|
1088
|
+
row.cluster_hash, event_type=ClusterEventType.STATUS_CHANGE)
|
|
990
1089
|
# TODO: use namedtuple instead of dict
|
|
991
1090
|
record = {
|
|
992
1091
|
'name': row.name,
|
|
@@ -1010,6 +1109,7 @@ def get_clusters() -> List[Dict[str, Any]]:
|
|
|
1010
1109
|
'last_creation_yaml': row.last_creation_yaml,
|
|
1011
1110
|
'last_creation_command': row.last_creation_command,
|
|
1012
1111
|
'is_managed': bool(row.is_managed),
|
|
1112
|
+
'last_event': last_event,
|
|
1013
1113
|
}
|
|
1014
1114
|
|
|
1015
1115
|
records.append(record)
|
|
@@ -1130,6 +1230,8 @@ def get_clusters_from_history(
|
|
|
1130
1230
|
'workspace': workspace,
|
|
1131
1231
|
'last_creation_yaml': row.last_creation_yaml,
|
|
1132
1232
|
'last_creation_command': row.last_creation_command,
|
|
1233
|
+
'last_event': get_last_cluster_event(
|
|
1234
|
+
row.cluster_hash, event_type=ClusterEventType.STATUS_CHANGE),
|
|
1133
1235
|
}
|
|
1134
1236
|
|
|
1135
1237
|
records.append(record)
|
sky/jobs/recovery_strategy.py
CHANGED
|
@@ -261,6 +261,9 @@ class StrategyExecutor:
|
|
|
261
261
|
if self.cluster_name is None:
|
|
262
262
|
return
|
|
263
263
|
if self.pool is None:
|
|
264
|
+
global_user_state.add_cluster_event(
|
|
265
|
+
self.cluster_name, None, 'Cluster was cleaned up.',
|
|
266
|
+
global_user_state.ClusterEventType.STATUS_CHANGE)
|
|
264
267
|
managed_job_utils.terminate_cluster(self.cluster_name)
|
|
265
268
|
|
|
266
269
|
def _launch(self,
|
sky/jobs/server/core.py
CHANGED
|
@@ -547,6 +547,10 @@ def _maybe_restart_controller(
|
|
|
547
547
|
'controller'))
|
|
548
548
|
with skypilot_config.local_active_workspace_ctx(
|
|
549
549
|
skylet_constants.SKYPILOT_DEFAULT_WORKSPACE):
|
|
550
|
+
global_user_state.add_cluster_event(
|
|
551
|
+
jobs_controller_type.value.cluster_name,
|
|
552
|
+
status_lib.ClusterStatus.INIT, 'Jobs controller restarted.',
|
|
553
|
+
global_user_state.ClusterEventType.STATUS_CHANGE)
|
|
550
554
|
handle = core.start(
|
|
551
555
|
cluster_name=jobs_controller_type.value.cluster_name)
|
|
552
556
|
|
sky/jobs/utils.py
CHANGED
|
@@ -141,7 +141,7 @@ def _validate_consolidation_mode_config(
|
|
|
141
141
|
if global_user_state.get_cluster_from_name(controller_cn) is not None:
|
|
142
142
|
with ux_utils.print_exception_no_traceback():
|
|
143
143
|
raise exceptions.InconsistentConsolidationModeError(
|
|
144
|
-
f'{colorama.Fore.RED}Consolidation mode is '
|
|
144
|
+
f'{colorama.Fore.RED}Consolidation mode for jobs is '
|
|
145
145
|
f'enabled, but the controller cluster '
|
|
146
146
|
f'{controller_cn} is still running. Please '
|
|
147
147
|
'terminate the controller cluster first.'
|
|
@@ -179,7 +179,11 @@ def _validate_consolidation_mode_config(
|
|
|
179
179
|
def is_consolidation_mode() -> bool:
    """Returns the configured `jobs.controller.consolidation_mode` value.

    Defaults to False when the option is not set. When the
    ENV_VAR_IS_SKYPILOT_SERVER environment variable is set, the value is
    first passed to `_validate_consolidation_mode_config`.
    """
    consolidation_mode = skypilot_config.get_nested(
        ('jobs', 'controller', 'consolidation_mode'), default_value=False)
    # Only run this check on the API server: the controller does not have the
    # related config, so consolidation mode would always appear to be disabled
    # there. See #6611 for more details.
    if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
        _validate_consolidation_mode_config(consolidation_mode)
    return consolidation_mode
|
|
184
188
|
|
|
185
189
|
|
|
@@ -333,6 +337,9 @@ def update_managed_jobs_statuses(job_id: Optional[int] = None):
|
|
|
333
337
|
if handle is not None:
|
|
334
338
|
try:
|
|
335
339
|
if pool is None:
|
|
340
|
+
global_user_state.add_cluster_event(
|
|
341
|
+
cluster_name, None, 'Cluster was cleaned up.',
|
|
342
|
+
global_user_state.ClusterEventType.STATUS_CHANGE)
|
|
336
343
|
terminate_cluster(cluster_name)
|
|
337
344
|
except Exception as e: # pylint: disable=broad-except
|
|
338
345
|
error_msg = (
|
sky/provision/__init__.py
CHANGED
|
@@ -76,10 +76,11 @@ def query_instances(
|
|
|
76
76
|
cluster_name_on_cloud: str,
|
|
77
77
|
provider_config: Optional[Dict[str, Any]] = None,
|
|
78
78
|
non_terminated_only: bool = True,
|
|
79
|
-
) -> Dict[str, Optional['status_lib.ClusterStatus']]:
|
|
79
|
+
) -> Dict[str, Tuple[Optional['status_lib.ClusterStatus'], Optional[str]]]:
|
|
80
80
|
"""Query instances.
|
|
81
81
|
|
|
82
|
-
Returns a dictionary of instance IDs and status
|
|
82
|
+
Returns a dictionary of instance IDs and a tuple of (status, reason for
|
|
83
|
+
being in status if any).
|
|
83
84
|
|
|
84
85
|
A None status means the instance is marked as "terminated"
|
|
85
86
|
or "terminating".
|
sky/provision/aws/instance.py
CHANGED
|
@@ -10,7 +10,7 @@ from multiprocessing import pool
|
|
|
10
10
|
import re
|
|
11
11
|
import time
|
|
12
12
|
import typing
|
|
13
|
-
from typing import Any, Callable, Dict, List, Optional, Set, TypeVar
|
|
13
|
+
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, TypeVar
|
|
14
14
|
|
|
15
15
|
from sky import sky_logging
|
|
16
16
|
from sky.adaptors import aws
|
|
@@ -588,7 +588,7 @@ def query_instances(
|
|
|
588
588
|
cluster_name_on_cloud: str,
|
|
589
589
|
provider_config: Optional[Dict[str, Any]] = None,
|
|
590
590
|
non_terminated_only: bool = True,
|
|
591
|
-
) -> Dict[str, Optional[status_lib.ClusterStatus]]:
|
|
591
|
+
) -> Dict[str, Tuple[Optional['status_lib.ClusterStatus'], Optional[str]]]:
|
|
592
592
|
"""See sky/provision/__init__.py"""
|
|
593
593
|
assert provider_config is not None, (cluster_name_on_cloud, provider_config)
|
|
594
594
|
region = provider_config['region']
|
|
@@ -608,12 +608,13 @@ def query_instances(
|
|
|
608
608
|
'shutting-down': None,
|
|
609
609
|
'terminated': None,
|
|
610
610
|
}
|
|
611
|
-
statuses
|
|
611
|
+
statuses: Dict[str, Tuple[Optional['status_lib.ClusterStatus'],
|
|
612
|
+
Optional[str]]] = {}
|
|
612
613
|
for inst in instances:
|
|
613
614
|
status = status_map[inst.state['Name']]
|
|
614
615
|
if non_terminated_only and status is None:
|
|
615
616
|
continue
|
|
616
|
-
statuses[inst.id] = status
|
|
617
|
+
statuses[inst.id] = (status, None)
|
|
617
618
|
return statuses
|
|
618
619
|
|
|
619
620
|
|
sky/provision/azure/instance.py
CHANGED
|
@@ -955,7 +955,7 @@ def query_instances(
|
|
|
955
955
|
cluster_name_on_cloud: str,
|
|
956
956
|
provider_config: Optional[Dict[str, Any]] = None,
|
|
957
957
|
non_terminated_only: bool = True,
|
|
958
|
-
) -> Dict[str, Optional[status_lib.ClusterStatus]]:
|
|
958
|
+
) -> Dict[str, Tuple[Optional['status_lib.ClusterStatus'], Optional[str]]]:
|
|
959
959
|
"""See sky/provision/__init__.py"""
|
|
960
960
|
assert provider_config is not None, cluster_name_on_cloud
|
|
961
961
|
|
|
@@ -964,7 +964,8 @@ def query_instances(
|
|
|
964
964
|
filters = {constants.TAG_RAY_CLUSTER_NAME: cluster_name_on_cloud}
|
|
965
965
|
compute_client = azure.get_client('compute', subscription_id)
|
|
966
966
|
nodes = _filter_instances(compute_client, resource_group, filters)
|
|
967
|
-
statuses: Dict[str, Optional[status_lib.ClusterStatus]
|
|
967
|
+
statuses: Dict[str, Tuple[Optional['status_lib.ClusterStatus'],
|
|
968
|
+
Optional[str]]] = {}
|
|
968
969
|
|
|
969
970
|
def _fetch_and_map_status(node, resource_group: str) -> None:
|
|
970
971
|
compute_client = azure.get_client('compute', subscription_id)
|
|
@@ -972,8 +973,8 @@ def query_instances(
|
|
|
972
973
|
|
|
973
974
|
if status is None and non_terminated_only:
|
|
974
975
|
return
|
|
975
|
-
statuses[node.name] = (None if status is None else
|
|
976
|
-
|
|
976
|
+
statuses[node.name] = ((None if status is None else
|
|
977
|
+
status.to_cluster_status()), None)
|
|
977
978
|
|
|
978
979
|
with pool.ThreadPool() as p:
|
|
979
980
|
p.starmap(_fetch_and_map_status,
|
sky/provision/cudo/instance.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Cudo Compute instance provisioning."""
|
|
2
2
|
|
|
3
3
|
import time
|
|
4
|
-
from typing import Any, Dict, List, Optional
|
|
4
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
5
5
|
|
|
6
6
|
from sky import sky_logging
|
|
7
7
|
from sky.provision import common
|
|
@@ -194,7 +194,7 @@ def query_instances(
|
|
|
194
194
|
cluster_name_on_cloud: str,
|
|
195
195
|
provider_config: Optional[Dict[str, Any]] = None,
|
|
196
196
|
non_terminated_only: bool = True,
|
|
197
|
-
) -> Dict[str, Optional[status_lib.ClusterStatus]]:
|
|
197
|
+
) -> Dict[str, Tuple[Optional['status_lib.ClusterStatus'], Optional[str]]]:
|
|
198
198
|
"""See sky/provision/__init__.py"""
|
|
199
199
|
assert provider_config is not None, (cluster_name_on_cloud, provider_config)
|
|
200
200
|
instances = _filter_instances(cluster_name_on_cloud, None)
|
|
@@ -210,12 +210,13 @@ def query_instances(
|
|
|
210
210
|
'done': status_lib.ClusterStatus.STOPPED,
|
|
211
211
|
'poff': status_lib.ClusterStatus.STOPPED,
|
|
212
212
|
}
|
|
213
|
-
statuses: Dict[str, Optional[status_lib.ClusterStatus]
|
|
213
|
+
statuses: Dict[str, Tuple[Optional['status_lib.ClusterStatus'],
|
|
214
|
+
Optional[str]]] = {}
|
|
214
215
|
for inst_id, inst in instances.items():
|
|
215
216
|
status = status_map[inst['status']]
|
|
216
217
|
if non_terminated_only and status is None:
|
|
217
218
|
continue
|
|
218
|
-
statuses[inst_id] = status
|
|
219
|
+
statuses[inst_id] = (status, None)
|
|
219
220
|
return statuses
|
|
220
221
|
|
|
221
222
|
|
sky/provision/do/instance.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""DigitalOcean instance provisioning."""
|
|
2
2
|
|
|
3
3
|
import time
|
|
4
|
-
from typing import Any, Dict, List, Optional
|
|
4
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
5
5
|
import uuid
|
|
6
6
|
|
|
7
7
|
from sky import sky_logging
|
|
@@ -245,7 +245,7 @@ def query_instances(
|
|
|
245
245
|
cluster_name_on_cloud: str,
|
|
246
246
|
provider_config: Optional[Dict[str, Any]] = None,
|
|
247
247
|
non_terminated_only: bool = True,
|
|
248
|
-
) -> Dict[str, Optional[status_lib.ClusterStatus]]:
|
|
248
|
+
) -> Dict[str, Tuple[Optional['status_lib.ClusterStatus'], Optional[str]]]:
|
|
249
249
|
"""See sky/provision/__init__.py"""
|
|
250
250
|
# terminated instances are not retrieved by the
|
|
251
251
|
# API making `non_terminated_only` argument moot.
|
|
@@ -260,10 +260,11 @@ def query_instances(
|
|
|
260
260
|
'active': status_lib.ClusterStatus.UP,
|
|
261
261
|
'off': status_lib.ClusterStatus.STOPPED,
|
|
262
262
|
}
|
|
263
|
-
statuses: Dict[str, Optional[status_lib.ClusterStatus]
|
|
263
|
+
statuses: Dict[str, Tuple[Optional['status_lib.ClusterStatus'],
|
|
264
|
+
Optional[str]]] = {}
|
|
264
265
|
for instance_meta in instances.values():
|
|
265
266
|
status = status_map[instance_meta['status']]
|
|
266
|
-
statuses[instance_meta['name']] = status
|
|
267
|
+
statuses[instance_meta['name']] = (status, None)
|
|
267
268
|
return statuses
|
|
268
269
|
|
|
269
270
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""FluidStack instance provisioning."""
|
|
2
2
|
import os
|
|
3
3
|
import time
|
|
4
|
-
from typing import Any, Dict, List, Optional
|
|
4
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
5
5
|
|
|
6
6
|
from sky import authentication as auth
|
|
7
7
|
from sky import exceptions
|
|
@@ -290,7 +290,7 @@ def query_instances(
|
|
|
290
290
|
cluster_name_on_cloud: str,
|
|
291
291
|
provider_config: Optional[Dict[str, Any]] = None,
|
|
292
292
|
non_terminated_only: bool = True,
|
|
293
|
-
) -> Dict[str, Optional[status_lib.ClusterStatus]]:
|
|
293
|
+
) -> Dict[str, Tuple[Optional['status_lib.ClusterStatus'], Optional[str]]]:
|
|
294
294
|
"""See sky/provision/__init__.py"""
|
|
295
295
|
assert provider_config is not None, (cluster_name_on_cloud, provider_config)
|
|
296
296
|
instances = _filter_instances(cluster_name_on_cloud, None)
|
|
@@ -302,7 +302,8 @@ def query_instances(
|
|
|
302
302
|
'failed': status_lib.ClusterStatus.INIT,
|
|
303
303
|
'terminated': None,
|
|
304
304
|
}
|
|
305
|
-
statuses: Dict[str, Optional[status_lib.ClusterStatus]
|
|
305
|
+
statuses: Dict[str, Tuple[Optional['status_lib.ClusterStatus'],
|
|
306
|
+
Optional[str]]] = {}
|
|
306
307
|
for inst_id, inst in instances.items():
|
|
307
308
|
if inst['status'] not in status_map:
|
|
308
309
|
with ux_utils.print_exception_no_traceback():
|
|
@@ -311,7 +312,7 @@ def query_instances(
|
|
|
311
312
|
status = status_map.get(inst['status'], None)
|
|
312
313
|
if non_terminated_only and status is None:
|
|
313
314
|
continue
|
|
314
|
-
statuses[inst_id] = status
|
|
315
|
+
statuses[inst_id] = (status, None)
|
|
315
316
|
return statuses
|
|
316
317
|
|
|
317
318
|
|