skypilot-nightly 1.0.0.dev20250807__py3-none-any.whl → 1.0.0.dev20250812__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +5 -2
- sky/backends/backend_utils.py +57 -7
- sky/backends/cloud_vm_ray_backend.py +50 -8
- sky/client/cli/command.py +60 -26
- sky/client/sdk.py +132 -65
- sky/client/sdk_async.py +1 -1
- sky/core.py +10 -2
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/{YAirOGsV1z6B2RJ0VIUmD → Fuy7OzApYTUMz2QgoP7dP}/_buildManifest.js +1 -1
- sky/dashboard/out/_next/static/chunks/{6601-3e21152fe16da09c.js → 6601-06114c982db410b6.js} +1 -1
- sky/dashboard/out/_next/static/chunks/8056-5bdeda81199c0def.js +1 -0
- sky/dashboard/out/_next/static/chunks/{8969-318c3dca725e8e5d.js → 8969-c9686994ddafcf01.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{_app-1e6de35d15a8d432.js → _app-491a4d699d95e808.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-078751bad714c017.js +11 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-da9cc0901349c2e9.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-7fd0cf9dbecff10f.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/execution.py +21 -4
- sky/global_user_state.py +110 -1
- sky/jobs/client/sdk.py +27 -20
- sky/jobs/controller.py +2 -1
- sky/jobs/recovery_strategy.py +3 -0
- sky/jobs/server/core.py +4 -0
- sky/jobs/utils.py +9 -2
- sky/provision/__init__.py +3 -2
- sky/provision/aws/instance.py +5 -4
- sky/provision/azure/instance.py +5 -4
- sky/provision/cudo/instance.py +5 -4
- sky/provision/do/instance.py +5 -4
- sky/provision/fluidstack/instance.py +5 -4
- sky/provision/gcp/instance.py +5 -4
- sky/provision/hyperbolic/instance.py +5 -4
- sky/provision/kubernetes/instance.py +36 -6
- sky/provision/lambda_cloud/instance.py +5 -4
- sky/provision/nebius/instance.py +5 -4
- sky/provision/oci/instance.py +5 -4
- sky/provision/paperspace/instance.py +5 -4
- sky/provision/provisioner.py +6 -0
- sky/provision/runpod/instance.py +5 -4
- sky/provision/scp/instance.py +5 -5
- sky/provision/vast/instance.py +5 -5
- sky/provision/vsphere/instance.py +5 -4
- sky/schemas/db/global_user_state/001_initial_schema.py +1 -1
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +1 -1
- sky/serve/client/impl.py +11 -8
- sky/serve/client/sdk.py +7 -7
- sky/serve/serve_state.py +437 -340
- sky/serve/serve_utils.py +37 -3
- sky/serve/server/impl.py +2 -2
- sky/server/common.py +12 -8
- sky/server/constants.py +1 -1
- sky/setup_files/alembic.ini +4 -0
- sky/skypilot_config.py +4 -4
- sky/users/permission.py +1 -1
- sky/utils/cli_utils/status_utils.py +10 -1
- sky/utils/db/db_utils.py +53 -1
- sky/utils/db/migration_utils.py +5 -1
- sky/utils/kubernetes/deploy_remote_cluster.py +3 -1
- sky/utils/resource_checker.py +162 -21
- sky/volumes/client/sdk.py +4 -4
- sky/workspaces/core.py +210 -6
- {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/METADATA +2 -2
- {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/RECORD +87 -83
- sky/dashboard/out/_next/static/chunks/8056-019615038d6ce427.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6fd1d2d8441aa54b.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-155d477a6c3e04e2.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-76efbdad99742559.js +0 -1
- /sky/dashboard/out/_next/static/{YAirOGsV1z6B2RJ0VIUmD → Fuy7OzApYTUMz2QgoP7dP}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/top_level.txt +0 -0
sky/dashboard/out/index.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/index-444f1804401f04ea.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/infra/%5Bcontext%5D-13d53fffc03ccb52.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/infra/[context]","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/infra.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/infra-fc9222e26c8e2f0d.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/infra","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-b15471acd2cba716.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/6212-7bd06f60ba693125.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-6129c1cfbcf51063.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-c9686994ddafcf01.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-85426374db04811e.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs/%5Bjob%5D-154f55cf8af55be5.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" 
defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs/[job]","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/6212-7bd06f60ba693125.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-6129c1cfbcf51063.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-c9686994ddafcf01.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs/pools/%5Bpool%5D-f5ccf5d39d87aebe.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" 
type="application/json">{"props":{"pageProps":{}},"page":"/jobs/pools/[pool]","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/jobs.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs-cdc60fb5d371e16a.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/users.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/users-7ed36e44e779d5c7.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/users","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/volumes.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/volumes-c9695d657f78b5dc.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/volumes","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspace/new","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-b15471acd2cba716.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/1559-6c00e20454194859.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-6129c1cfbcf51063.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-c9686994ddafcf01.js" defer=""></script><script src="/dashboard/_next/static/chunks/6990-0f886f16e0d55ff8.js" defer=""></script><script src="/dashboard/_next/static/chunks/8056-5bdeda81199c0def.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-85426374db04811e.js" defer=""></script><script src="/dashboard/_next/static/chunks/6601-06114c982db410b6.js" defer=""></script><script 
src="/dashboard/_next/static/chunks/9159-11421c0f2909236f.js" defer=""></script><script src="/dashboard/_next/static/chunks/1141-a8a8f1adba34c892.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-f72f73bcef9541dc.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces/[name]","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-7fd0cf9dbecff10f.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-491a4d699d95e808.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-8f67be60165724cc.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Fuy7OzApYTUMz2QgoP7dP/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"Fuy7OzApYTUMz2QgoP7dP","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/execution.py
CHANGED
|
@@ -353,12 +353,13 @@ def _execute_dag(
|
|
|
353
353
|
task = _maybe_clone_disk_from_cluster(clone_disk_from, cluster_name,
|
|
354
354
|
task)
|
|
355
355
|
|
|
356
|
+
is_managed = (_is_launched_by_jobs_controller or
|
|
357
|
+
_is_launched_by_sky_serve_controller)
|
|
358
|
+
|
|
356
359
|
if not cluster_exists:
|
|
357
360
|
# If spot is launched on serve or jobs controller, we don't need to
|
|
358
361
|
# print out the hint.
|
|
359
|
-
if (Stage.PROVISION in stages and task.use_spot and
|
|
360
|
-
not _is_launched_by_jobs_controller and
|
|
361
|
-
not _is_launched_by_sky_serve_controller):
|
|
362
|
+
if (Stage.PROVISION in stages and task.use_spot and not is_managed):
|
|
362
363
|
yellow = colorama.Fore.YELLOW
|
|
363
364
|
bold = colorama.Style.BRIGHT
|
|
364
365
|
reset = colorama.Style.RESET_ALL
|
|
@@ -397,7 +398,8 @@ def _execute_dag(
|
|
|
397
398
|
# That's because we want to do commands in task.setup and task.run again
|
|
398
399
|
# after K8S pod recovers from a crash.
|
|
399
400
|
# See `kubernetes-ray.yml.j2` for more details.
|
|
400
|
-
dump_final_script=is_controller_high_availability_supported
|
|
401
|
+
dump_final_script=is_controller_high_availability_supported,
|
|
402
|
+
is_managed=is_managed)
|
|
401
403
|
|
|
402
404
|
if task.storage_mounts is not None:
|
|
403
405
|
# Optimizer should eventually choose where to store bucket
|
|
@@ -434,9 +436,19 @@ def _execute_dag(
|
|
|
434
436
|
logger.info(ux_utils.starting_message('Syncing files.'))
|
|
435
437
|
|
|
436
438
|
if do_workdir:
|
|
439
|
+
if cluster_name is not None:
|
|
440
|
+
global_user_state.add_cluster_event(
|
|
441
|
+
cluster_name, status_lib.ClusterStatus.INIT,
|
|
442
|
+
'Syncing files to cluster',
|
|
443
|
+
global_user_state.ClusterEventType.STATUS_CHANGE)
|
|
437
444
|
backend.sync_workdir(handle, task.workdir, task.envs_and_secrets)
|
|
438
445
|
|
|
439
446
|
if do_file_mounts:
|
|
447
|
+
if cluster_name is not None:
|
|
448
|
+
global_user_state.add_cluster_event(
|
|
449
|
+
cluster_name, status_lib.ClusterStatus.UP,
|
|
450
|
+
'Syncing file mounts',
|
|
451
|
+
global_user_state.ClusterEventType.STATUS_CHANGE)
|
|
440
452
|
backend.sync_file_mounts(handle, task.file_mounts,
|
|
441
453
|
task.storage_mounts)
|
|
442
454
|
|
|
@@ -447,6 +459,11 @@ def _execute_dag(
|
|
|
447
459
|
logger.debug('Unnecessary provisioning was skipped, so '
|
|
448
460
|
'skipping setup as well.')
|
|
449
461
|
else:
|
|
462
|
+
if cluster_name is not None:
|
|
463
|
+
global_user_state.add_cluster_event(
|
|
464
|
+
cluster_name, status_lib.ClusterStatus.UP,
|
|
465
|
+
'Running setup commands to install dependencies',
|
|
466
|
+
global_user_state.ClusterEventType.STATUS_CHANGE)
|
|
450
467
|
backend.setup(handle, task, detach_setup=detach_setup)
|
|
451
468
|
|
|
452
469
|
if Stage.PRE_EXEC in stages and not dryrun:
|
sky/global_user_state.py
CHANGED
|
@@ -6,6 +6,7 @@ Concepts:
|
|
|
6
6
|
- Cluster handle: (non-user facing) an opaque backend handle for us to
|
|
7
7
|
interact with a cluster.
|
|
8
8
|
"""
|
|
9
|
+
import enum
|
|
9
10
|
import functools
|
|
10
11
|
import json
|
|
11
12
|
import os
|
|
@@ -100,6 +101,7 @@ cluster_table = sqlalchemy.Table(
|
|
|
100
101
|
sqlalchemy.Column('last_creation_command',
|
|
101
102
|
sqlalchemy.Text,
|
|
102
103
|
server_default=None),
|
|
104
|
+
sqlalchemy.Column('is_managed', sqlalchemy.Integer, server_default='0'),
|
|
103
105
|
)
|
|
104
106
|
|
|
105
107
|
storage_table = sqlalchemy.Table(
|
|
@@ -161,6 +163,33 @@ cluster_history_table = sqlalchemy.Table(
|
|
|
161
163
|
sqlalchemy.Column('workspace', sqlalchemy.Text, server_default=None),
|
|
162
164
|
)
|
|
163
165
|
|
|
166
|
+
|
|
167
|
+
class ClusterEventType(enum.Enum):
    """Kinds of events that can be recorded for a cluster."""

    # Events that are not related to cluster status.
    DEBUG = 'DEBUG'

    # Events that modify cluster status.
    STATUS_CHANGE = 'STATUS_CHANGE'
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# Table for cluster events (rows are written by add_cluster_event).
# cluster_hash: Hash of the cluster the event belongs to.
# name: Name of the cluster the event was recorded for.
# starting_status: Status of the cluster at the start of the event.
# ending_status: Status of the cluster at the end of the event (may be NULL
#   when the event carries no new status).
# reason: Reason for the transition.
# transitioned_at: Timestamp of the transition, stored as an integer.
# type: Value of the ClusterEventType the event was recorded with.
#
# The primary key is the composite (cluster_hash, reason, transitioned_at),
# so two events for the same cluster with the same reason and the same
# integer timestamp collide.
cluster_event_table = sqlalchemy.Table(
    'cluster_events',
    Base.metadata,
    sqlalchemy.Column('cluster_hash', sqlalchemy.Text, primary_key=True),
    sqlalchemy.Column('name', sqlalchemy.Text),
    sqlalchemy.Column('starting_status', sqlalchemy.Text),
    sqlalchemy.Column('ending_status', sqlalchemy.Text),
    sqlalchemy.Column('reason', sqlalchemy.Text, primary_key=True),
    sqlalchemy.Column('transitioned_at', sqlalchemy.Integer, primary_key=True),
    sqlalchemy.Column('type', sqlalchemy.Text),
)
|
|
192
|
+
|
|
164
193
|
ssh_key_table = sqlalchemy.Table(
|
|
165
194
|
'ssh_key',
|
|
166
195
|
Base.metadata,
|
|
@@ -428,7 +457,8 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
428
457
|
ready: bool,
|
|
429
458
|
is_launch: bool = True,
|
|
430
459
|
config_hash: Optional[str] = None,
|
|
431
|
-
task_config: Optional[Dict[str, Any]] = None
|
|
460
|
+
task_config: Optional[Dict[str, Any]] = None,
|
|
461
|
+
is_managed: bool = False):
|
|
432
462
|
"""Adds or updates cluster_name -> cluster_handle mapping.
|
|
433
463
|
|
|
434
464
|
Args:
|
|
@@ -441,6 +471,8 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
441
471
|
and last_use will be updated. Otherwise, use the old value.
|
|
442
472
|
config_hash: Configuration hash for the cluster.
|
|
443
473
|
task_config: The config of the task being launched.
|
|
474
|
+
is_managed: Whether the cluster is launched by the
|
|
475
|
+
controller.
|
|
444
476
|
"""
|
|
445
477
|
assert _SQLALCHEMY_ENGINE is not None
|
|
446
478
|
# FIXME: launched_at will be changed when `sky launch -c` is called.
|
|
@@ -544,6 +576,7 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
544
576
|
cluster_hash=cluster_hash,
|
|
545
577
|
# set storage_mounts_metadata to server default (null)
|
|
546
578
|
status_updated_at=status_updated_at,
|
|
579
|
+
is_managed=int(is_managed),
|
|
547
580
|
)
|
|
548
581
|
do_update_stmt = insert_stmnt.on_conflict_do_update(
|
|
549
582
|
index_elements=[cluster_table.c.name],
|
|
@@ -607,6 +640,70 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
607
640
|
session.commit()
|
|
608
641
|
|
|
609
642
|
|
|
643
|
+
@_init_db
def add_cluster_event(cluster_name: str,
                      new_status: Optional[status_lib.ClusterStatus],
                      reason: str,
                      event_type: ClusterEventType,
                      nop_if_duplicate: bool = False) -> None:
    """Records an event for an existing cluster in the cluster_events table.

    The event stores the cluster's current status (as found in the clusters
    table) as the starting status and `new_status` as the ending status.
    If no hash is known for `cluster_name`, the event is skipped.

    Args:
        cluster_name: Name of the cluster the event belongs to.
        new_status: Status the cluster ends up in, or None if the event does
            not carry a new status.
        reason: Human-readable description of the event.
        event_type: Category of the event.
        nop_if_duplicate: If True, do nothing when the most recent event of
            the same type for this cluster already has the same reason.

    Raises:
        ValueError: If the database dialect is neither SQLite nor PostgreSQL.
    """
    assert _SQLALCHEMY_ENGINE is not None
    cluster_hash = _get_hash_for_existing_cluster(cluster_name)
    if cluster_hash is None:
        logger.debug(f'Hash for cluster {cluster_name} not found. '
                     'Skipping event.')
        return

    # Resolve the dialect-specific insert before opening a session; it is
    # needed for the ON CONFLICT clause below.
    dialect_name = _SQLALCHEMY_ENGINE.dialect.name
    if dialect_name == db_utils.SQLAlchemyDialect.SQLITE.value:
        insert_func = sqlite.insert
    elif dialect_name == db_utils.SQLAlchemyDialect.POSTGRESQL.value:
        insert_func = postgresql.insert
    else:
        raise ValueError('Unsupported database dialect')

    if nop_if_duplicate:
        last_event = get_last_cluster_event(cluster_hash,
                                            event_type=event_type)
        if last_event == reason:
            return

    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        # Fetch the row once; the Query object itself is always truthy, so
        # only the fetched row can tell whether the cluster exists.
        cluster_row = session.query(cluster_table).filter_by(
            name=cluster_name).first()
        last_status = cluster_row.status if cluster_row is not None else None

        # The same event can be added twice (same cluster, reason and
        # timestamp), which collides on the primary key. Ignore such
        # duplicates in the statement itself instead of matching on a
        # backend-specific error message, so that SQLite and PostgreSQL
        # behave the same. Other integrity errors still propagate.
        insert_stmt = insert_func(cluster_event_table).values(
            cluster_hash=cluster_hash,
            name=cluster_name,
            starting_status=last_status,
            ending_status=new_status.value if new_status else None,
            reason=reason,
            transitioned_at=int(time.time()),
            type=event_type.value,
        ).on_conflict_do_nothing()
        session.execute(insert_stmt)
        session.commit()
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
@_init_db
def get_last_cluster_event(cluster_hash: str,
                           event_type: ClusterEventType) -> Optional[str]:
    """Returns the reason of the latest event of a given type for a cluster.

    Decorated with `_init_db`, like the other database accessors in this
    module, so that it can also be called directly rather than only from
    already-decorated callers.

    Args:
        cluster_hash: Hash of the cluster whose events are looked up.
        event_type: Only events recorded with this type are considered.

    Returns:
        The `reason` of the event with the largest `transitioned_at`, or None
        if the cluster has no event of this type.
    """
    assert _SQLALCHEMY_ENGINE is not None
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        # NOTE(review): `transitioned_at` is an integer timestamp, so events
        # sharing the same value have no defined relative order here.
        latest_event = session.query(cluster_event_table).filter_by(
            cluster_hash=cluster_hash, type=event_type.value).order_by(
                cluster_event_table.c.transitioned_at.desc()).first()
        if latest_event is None:
            return None
        return latest_event.reason
|
|
705
|
+
|
|
706
|
+
|
|
610
707
|
def _get_user_hash_or_current_user(user_hash: Optional[str]) -> str:
|
|
611
708
|
"""Returns the user hash or the current user hash, if user_hash is None.
|
|
612
709
|
|
|
@@ -657,6 +754,8 @@ def remove_cluster(cluster_name: str, terminate: bool) -> None:
|
|
|
657
754
|
|
|
658
755
|
if terminate:
|
|
659
756
|
session.query(cluster_table).filter_by(name=cluster_name).delete()
|
|
757
|
+
session.query(cluster_event_table).filter_by(
|
|
758
|
+
cluster_hash=cluster_hash).delete()
|
|
660
759
|
else:
|
|
661
760
|
handle = get_handle_from_cluster_name(cluster_name)
|
|
662
761
|
if handle is None:
|
|
@@ -943,6 +1042,8 @@ def get_cluster_from_name(
|
|
|
943
1042
|
user_hash = _get_user_hash_or_current_user(row.user_hash)
|
|
944
1043
|
user = get_user(user_hash)
|
|
945
1044
|
user_name = user.name if user is not None else None
|
|
1045
|
+
last_event = get_last_cluster_event(
|
|
1046
|
+
row.cluster_hash, event_type=ClusterEventType.STATUS_CHANGE)
|
|
946
1047
|
# TODO: use namedtuple instead of dict
|
|
947
1048
|
record = {
|
|
948
1049
|
'name': row.name,
|
|
@@ -965,6 +1066,8 @@ def get_cluster_from_name(
|
|
|
965
1066
|
'workspace': row.workspace,
|
|
966
1067
|
'last_creation_yaml': row.last_creation_yaml,
|
|
967
1068
|
'last_creation_command': row.last_creation_command,
|
|
1069
|
+
'is_managed': bool(row.is_managed),
|
|
1070
|
+
'last_event': last_event,
|
|
968
1071
|
}
|
|
969
1072
|
|
|
970
1073
|
return record
|
|
@@ -981,6 +1084,8 @@ def get_clusters() -> List[Dict[str, Any]]:
|
|
|
981
1084
|
user_hash = _get_user_hash_or_current_user(row.user_hash)
|
|
982
1085
|
user = get_user(user_hash)
|
|
983
1086
|
user_name = user.name if user is not None else None
|
|
1087
|
+
last_event = get_last_cluster_event(
|
|
1088
|
+
row.cluster_hash, event_type=ClusterEventType.STATUS_CHANGE)
|
|
984
1089
|
# TODO: use namedtuple instead of dict
|
|
985
1090
|
record = {
|
|
986
1091
|
'name': row.name,
|
|
@@ -1003,6 +1108,8 @@ def get_clusters() -> List[Dict[str, Any]]:
|
|
|
1003
1108
|
'workspace': row.workspace,
|
|
1004
1109
|
'last_creation_yaml': row.last_creation_yaml,
|
|
1005
1110
|
'last_creation_command': row.last_creation_command,
|
|
1111
|
+
'is_managed': bool(row.is_managed),
|
|
1112
|
+
'last_event': last_event,
|
|
1006
1113
|
}
|
|
1007
1114
|
|
|
1008
1115
|
records.append(record)
|
|
@@ -1123,6 +1230,8 @@ def get_clusters_from_history(
|
|
|
1123
1230
|
'workspace': workspace,
|
|
1124
1231
|
'last_creation_yaml': row.last_creation_yaml,
|
|
1125
1232
|
'last_creation_command': row.last_creation_command,
|
|
1233
|
+
'last_event': get_last_cluster_event(
|
|
1234
|
+
row.cluster_hash, event_type=ClusterEventType.STATUS_CHANGE),
|
|
1126
1235
|
}
|
|
1127
1236
|
|
|
1128
1237
|
records.append(record)
|
sky/jobs/client/sdk.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""SDK functions for managed jobs."""
|
|
2
2
|
import json
|
|
3
3
|
import typing
|
|
4
|
-
from typing import Dict, List, Optional, Sequence, Union
|
|
4
|
+
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
|
|
5
5
|
|
|
6
6
|
import click
|
|
7
7
|
|
|
@@ -26,6 +26,7 @@ if typing.TYPE_CHECKING:
|
|
|
26
26
|
import webbrowser
|
|
27
27
|
|
|
28
28
|
import sky
|
|
29
|
+
from sky import backends
|
|
29
30
|
from sky.serve import serve_utils
|
|
30
31
|
else:
|
|
31
32
|
# only used in dashboard()
|
|
@@ -45,7 +46,8 @@ def launch(
|
|
|
45
46
|
# Internal only:
|
|
46
47
|
# pylint: disable=invalid-name
|
|
47
48
|
_need_confirmation: bool = False,
|
|
48
|
-
) -> server_common.RequestId
|
|
49
|
+
) -> server_common.RequestId[Tuple[Optional[int],
|
|
50
|
+
Optional['backends.ResourceHandle']]]:
|
|
49
51
|
"""Launches a managed job.
|
|
50
52
|
|
|
51
53
|
Please refer to sky.cli.job_launch for documentation.
|
|
@@ -86,11 +88,11 @@ def launch(
|
|
|
86
88
|
if _need_confirmation:
|
|
87
89
|
job_identity = 'a managed job'
|
|
88
90
|
if pool is None:
|
|
89
|
-
|
|
90
|
-
sdk.stream_and_get(
|
|
91
|
+
optimize_request_id = sdk.optimize(dag)
|
|
92
|
+
sdk.stream_and_get(optimize_request_id)
|
|
91
93
|
else:
|
|
92
|
-
|
|
93
|
-
pool_statuses = sdk.get(
|
|
94
|
+
pool_status_request_id = pool_status(pool)
|
|
95
|
+
pool_statuses = sdk.get(pool_status_request_id)
|
|
94
96
|
if not pool_statuses:
|
|
95
97
|
raise click.UsageError(f'Pool {pool!r} not found.')
|
|
96
98
|
resources = pool_statuses[0]['requested_resources_str']
|
|
@@ -123,10 +125,12 @@ def launch(
|
|
|
123
125
|
|
|
124
126
|
@usage_lib.entrypoint
|
|
125
127
|
@server_common.check_server_healthy_or_start
|
|
126
|
-
def queue(
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
128
|
+
def queue(
|
|
129
|
+
refresh: bool,
|
|
130
|
+
skip_finished: bool = False,
|
|
131
|
+
all_users: bool = False,
|
|
132
|
+
job_ids: Optional[List[int]] = None
|
|
133
|
+
) -> server_common.RequestId[List[Dict[str, Any]]]:
|
|
130
134
|
"""Gets statuses of managed jobs.
|
|
131
135
|
|
|
132
136
|
Please refer to sky.cli.job_queue for documentation.
|
|
@@ -190,7 +194,7 @@ def cancel(
|
|
|
190
194
|
all: bool = False, # pylint: disable=redefined-builtin
|
|
191
195
|
all_users: bool = False,
|
|
192
196
|
pool: Optional[str] = None,
|
|
193
|
-
) -> server_common.RequestId:
|
|
197
|
+
) -> server_common.RequestId[None]:
|
|
194
198
|
"""Cancels managed jobs.
|
|
195
199
|
|
|
196
200
|
Please refer to sky.cli.job_cancel for documentation.
|
|
@@ -278,7 +282,8 @@ def tail_logs(name: Optional[str] = None,
|
|
|
278
282
|
json=json.loads(body.model_dump_json()),
|
|
279
283
|
stream=True,
|
|
280
284
|
timeout=(5, None))
|
|
281
|
-
request_id = server_common.get_request_id(
|
|
285
|
+
request_id: server_common.RequestId[int] = server_common.get_request_id(
|
|
286
|
+
response)
|
|
282
287
|
# Log request is idempotent when tail is 0, thus can resume previous
|
|
283
288
|
# streaming point on retry.
|
|
284
289
|
return sdk.stream_response(request_id=request_id,
|
|
@@ -326,12 +331,13 @@ def download_logs(
|
|
|
326
331
|
'/jobs/download_logs',
|
|
327
332
|
json=json.loads(body.model_dump_json()),
|
|
328
333
|
timeout=(5, None))
|
|
329
|
-
|
|
330
|
-
server_common.get_request_id(response)
|
|
334
|
+
request_id: server_common.RequestId[Dict[
|
|
335
|
+
str, str]] = server_common.get_request_id(response)
|
|
336
|
+
job_id_remote_path_dict = sdk.stream_and_get(request_id)
|
|
331
337
|
remote2local_path_dict = client_common.download_logs_from_api_server(
|
|
332
338
|
job_id_remote_path_dict.values())
|
|
333
339
|
return {
|
|
334
|
-
job_id: remote2local_path_dict[remote_path]
|
|
340
|
+
int(job_id): remote2local_path_dict[remote_path]
|
|
335
341
|
for job_id, remote_path in job_id_remote_path_dict.items()
|
|
336
342
|
}
|
|
337
343
|
|
|
@@ -380,7 +386,7 @@ def pool_apply(
|
|
|
380
386
|
# Internal only:
|
|
381
387
|
# pylint: disable=invalid-name
|
|
382
388
|
_need_confirmation: bool = False
|
|
383
|
-
) -> server_common.RequestId:
|
|
389
|
+
) -> server_common.RequestId[None]:
|
|
384
390
|
"""Apply a config to a pool."""
|
|
385
391
|
return impl.apply(task,
|
|
386
392
|
pool_name,
|
|
@@ -396,7 +402,7 @@ def pool_down(
|
|
|
396
402
|
pool_names: Optional[Union[str, List[str]]],
|
|
397
403
|
all: bool = False, # pylint: disable=redefined-builtin
|
|
398
404
|
purge: bool = False,
|
|
399
|
-
) -> server_common.RequestId:
|
|
405
|
+
) -> server_common.RequestId[None]:
|
|
400
406
|
"""Delete a pool."""
|
|
401
407
|
return impl.down(pool_names, all, purge, pool=True)
|
|
402
408
|
|
|
@@ -405,7 +411,8 @@ def pool_down(
|
|
|
405
411
|
@server_common.check_server_healthy_or_start
|
|
406
412
|
@versions.minimal_api_version(12)
|
|
407
413
|
def pool_status(
|
|
408
|
-
pool_names: Optional[Union[str, List[str]]],
|
|
414
|
+
pool_names: Optional[Union[str, List[str]]],
|
|
415
|
+
) -> server_common.RequestId[List[Dict[str, Any]]]:
|
|
409
416
|
"""Query a pool."""
|
|
410
417
|
return impl.status(pool_names, pool=True)
|
|
411
418
|
|
|
@@ -413,7 +420,7 @@ def pool_status(
|
|
|
413
420
|
@usage_lib.entrypoint
|
|
414
421
|
@server_common.check_server_healthy_or_start
|
|
415
422
|
@rest.retry_transient_errors()
|
|
416
|
-
@versions.minimal_api_version(
|
|
423
|
+
@versions.minimal_api_version(16)
|
|
417
424
|
def pool_tail_logs(pool_name: str,
|
|
418
425
|
target: Union[str, 'serve_utils.ServiceComponent'],
|
|
419
426
|
worker_id: Optional[int] = None,
|
|
@@ -433,7 +440,7 @@ def pool_tail_logs(pool_name: str,
|
|
|
433
440
|
@usage_lib.entrypoint
|
|
434
441
|
@server_common.check_server_healthy_or_start
|
|
435
442
|
@rest.retry_transient_errors()
|
|
436
|
-
@versions.minimal_api_version(
|
|
443
|
+
@versions.minimal_api_version(16)
|
|
437
444
|
def pool_sync_down_logs(pool_name: str,
|
|
438
445
|
local_dir: str,
|
|
439
446
|
*,
|
sky/jobs/controller.py
CHANGED
|
@@ -332,7 +332,8 @@ class JobsController:
|
|
|
332
332
|
clusters = backend_utils.get_clusters(
|
|
333
333
|
cluster_names=[cluster_name],
|
|
334
334
|
refresh=common.StatusRefreshMode.NONE,
|
|
335
|
-
all_users=True
|
|
335
|
+
all_users=True,
|
|
336
|
+
_include_is_managed=True)
|
|
336
337
|
if clusters:
|
|
337
338
|
assert len(clusters) == 1, (clusters, cluster_name)
|
|
338
339
|
handle = clusters[0].get('handle')
|
sky/jobs/recovery_strategy.py
CHANGED
|
@@ -261,6 +261,9 @@ class StrategyExecutor:
|
|
|
261
261
|
if self.cluster_name is None:
|
|
262
262
|
return
|
|
263
263
|
if self.pool is None:
|
|
264
|
+
global_user_state.add_cluster_event(
|
|
265
|
+
self.cluster_name, None, 'Cluster was cleaned up.',
|
|
266
|
+
global_user_state.ClusterEventType.STATUS_CHANGE)
|
|
264
267
|
managed_job_utils.terminate_cluster(self.cluster_name)
|
|
265
268
|
|
|
266
269
|
def _launch(self,
|
sky/jobs/server/core.py
CHANGED
|
@@ -547,6 +547,10 @@ def _maybe_restart_controller(
|
|
|
547
547
|
'controller'))
|
|
548
548
|
with skypilot_config.local_active_workspace_ctx(
|
|
549
549
|
skylet_constants.SKYPILOT_DEFAULT_WORKSPACE):
|
|
550
|
+
global_user_state.add_cluster_event(
|
|
551
|
+
jobs_controller_type.value.cluster_name,
|
|
552
|
+
status_lib.ClusterStatus.INIT, 'Jobs controller restarted.',
|
|
553
|
+
global_user_state.ClusterEventType.STATUS_CHANGE)
|
|
550
554
|
handle = core.start(
|
|
551
555
|
cluster_name=jobs_controller_type.value.cluster_name)
|
|
552
556
|
|