skypilot-nightly 1.0.0.dev20250804__py3-none-any.whl → 1.0.0.dev20250807__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +33 -4
- sky/catalog/kubernetes_catalog.py +8 -0
- sky/catalog/nebius_catalog.py +0 -1
- sky/check.py +11 -1
- sky/client/cli/command.py +234 -100
- sky/client/sdk.py +30 -9
- sky/client/sdk_async.py +815 -0
- sky/clouds/kubernetes.py +6 -1
- sky/clouds/nebius.py +1 -4
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/YAirOGsV1z6B2RJ0VIUmD/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-a8a8f1adba34c892.js +11 -0
- sky/dashboard/out/_next/static/chunks/1871-980a395e92633a5c.js +6 -0
- sky/dashboard/out/_next/static/chunks/3785.6003d293cb83eab4.js +1 -0
- sky/dashboard/out/_next/static/chunks/{3698-7874720877646365.js → 3850-ff4a9a69d978632b.js} +1 -1
- sky/dashboard/out/_next/static/chunks/4725.29550342bd53afd8.js +1 -0
- sky/dashboard/out/_next/static/chunks/{4937.d6bf67771e353356.js → 4937.a2baa2df5572a276.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/6601-3e21152fe16da09c.js +1 -0
- sky/dashboard/out/_next/static/chunks/{691.6d99cbfba347cebf.js → 691.5eeedf82cc243343.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6989-6129c1cfbcf51063.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-0f886f16e0d55ff8.js +1 -0
- sky/dashboard/out/_next/static/chunks/8056-019615038d6ce427.js +1 -0
- sky/dashboard/out/_next/static/chunks/8252.62b0d23aed618bb2.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-318c3dca725e8e5d.js +1 -0
- sky/dashboard/out/_next/static/chunks/{9025.7937c16bc8623516.js → 9025.a1bef12d672bb66d.js} +1 -1
- sky/dashboard/out/_next/static/chunks/9159-11421c0f2909236f.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.85b0b1b4054574dd.js +31 -0
- sky/dashboard/out/_next/static/chunks/9666.cd4273f2a5c5802c.js +1 -0
- sky/dashboard/out/_next/static/chunks/{9847.4c46c5e229c78704.js → 9847.757720f3b40c0aa5.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{9984.78ee6d2c6fa4b0e8.js → 9984.c5564679e467d245.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{_app-a67ae198457b9886.js → _app-1e6de35d15a8d432.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6fd1d2d8441aa54b.js +11 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-155d477a6c3e04e2.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-b30460f683e6ba96.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{config-8620d099cbef8608.js → config-dfb9bf07b13045f4.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-13d53fffc03ccb52.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-fc9222e26c8e2f0d.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-154f55cf8af55be5.js +11 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-f5ccf5d39d87aebe.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-cdc60fb5d371e16a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-7ed36e44e779d5c7.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-c9695d657f78b5dc.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-f72f73bcef9541dc.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-8f67be60165724cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-76efbdad99742559.js +1 -0
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/global_user_state.py +14 -2
- sky/jobs/__init__.py +2 -0
- sky/jobs/client/sdk.py +43 -2
- sky/jobs/client/sdk_async.py +135 -0
- sky/jobs/server/core.py +48 -1
- sky/jobs/server/server.py +52 -3
- sky/jobs/state.py +5 -1
- sky/jobs/utils.py +3 -1
- sky/provision/kubernetes/utils.py +30 -4
- sky/provision/nebius/instance.py +1 -0
- sky/provision/nebius/utils.py +9 -1
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/serve/client/impl.py +85 -1
- sky/serve/client/sdk.py +16 -47
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +3 -1
- sky/serve/controller.py +6 -3
- sky/serve/load_balancer.py +3 -1
- sky/serve/serve_state.py +93 -5
- sky/serve/serve_utils.py +200 -67
- sky/serve/server/core.py +13 -197
- sky/serve/server/impl.py +261 -23
- sky/serve/service.py +15 -3
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +46 -0
- sky/server/auth/oauth2_proxy.py +185 -0
- sky/server/common.py +119 -21
- sky/server/constants.py +1 -1
- sky/server/daemons.py +60 -11
- sky/server/requests/executor.py +5 -3
- sky/server/requests/payloads.py +19 -0
- sky/server/rest.py +114 -0
- sky/server/server.py +44 -40
- sky/setup_files/dependencies.py +2 -0
- sky/skylet/constants.py +1 -1
- sky/skylet/events.py +5 -1
- sky/skylet/skylet.py +3 -1
- sky/task.py +61 -21
- sky/templates/kubernetes-ray.yml.j2 +9 -0
- sky/templates/nebius-ray.yml.j2 +1 -0
- sky/templates/sky-serve-controller.yaml.j2 +1 -0
- sky/usage/usage_lib.py +8 -6
- sky/utils/annotations.py +8 -3
- sky/utils/common_utils.py +11 -1
- sky/utils/controller_utils.py +7 -0
- sky/utils/db/migration_utils.py +2 -2
- sky/utils/rich_utils.py +120 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/METADATA +22 -13
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/RECORD +120 -112
- sky/client/sdk.pyi +0 -300
- sky/dashboard/out/_next/static/KiGGm4fK0CpmN6BT17jkh/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/1043-928582d4860fef92.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-3f10a5a9f697c630.js +0 -11
- sky/dashboard/out/_next/static/chunks/1664-22b00e32c9ff96a4.js +0 -1
- sky/dashboard/out/_next/static/chunks/1871-7e17c195296e2ea9.js +0 -6
- sky/dashboard/out/_next/static/chunks/2003.f90b06bb1f914295.js +0 -1
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +0 -1
- sky/dashboard/out/_next/static/chunks/3785.95524bc443db8260.js +0 -1
- sky/dashboard/out/_next/static/chunks/4725.42f21f250f91f65b.js +0 -1
- sky/dashboard/out/_next/static/chunks/4869.18e6a4361a380763.js +0 -16
- sky/dashboard/out/_next/static/chunks/5230-f3bb2663e442e86c.js +0 -1
- sky/dashboard/out/_next/static/chunks/6601-234b1cf963c7280b.js +0 -1
- sky/dashboard/out/_next/static/chunks/6989-983d3ae7a874de98.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-9a8cca241b30db83.js +0 -1
- sky/dashboard/out/_next/static/chunks/938-40d15b6261ec8dc1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-fa63e8b1d203f298.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9e7df5fc761c95a7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-956ad430075efee8.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-9cfd875eecb6eaf5.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-0fbdc9072f19fbe2.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-6c5af4c86e6ab3d3.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/jobs-6393a9edc7322b54.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-34d6bb10c3b3ee3d.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-225c8dae0634eb7f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-92f741084a89e27b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-4d41c9023287f59a.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-e4cb7e97d37e93ad.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-13145516b19858fb.js +0 -1
- sky/dashboard/out/_next/static/css/b3227360726f12eb.css +0 -3
- /sky/dashboard/out/_next/static/{KiGGm4fK0CpmN6BT17jkh → YAirOGsV1z6B2RJ0VIUmD}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{6135-d0e285ac5f3f2485.js → 6135-85426374db04811e.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/top_level.txt +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-76efbdad99742559.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-1e6de35d15a8d432.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-b15471acd2cba716.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/6212-7bd06f60ba693125.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-6129c1cfbcf51063.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-318c3dca725e8e5d.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-85426374db04811e.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs/%5Bjob%5D-154f55cf8af55be5.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_ssgManifest.js" 
defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs/[job]","query":{},"buildId":"YAirOGsV1z6B2RJ0VIUmD","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-76efbdad99742559.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-1e6de35d15a8d432.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/6212-7bd06f60ba693125.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-6129c1cfbcf51063.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-318c3dca725e8e5d.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs/pools/%5Bpool%5D-f5ccf5d39d87aebe.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" 
type="application/json">{"props":{"pageProps":{}},"page":"/jobs/pools/[pool]","query":{},"buildId":"YAirOGsV1z6B2RJ0VIUmD","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/jobs.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-76efbdad99742559.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-1e6de35d15a8d432.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs-cdc60fb5d371e16a.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs","query":{},"buildId":"YAirOGsV1z6B2RJ0VIUmD","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/users.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-76efbdad99742559.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-1e6de35d15a8d432.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/users-7ed36e44e779d5c7.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/users","query":{},"buildId":"YAirOGsV1z6B2RJ0VIUmD","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/volumes.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-76efbdad99742559.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-1e6de35d15a8d432.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/volumes-c9695d657f78b5dc.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/volumes","query":{},"buildId":"YAirOGsV1z6B2RJ0VIUmD","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-76efbdad99742559.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-1e6de35d15a8d432.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspace/new","query":{},"buildId":"YAirOGsV1z6B2RJ0VIUmD","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-76efbdad99742559.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-1e6de35d15a8d432.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-b15471acd2cba716.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/1559-6c00e20454194859.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-6129c1cfbcf51063.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-318c3dca725e8e5d.js" defer=""></script><script src="/dashboard/_next/static/chunks/6990-0f886f16e0d55ff8.js" defer=""></script><script src="/dashboard/_next/static/chunks/8056-019615038d6ce427.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-85426374db04811e.js" defer=""></script><script src="/dashboard/_next/static/chunks/6601-3e21152fe16da09c.js" defer=""></script><script 
src="/dashboard/_next/static/chunks/9159-11421c0f2909236f.js" defer=""></script><script src="/dashboard/_next/static/chunks/1141-a8a8f1adba34c892.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-f72f73bcef9541dc.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces/[name]","query":{},"buildId":"YAirOGsV1z6B2RJ0VIUmD","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-76efbdad99742559.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-1e6de35d15a8d432.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-8f67be60165724cc.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/YAirOGsV1z6B2RJ0VIUmD/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"YAirOGsV1z6B2RJ0VIUmD","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/global_user_state.py
CHANGED
|
@@ -158,6 +158,7 @@ cluster_history_table = sqlalchemy.Table(
|
|
|
158
158
|
sqlalchemy.Column('last_creation_command',
|
|
159
159
|
sqlalchemy.Text,
|
|
160
160
|
server_default=None),
|
|
161
|
+
sqlalchemy.Column('workspace', sqlalchemy.Text, server_default=None),
|
|
161
162
|
)
|
|
162
163
|
|
|
163
164
|
ssh_key_table = sqlalchemy.Table(
|
|
@@ -476,6 +477,8 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
476
477
|
|
|
477
478
|
user_hash = common_utils.get_current_user().id
|
|
478
479
|
active_workspace = skypilot_config.get_active_workspace()
|
|
480
|
+
history_workspace = active_workspace
|
|
481
|
+
history_hash = user_hash
|
|
479
482
|
|
|
480
483
|
conditional_values = {}
|
|
481
484
|
if is_launch:
|
|
@@ -560,6 +563,10 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
560
563
|
# Modify cluster history table
|
|
561
564
|
launched_nodes = getattr(cluster_handle, 'launched_nodes', None)
|
|
562
565
|
launched_resources = getattr(cluster_handle, 'launched_resources', None)
|
|
566
|
+
if cluster_row and cluster_row.workspace:
|
|
567
|
+
history_workspace = cluster_row.workspace
|
|
568
|
+
if cluster_row and cluster_row.user_hash:
|
|
569
|
+
history_hash = cluster_row.user_hash
|
|
563
570
|
creation_info = {}
|
|
564
571
|
if conditional_values.get('last_creation_yaml') is not None:
|
|
565
572
|
creation_info = {
|
|
@@ -577,6 +584,7 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
577
584
|
launched_resources=pickle.dumps(launched_resources),
|
|
578
585
|
usage_intervals=pickle.dumps(usage_intervals),
|
|
579
586
|
user_hash=user_hash,
|
|
587
|
+
workspace=history_workspace,
|
|
580
588
|
**creation_info,
|
|
581
589
|
)
|
|
582
590
|
do_update_stmt = insert_stmnt.on_conflict_do_update(
|
|
@@ -590,7 +598,8 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
590
598
|
pickle.dumps(launched_resources),
|
|
591
599
|
cluster_history_table.c.usage_intervals:
|
|
592
600
|
pickle.dumps(usage_intervals),
|
|
593
|
-
cluster_history_table.c.user_hash:
|
|
601
|
+
cluster_history_table.c.user_hash: history_hash,
|
|
602
|
+
cluster_history_table.c.workspace: history_workspace,
|
|
594
603
|
**creation_info,
|
|
595
604
|
})
|
|
596
605
|
session.execute(do_update_stmt)
|
|
@@ -1026,6 +1035,7 @@ def get_clusters_from_history(
|
|
|
1026
1035
|
cluster_history_table.c.user_hash,
|
|
1027
1036
|
cluster_history_table.c.last_creation_yaml,
|
|
1028
1037
|
cluster_history_table.c.last_creation_command,
|
|
1038
|
+
cluster_history_table.c.workspace.label('history_workspace'),
|
|
1029
1039
|
cluster_table.c.status, cluster_table.c.workspace,
|
|
1030
1040
|
cluster_table.c.status_updated_at).select_from(
|
|
1031
1041
|
cluster_history_table.join(cluster_table,
|
|
@@ -1096,6 +1106,8 @@ def get_clusters_from_history(
|
|
|
1096
1106
|
# Get user name from user hash
|
|
1097
1107
|
user = get_user(user_hash)
|
|
1098
1108
|
user_name = user.name if user is not None else None
|
|
1109
|
+
workspace = (row.history_workspace
|
|
1110
|
+
if row.history_workspace else row.workspace)
|
|
1099
1111
|
|
|
1100
1112
|
record = {
|
|
1101
1113
|
'name': row.name,
|
|
@@ -1108,7 +1120,7 @@ def get_clusters_from_history(
|
|
|
1108
1120
|
'status': status,
|
|
1109
1121
|
'user_hash': user_hash,
|
|
1110
1122
|
'user_name': user_name,
|
|
1111
|
-
'workspace':
|
|
1123
|
+
'workspace': workspace,
|
|
1112
1124
|
'last_creation_yaml': row.last_creation_yaml,
|
|
1113
1125
|
'last_creation_command': row.last_creation_command,
|
|
1114
1126
|
}
|
sky/jobs/__init__.py
CHANGED
|
@@ -8,6 +8,8 @@ from sky.jobs.client.sdk import launch
|
|
|
8
8
|
from sky.jobs.client.sdk import pool_apply
|
|
9
9
|
from sky.jobs.client.sdk import pool_down
|
|
10
10
|
from sky.jobs.client.sdk import pool_status
|
|
11
|
+
from sky.jobs.client.sdk import pool_sync_down_logs
|
|
12
|
+
from sky.jobs.client.sdk import pool_tail_logs
|
|
11
13
|
from sky.jobs.client.sdk import queue
|
|
12
14
|
from sky.jobs.client.sdk import tail_logs
|
|
13
15
|
from sky.jobs.constants import JOBS_CLUSTER_NAME_PREFIX_LENGTH
|
sky/jobs/client/sdk.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""SDK functions for managed jobs."""
|
|
2
2
|
import json
|
|
3
3
|
import typing
|
|
4
|
-
from typing import Dict, List, Optional, Union
|
|
4
|
+
from typing import Dict, List, Optional, Sequence, Union
|
|
5
5
|
|
|
6
6
|
import click
|
|
7
7
|
|
|
@@ -186,7 +186,7 @@ def queue(refresh: bool,
|
|
|
186
186
|
@server_common.check_server_healthy_or_start
|
|
187
187
|
def cancel(
|
|
188
188
|
name: Optional[str] = None,
|
|
189
|
-
job_ids: Optional[
|
|
189
|
+
job_ids: Optional[Sequence[int]] = None,
|
|
190
190
|
all: bool = False, # pylint: disable=redefined-builtin
|
|
191
191
|
all_users: bool = False,
|
|
192
192
|
pool: Optional[str] = None,
|
|
@@ -408,3 +408,44 @@ def pool_status(
|
|
|
408
408
|
pool_names: Optional[Union[str, List[str]]],) -> server_common.RequestId:
|
|
409
409
|
"""Query a pool."""
|
|
410
410
|
return impl.status(pool_names, pool=True)
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
@usage_lib.entrypoint
|
|
414
|
+
@server_common.check_server_healthy_or_start
|
|
415
|
+
@rest.retry_transient_errors()
|
|
416
|
+
@versions.minimal_api_version(13)
|
|
417
|
+
def pool_tail_logs(pool_name: str,
|
|
418
|
+
target: Union[str, 'serve_utils.ServiceComponent'],
|
|
419
|
+
worker_id: Optional[int] = None,
|
|
420
|
+
follow: bool = True,
|
|
421
|
+
output_stream: Optional['io.TextIOBase'] = None,
|
|
422
|
+
tail: Optional[int] = None) -> None:
|
|
423
|
+
"""Tails logs of a pool."""
|
|
424
|
+
return impl.tail_logs(pool_name,
|
|
425
|
+
target,
|
|
426
|
+
worker_id,
|
|
427
|
+
follow,
|
|
428
|
+
output_stream,
|
|
429
|
+
tail,
|
|
430
|
+
pool=True)
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@rest.retry_transient_errors()
@versions.minimal_api_version(13)
def pool_sync_down_logs(pool_name: str,
                        local_dir: str,
                        *,
                        targets: Optional[Union[
                            str, 'serve_utils.ServiceComponent', Sequence[Union[
                                str, 'serve_utils.ServiceComponent']]]] = None,
                        worker_ids: Optional[List[int]] = None,
                        tail: Optional[int] = None) -> None:
    """Sync down logs of a pool.

    Thin wrapper around ``impl.sync_down_logs`` called with ``pool=True``.

    Args:
        pool_name: Name of the pool whose logs are synced down.
        local_dir: Destination directory, forwarded positionally.
        targets: Optional component or sequence of components to sync.
        worker_ids: Optional worker ids; forwarded under the keyword
            ``replica_ids``.
        tail: Optional line count; passed through as-is.

    Returns:
        Whatever ``impl.sync_down_logs`` returns.
    """
    # `worker_ids` is renamed to `replica_ids` at this call boundary.
    forwarded_options = {
        'targets': targets,
        'replica_ids': worker_ids,
        'tail': tail,
        'pool': True,
    }
    return impl.sync_down_logs(pool_name, local_dir, **forwarded_options)
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Async SDK functions for managed jobs."""
|
|
2
|
+
import typing
|
|
3
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
4
|
+
|
|
5
|
+
from sky import backends
|
|
6
|
+
from sky import sky_logging
|
|
7
|
+
from sky.adaptors import common as adaptors_common
|
|
8
|
+
from sky.client import sdk_async
|
|
9
|
+
from sky.jobs.client import sdk
|
|
10
|
+
from sky.skylet import constants
|
|
11
|
+
from sky.usage import usage_lib
|
|
12
|
+
from sky.utils import common_utils
|
|
13
|
+
from sky.utils import context_utils
|
|
14
|
+
|
|
15
|
+
if typing.TYPE_CHECKING:
|
|
16
|
+
import io
|
|
17
|
+
|
|
18
|
+
import requests
|
|
19
|
+
|
|
20
|
+
import sky
|
|
21
|
+
else:
|
|
22
|
+
requests = adaptors_common.LazyImport('requests')
|
|
23
|
+
|
|
24
|
+
logger = sky_logging.init_logger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@usage_lib.entrypoint
async def launch(
    task: Union['sky.Task', 'sky.Dag'],
    name: Optional[str] = None,
    # Internal only:
    # pylint: disable=invalid-name
    _need_confirmation: bool = False,
    stream_logs: Optional[
        sdk_async.StreamConfig] = sdk_async.DEFAULT_STREAM_CONFIG,
) -> Tuple[Optional[int], Optional[backends.ResourceHandle]]:
    """Async version of launch() that launches a managed job.

    Submits the request by running ``sdk.launch`` through
    ``context_utils.to_thread`` and then awaits the request's result.

    Args:
        task: Task or DAG to launch.
        name: Optional job name.
        _need_confirmation: Internal only; forwarded to ``sdk.launch``.
        stream_logs: Stream configuration. When None, the result is fetched
            with ``sdk_async.get`` without streaming.

    Returns:
        The awaited result of the launch request.
    """
    # NOTE(review): arguments are forwarded positionally; confirm the order
    # matches the sync sdk.launch signature.
    request_id = await context_utils.to_thread(sdk.launch, task, name,
                                               _need_confirmation)
    if stream_logs is None:
        return await sdk_async.get(request_id)
    # pylint: disable=protected-access
    return await sdk_async._stream_and_get(request_id, stream_logs)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@usage_lib.entrypoint
async def queue(
    refresh: bool,
    skip_finished: bool = False,
    all_users: bool = False,
    stream_logs: Optional[
        sdk_async.StreamConfig] = sdk_async.DEFAULT_STREAM_CONFIG
) -> List[Dict[str, Any]]:
    """Async version of queue() that gets statuses of managed jobs.

    Submits the request by running ``sdk.queue`` through
    ``context_utils.to_thread`` and then awaits the request's result.

    Args:
        refresh: Forwarded to ``sdk.queue``.
        skip_finished: Forwarded to ``sdk.queue``.
        all_users: Forwarded to ``sdk.queue``.
        stream_logs: Stream configuration. When None, the result is fetched
            with ``sdk_async.get`` without streaming.

    Returns:
        The awaited result of the queue request.
    """
    request_id = await context_utils.to_thread(sdk.queue, refresh,
                                               skip_finished, all_users)
    if stream_logs is None:
        return await sdk_async.get(request_id)
    # pylint: disable=protected-access
    return await sdk_async._stream_and_get(request_id, stream_logs)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@usage_lib.entrypoint
async def cancel(
    name: Optional[str] = None,
    job_ids: Optional[List[int]] = None,
    all: bool = False,  # pylint: disable=redefined-builtin
    all_users: bool = False,
    stream_logs: Optional[
        sdk_async.StreamConfig] = sdk_async.DEFAULT_STREAM_CONFIG,
    pool: Optional[str] = None,
) -> None:
    """Async version of cancel() that cancels managed jobs.

    Submits the request by running ``sdk.cancel`` through
    ``context_utils.to_thread`` and then awaits the request's result.

    Args:
        name: Optional job name to cancel.
        job_ids: Optional list of job ids to cancel.
        all: Forwarded to ``sdk.cancel``.
        all_users: Forwarded to ``sdk.cancel``.
        stream_logs: Stream configuration. When None, the result is fetched
            with ``sdk_async.get`` without streaming.
        pool: Optional pool name, forwarded as the fifth positional argument
            of ``sdk.cancel``. It is declared last here so existing
            positional callers of this coroutine keep working.

    Returns:
        The awaited result of the cancel request.
    """
    # The sync cancel() takes (name, job_ids, all, all_users, pool); forward
    # `pool` so the async API can cancel by pool as well.
    request_id = await context_utils.to_thread(sdk.cancel, name, job_ids, all,
                                               all_users, pool)
    if stream_logs is None:
        return await sdk_async.get(request_id)
    # pylint: disable=protected-access
    return await sdk_async._stream_and_get(request_id, stream_logs)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@usage_lib.entrypoint
async def tail_logs(cluster_name: str,
                    job_id: Optional[int],
                    follow: bool,
                    tail: int = 0,
                    output_stream: Optional['io.TextIOBase'] = None) -> int:
    """Async version of tail_logs() that tails the logs of a job.

    Runs ``sdk.tail_logs`` through ``context_utils.to_thread`` with the five
    arguments forwarded positionally, in signature order.

    Returns:
        The awaited return value of ``sdk.tail_logs``.
    """
    # NOTE(review): arguments are forwarded positionally; confirm that
    # (cluster_name, job_id, follow, tail, output_stream) matches the
    # parameter order of the sync sdk.tail_logs in sky.jobs.client.sdk.
    forwarded = (cluster_name, job_id, follow, tail, output_stream)
    result = await context_utils.to_thread(sdk.tail_logs, *forwarded)
    return result
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@usage_lib.entrypoint
async def download_logs(
        name: Optional[str],
        job_id: Optional[int],
        refresh: bool,
        controller: bool,
        local_dir: str = constants.SKY_LOGS_DIRECTORY) -> Dict[int, str]:
    """Async version of download_logs() that syncs down logs of managed jobs.

    Runs ``sdk.download_logs`` through ``context_utils.to_thread`` with all
    five arguments forwarded positionally, in signature order.

    Returns:
        The awaited return value of ``sdk.download_logs``.
    """
    forwarded = (name, job_id, refresh, controller, local_dir)
    result = await context_utils.to_thread(sdk.download_logs, *forwarded)
    return result
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@usage_lib.entrypoint
async def dashboard() -> None:
    """Async version of dashboard() that starts a dashboard for managed jobs.

    Runs ``sdk.dashboard`` through ``context_utils.to_thread`` and returns
    whatever that call yields.
    """
    result = await context_utils.to_thread(sdk.dashboard)
    return result
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# Deprecated functions
#
# Each `spot_*` alias wraps the matching coroutine defined in this module via
# common_utils.deprecated_function, recording the replacement name and the
# version in which the alias is removed.
# NOTE(review): launch() in this module declares no `use_spot` parameter;
# confirm how deprecated_function applies `override_argument`.
spot_launch = common_utils.deprecated_function(
    launch,
    name='sky.jobs.launch',
    deprecated_name='spot_launch',
    removing_version='0.8.0',
    override_argument={'use_spot': True},
)
spot_queue = common_utils.deprecated_function(
    queue,
    name='sky.jobs.queue',
    deprecated_name='spot_queue',
    removing_version='0.8.0',
)
spot_cancel = common_utils.deprecated_function(
    cancel,
    name='sky.jobs.cancel',
    deprecated_name='spot_cancel',
    removing_version='0.8.0',
)
spot_tail_logs = common_utils.deprecated_function(
    tail_logs,
    name='sky.jobs.tail_logs',
    deprecated_name='spot_tail_logs',
    removing_version='0.8.0',
)
|
sky/jobs/server/core.py
CHANGED
|
@@ -24,6 +24,7 @@ from sky.jobs import constants as managed_job_constants
|
|
|
24
24
|
from sky.jobs import state as managed_job_state
|
|
25
25
|
from sky.jobs import utils as managed_job_utils
|
|
26
26
|
from sky.provision import common as provision_common
|
|
27
|
+
from sky.serve import serve_state
|
|
27
28
|
from sky.serve import serve_utils
|
|
28
29
|
from sky.serve.server import impl
|
|
29
30
|
from sky.skylet import constants as skylet_constants
|
|
@@ -108,6 +109,11 @@ def _maybe_submit_job_locally(prefix: str, dag: 'sky.Dag', pool: Optional[str],
|
|
|
108
109
|
# Create local directory for the managed job.
|
|
109
110
|
pathlib.Path(prefix).expanduser().mkdir(parents=True, exist_ok=True)
|
|
110
111
|
job_ids = []
|
|
112
|
+
pool_hash = None
|
|
113
|
+
if pool is not None:
|
|
114
|
+
pool_hash = serve_state.get_service_hash(pool)
|
|
115
|
+
# Already checked in the sdk.
|
|
116
|
+
assert pool_hash is not None, f'Pool {pool} not found'
|
|
111
117
|
for _ in range(num_jobs if num_jobs is not None else 1):
|
|
112
118
|
# TODO(tian): We should have a separate name for each job when
|
|
113
119
|
# submitting multiple jobs. Current blocker is that we are sharing
|
|
@@ -121,7 +127,8 @@ def _maybe_submit_job_locally(prefix: str, dag: 'sky.Dag', pool: Optional[str],
|
|
|
121
127
|
workspace=skypilot_config.get_active_workspace(
|
|
122
128
|
force_user_workspace=True),
|
|
123
129
|
entrypoint=common_utils.get_current_command(),
|
|
124
|
-
pool=pool
|
|
130
|
+
pool=pool,
|
|
131
|
+
pool_hash=pool_hash))
|
|
125
132
|
for task_id, task in enumerate(dag.tasks):
|
|
126
133
|
resources_str = backend_utils.get_task_resources_str(
|
|
127
134
|
task, is_managed_job=True)
|
|
@@ -843,3 +850,43 @@ def pool_status(
|
|
|
843
850
|
List[str]]] = None,) -> List[Dict[str, Any]]:
|
|
844
851
|
"""Query a pool."""
|
|
845
852
|
return impl.status(pool_names, pool=True)
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
# Type alias: a service component given either as a plain string or as a
# serve_utils.ServiceComponent value.
ServiceComponentOrStr = Union[str, serve_utils.ServiceComponent]
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
@usage_lib.entrypoint
def pool_tail_logs(
    pool_name: str,
    *,
    target: ServiceComponentOrStr,
    worker_id: Optional[int] = None,
    follow: bool = True,
    tail: Optional[int] = None,
) -> None:
    """Tail logs of a pool.

    Thin wrapper around ``impl.tail_logs`` called with ``pool=True``.

    Args:
        pool_name: Name of the pool whose logs are tailed.
        target: Component to tail, as a string or ServiceComponent.
        worker_id: Optional worker id; forwarded under the keyword
            ``replica_id``.
        follow: Passed through as-is (defaults to True).
        tail: Optional line count; passed through as-is.

    Returns:
        Whatever ``impl.tail_logs`` returns.
    """
    # `worker_id` is renamed to `replica_id` at this call boundary.
    forwarded_options = {
        'target': target,
        'replica_id': worker_id,
        'follow': follow,
        'tail': tail,
        'pool': True,
    }
    return impl.tail_logs(pool_name, **forwarded_options)
|
|
874
|
+
|
|
875
|
+
|
|
876
|
+
@usage_lib.entrypoint
def pool_sync_down_logs(
    pool_name: str,
    *,
    local_dir: str,
    targets: Union[ServiceComponentOrStr, List[ServiceComponentOrStr],
                   None] = None,
    worker_ids: Optional[List[int]] = None,
    tail: Optional[int] = None,
) -> str:
    """Sync down logs of a pool.

    Thin wrapper around ``impl.sync_down_logs`` called with ``pool=True``.

    Args:
        pool_name: Name of the pool whose logs are synced down.
        local_dir: Destination directory; passed through as-is.
        targets: Optional component or list of components to sync.
        worker_ids: Optional worker ids; forwarded under the keyword
            ``replica_ids``.
        tail: Optional line count; passed through as-is.

    Returns:
        Whatever ``impl.sync_down_logs`` returns.
    """
    # `worker_ids` is renamed to `replica_ids` at this call boundary.
    forwarded_options = {
        'local_dir': local_dir,
        'targets': targets,
        'replica_ids': worker_ids,
        'tail': tail,
        'pool': True,
    }
    return impl.sync_down_logs(pool_name, **forwarded_options)
|
sky/jobs/server/server.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"""REST API for managed jobs."""
|
|
2
2
|
|
|
3
|
+
import pathlib
|
|
4
|
+
|
|
3
5
|
import fastapi
|
|
4
6
|
|
|
5
7
|
from sky import sky_logging
|
|
@@ -117,7 +119,7 @@ async def pool_apply(request: fastapi.Request,
|
|
|
117
119
|
request_body=jobs_pool_apply_body,
|
|
118
120
|
func=core.pool_apply,
|
|
119
121
|
schedule_type=api_requests.ScheduleType.LONG,
|
|
120
|
-
request_cluster_name=common.
|
|
122
|
+
request_cluster_name=common.JOB_CONTROLLER_NAME,
|
|
121
123
|
)
|
|
122
124
|
|
|
123
125
|
|
|
@@ -130,7 +132,7 @@ async def pool_down(request: fastapi.Request,
|
|
|
130
132
|
request_body=jobs_pool_down_body,
|
|
131
133
|
func=core.pool_down,
|
|
132
134
|
schedule_type=api_requests.ScheduleType.SHORT,
|
|
133
|
-
request_cluster_name=common.
|
|
135
|
+
request_cluster_name=common.JOB_CONTROLLER_NAME,
|
|
134
136
|
)
|
|
135
137
|
|
|
136
138
|
|
|
@@ -144,5 +146,52 @@ async def pool_status(
|
|
|
144
146
|
request_body=jobs_pool_status_body,
|
|
145
147
|
func=core.pool_status,
|
|
146
148
|
schedule_type=api_requests.ScheduleType.SHORT,
|
|
147
|
-
request_cluster_name=common.
|
|
149
|
+
request_cluster_name=common.JOB_CONTROLLER_NAME,
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@router.post('/pool_logs')
async def pool_tail_logs(
    request: fastapi.Request, log_body: payloads.JobsPoolLogsBody,
    background_tasks: fastapi.BackgroundTasks
) -> fastapi.responses.StreamingResponse:
    """Schedule a pool log-tailing request and stream its log file back.

    Schedules ``core.pool_tail_logs`` as a SHORT request named
    'jobs.pool_logs' against the jobs controller cluster name, then looks up
    the scheduled request and returns a streaming response over its log path.
    """
    request_id = request.state.request_id
    executor.schedule_request(
        request_id=request_id,
        request_name='jobs.pool_logs',
        request_body=log_body,
        func=core.pool_tail_logs,
        schedule_type=api_requests.ScheduleType.SHORT,
        request_cluster_name=common.JOB_CONTROLLER_NAME,
    )

    # Re-read the request record to obtain the log path to stream from.
    scheduled = api_requests.get_request(request_id)

    return stream_utils.stream_response(
        request_id=scheduled.request_id,
        logs_path=scheduled.log_path,
        background_tasks=background_tasks,
    )
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@router.post('/pool_sync-down-logs')
async def pool_download_logs(
    request: fastapi.Request,
    download_logs_body: payloads.JobsPoolDownloadLogsBody,
) -> None:
    """Schedule a request that syncs down a pool's logs.

    Creates a per-user, per-run directory
    ``<user logs prefix>/pool/<pool_name>_<timestamp>``, stores its path in
    the request body's ``local_dir`` and schedules
    ``core.pool_sync_down_logs`` as a SHORT request named
    'jobs.pool_sync_down_logs'.
    """
    user_hash = download_logs_body.env_vars[constants.USER_ID_ENV_VAR]
    timestamp = sky_logging.get_run_timestamp()
    user_logs_root = pathlib.Path(
        server_common.api_server_user_logs_dir_prefix(user_hash))
    run_dir_name = f'{download_logs_body.pool_name}_{timestamp}'
    logs_dir_on_api_server = user_logs_root / 'pool' / run_dir_name
    logs_dir_on_api_server.mkdir(parents=True, exist_ok=True)
    # We should reuse the original request body, so that the env vars, such as
    # user hash, are kept the same.
    download_logs_body.local_dir = str(logs_dir_on_api_server)
    executor.schedule_request(
        request_id=request.state.request_id,
        request_name='jobs.pool_sync_down_logs',
        request_body=download_logs_body,
        func=core.pool_sync_down_logs,
        schedule_type=api_requests.ScheduleType.SHORT,
        request_cluster_name=common.JOB_CONTROLLER_NAME,
    )
|
sky/jobs/state.py
CHANGED
|
@@ -107,6 +107,7 @@ job_info_table = sqlalchemy.Table(
|
|
|
107
107
|
sqlalchemy.Column('job_id_on_pool_cluster',
|
|
108
108
|
sqlalchemy.Integer,
|
|
109
109
|
server_default=None),
|
|
110
|
+
sqlalchemy.Column('pool_hash', sqlalchemy.Text, server_default=None),
|
|
110
111
|
)
|
|
111
112
|
|
|
112
113
|
ha_recovery_script_table = sqlalchemy.Table(
|
|
@@ -225,6 +226,7 @@ def _get_jobs_dict(r: 'row.RowMapping') -> Dict[str, Any]:
|
|
|
225
226
|
'pool': r['pool'],
|
|
226
227
|
'current_cluster_name': r['current_cluster_name'],
|
|
227
228
|
'job_id_on_pool_cluster': r['job_id_on_pool_cluster'],
|
|
229
|
+
'pool_hash': r['pool_hash'],
|
|
228
230
|
}
|
|
229
231
|
|
|
230
232
|
|
|
@@ -462,7 +464,8 @@ def set_job_info(job_id: int, name: str, workspace: str, entrypoint: str):
|
|
|
462
464
|
|
|
463
465
|
@_init_db
|
|
464
466
|
def set_job_info_without_job_id(name: str, workspace: str, entrypoint: str,
|
|
465
|
-
pool: Optional[str]
|
|
467
|
+
pool: Optional[str],
|
|
468
|
+
pool_hash: Optional[str]) -> int:
|
|
466
469
|
assert _SQLALCHEMY_ENGINE is not None
|
|
467
470
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
468
471
|
if (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
@@ -480,6 +483,7 @@ def set_job_info_without_job_id(name: str, workspace: str, entrypoint: str,
|
|
|
480
483
|
workspace=workspace,
|
|
481
484
|
entrypoint=entrypoint,
|
|
482
485
|
pool=pool,
|
|
486
|
+
pool_hash=pool_hash,
|
|
483
487
|
)
|
|
484
488
|
|
|
485
489
|
if (_SQLALCHEMY_ENGINE.dialect.name ==
|
sky/jobs/utils.py
CHANGED
|
@@ -189,7 +189,8 @@ def ha_recovery_for_consolidation_mode():
|
|
|
189
189
|
# already has all runtime installed. Directly start jobs recovery here.
|
|
190
190
|
# Refers to sky/templates/kubernetes-ray.yml.j2 for more details.
|
|
191
191
|
runner = command_runner.LocalProcessCommandRunner()
|
|
192
|
-
with open(constants.HA_PERSISTENT_RECOVERY_LOG_PATH
|
|
192
|
+
with open(constants.HA_PERSISTENT_RECOVERY_LOG_PATH.format('jobs_'),
|
|
193
|
+
'w',
|
|
193
194
|
encoding='utf-8') as f:
|
|
194
195
|
start = time.time()
|
|
195
196
|
f.write(f'Starting HA recovery at {datetime.datetime.now()}\n')
|
|
@@ -315,6 +316,7 @@ def update_managed_jobs_statuses(job_id: Optional[int] = None):
|
|
|
315
316
|
This function should not throw any exception. If it fails, it will
|
|
316
317
|
capture the error message, and log/return it.
|
|
317
318
|
"""
|
|
319
|
+
managed_job_state.remove_ha_recovery_script(job_id)
|
|
318
320
|
error_msg = None
|
|
319
321
|
tasks = managed_job_state.get_managed_jobs(job_id)
|
|
320
322
|
for task in tasks:
|