skypilot-nightly 1.0.0.dev20251018__py3-none-any.whl → 1.0.0.dev20251021__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +61 -0
- sky/backends/backend_utils.py +11 -11
- sky/backends/cloud_vm_ray_backend.py +15 -4
- sky/client/cli/command.py +39 -10
- sky/client/cli/flags.py +4 -2
- sky/client/sdk.py +26 -3
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage.py +2 -2
- sky/global_user_state.py +20 -20
- sky/jobs/server/server.py +10 -1
- sky/provision/kubernetes/network.py +9 -6
- sky/provision/provisioner.py +8 -0
- sky/serve/server/server.py +1 -0
- sky/server/common.py +9 -2
- sky/server/constants.py +1 -1
- sky/server/daemons.py +4 -2
- sky/server/requests/executor.py +10 -8
- sky/server/requests/payloads.py +2 -1
- sky/server/requests/preconditions.py +9 -4
- sky/server/requests/requests.py +124 -36
- sky/server/server.py +58 -24
- sky/server/stream_utils.py +127 -38
- sky/server/uvicorn.py +18 -17
- sky/utils/asyncio_utils.py +63 -3
- {skypilot_nightly-1.0.0.dev20251018.dist-info → skypilot_nightly-1.0.0.dev20251021.dist-info}/METADATA +33 -34
- {skypilot_nightly-1.0.0.dev20251018.dist-info → skypilot_nightly-1.0.0.dev20251021.dist-info}/RECORD +48 -48
- /sky/dashboard/out/_next/static/{Rn37hj-nuHOYT-HwxSDXC → jDc1PlRsl9Cc5FQUMLBu8}/_buildManifest.js +0 -0
- /sky/dashboard/out/_next/static/{Rn37hj-nuHOYT-HwxSDXC → jDc1PlRsl9Cc5FQUMLBu8}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20251018.dist-info → skypilot_nightly-1.0.0.dev20251021.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251018.dist-info → skypilot_nightly-1.0.0.dev20251021.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251018.dist-info → skypilot_nightly-1.0.0.dev20251021.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251018.dist-info → skypilot_nightly-1.0.0.dev20251021.dist-info}/top_level.txt +0 -0
sky/dashboard/out/jobs.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs-a35a9dc3c5ccd657.js" defer=""></script><script src="/dashboard/_next/static/
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs-a35a9dc3c5ccd657.js" defer=""></script><script src="/dashboard/_next/static/jDc1PlRsl9Cc5FQUMLBu8/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/jDc1PlRsl9Cc5FQUMLBu8/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs","query":{},"buildId":"jDc1PlRsl9Cc5FQUMLBu8","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/users.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/users-98d2ed979084162a.js" defer=""></script><script src="/dashboard/_next/static/
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/users-98d2ed979084162a.js" defer=""></script><script src="/dashboard/_next/static/jDc1PlRsl9Cc5FQUMLBu8/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/jDc1PlRsl9Cc5FQUMLBu8/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/users","query":{},"buildId":"jDc1PlRsl9Cc5FQUMLBu8","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/volumes.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/volumes-835d14ba94808f79.js" defer=""></script><script src="/dashboard/_next/static/
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/volumes-835d14ba94808f79.js" defer=""></script><script src="/dashboard/_next/static/jDc1PlRsl9Cc5FQUMLBu8/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/jDc1PlRsl9Cc5FQUMLBu8/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/volumes","query":{},"buildId":"jDc1PlRsl9Cc5FQUMLBu8","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/workspace/new.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js" defer=""></script><script src="/dashboard/_next/static/
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js" defer=""></script><script src="/dashboard/_next/static/jDc1PlRsl9Cc5FQUMLBu8/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/jDc1PlRsl9Cc5FQUMLBu8/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspace/new","query":{},"buildId":"jDc1PlRsl9Cc5FQUMLBu8","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/workspaces/[name].html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-b15471acd2cba716.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/7359-c8d04e06886000b3.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-01359c57e018caa4.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-66237729cdf9749e.js" defer=""></script><script src="/dashboard/_next/static/chunks/6990-f6818c84ed8f1c86.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-4b4d5e824b7f9d3c.js" defer=""></script><script src="/dashboard/_next/static/chunks/1121-d0782b9251f0fcd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/6601-06114c982db410b6.js" defer=""></script><script 
src="/dashboard/_next/static/chunks/3015-7e0e8f06bb2f881c.js" defer=""></script><script src="/dashboard/_next/static/chunks/1141-3b40c39626f99c89.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-e8688c35c06f0ac5.js" defer=""></script><script src="/dashboard/_next/static/
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-b15471acd2cba716.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/7359-c8d04e06886000b3.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-01359c57e018caa4.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-66237729cdf9749e.js" defer=""></script><script src="/dashboard/_next/static/chunks/6990-f6818c84ed8f1c86.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-4b4d5e824b7f9d3c.js" defer=""></script><script src="/dashboard/_next/static/chunks/1121-d0782b9251f0fcd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/6601-06114c982db410b6.js" defer=""></script><script 
src="/dashboard/_next/static/chunks/3015-7e0e8f06bb2f881c.js" defer=""></script><script src="/dashboard/_next/static/chunks/1141-3b40c39626f99c89.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-e8688c35c06f0ac5.js" defer=""></script><script src="/dashboard/_next/static/jDc1PlRsl9Cc5FQUMLBu8/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/jDc1PlRsl9Cc5FQUMLBu8/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces/[name]","query":{},"buildId":"jDc1PlRsl9Cc5FQUMLBu8","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/dashboard/out/workspaces.html
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-69c80d677d3c2949.js" defer=""></script><script src="/dashboard/_next/static/
|
|
1
|
+
<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-69c80d677d3c2949.js" defer=""></script><script src="/dashboard/_next/static/jDc1PlRsl9Cc5FQUMLBu8/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/jDc1PlRsl9Cc5FQUMLBu8/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"jDc1PlRsl9Cc5FQUMLBu8","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
|
sky/data/storage.py
CHANGED
|
@@ -2015,7 +2015,7 @@ class S3CompatibleStore(AbstractStore):
|
|
|
2015
2015
|
except aws.botocore_exceptions().ClientError as e:
|
|
2016
2016
|
with ux_utils.print_exception_no_traceback():
|
|
2017
2017
|
raise exceptions.StorageBucketCreateError(
|
|
2018
|
-
f'Attempted to create
|
|
2018
|
+
f'Attempted to create S3 bucket {self.name} but failed.'
|
|
2019
2019
|
) from e
|
|
2020
2020
|
return self.config.resource_factory(bucket_name)
|
|
2021
2021
|
|
|
@@ -2554,7 +2554,7 @@ class GcsStore(AbstractStore):
|
|
|
2554
2554
|
except Exception as e: # pylint: disable=broad-except
|
|
2555
2555
|
with ux_utils.print_exception_no_traceback():
|
|
2556
2556
|
raise exceptions.StorageBucketCreateError(
|
|
2557
|
-
f'Attempted to create
|
|
2557
|
+
f'Attempted to create GCS bucket {self.name} but failed.'
|
|
2558
2558
|
) from e
|
|
2559
2559
|
logger.info(
|
|
2560
2560
|
f' {colorama.Style.DIM}Created GCS bucket {new_bucket.name!r} in '
|
sky/global_user_state.py
CHANGED
|
@@ -1620,8 +1620,10 @@ def get_clusters(
|
|
|
1620
1620
|
cluster_table.c.storage_mounts_metadata,
|
|
1621
1621
|
cluster_table.c.cluster_ever_up,
|
|
1622
1622
|
cluster_table.c.status_updated_at, cluster_table.c.user_hash,
|
|
1623
|
-
cluster_table.c.config_hash,
|
|
1624
|
-
cluster_table.c.is_managed
|
|
1623
|
+
cluster_table.c.config_hash,
|
|
1624
|
+
cluster_table.c.workspace, cluster_table.c.is_managed,
|
|
1625
|
+
user_table.c.name.label('user_name')).outerjoin(
|
|
1626
|
+
user_table, cluster_table.c.user_hash == user_table.c.id)
|
|
1625
1627
|
else:
|
|
1626
1628
|
query = session.query(
|
|
1627
1629
|
cluster_table.c.name,
|
|
@@ -1643,7 +1645,9 @@ def get_clusters(
|
|
|
1643
1645
|
cluster_table.c.is_managed,
|
|
1644
1646
|
# extra fields compared to above query
|
|
1645
1647
|
cluster_table.c.last_creation_yaml,
|
|
1646
|
-
cluster_table.c.last_creation_command
|
|
1648
|
+
cluster_table.c.last_creation_command,
|
|
1649
|
+
user_table.c.name.label('user_name')).outerjoin(
|
|
1650
|
+
user_table, cluster_table.c.user_hash == user_table.c.id)
|
|
1647
1651
|
if exclude_managed_clusters:
|
|
1648
1652
|
query = query.filter(cluster_table.c.is_managed == int(False))
|
|
1649
1653
|
if workspaces_filter is not None:
|
|
@@ -1666,28 +1670,22 @@ def get_clusters(
|
|
|
1666
1670
|
rows = query.all()
|
|
1667
1671
|
records = []
|
|
1668
1672
|
|
|
1669
|
-
#
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
user_hashes = set(row_to_user_hash.values())
|
|
1678
|
-
user_hash_to_user = get_users(user_hashes)
|
|
1673
|
+
# Check if we need to fetch the current user's name,
|
|
1674
|
+
# for backwards compatibility, if user_hash is None.
|
|
1675
|
+
current_user_name = None
|
|
1676
|
+
needs_current_user = any(row.user_hash is None for row in rows)
|
|
1677
|
+
if needs_current_user:
|
|
1678
|
+
current_user = get_user(current_user_hash)
|
|
1679
|
+
current_user_name = (current_user.name
|
|
1680
|
+
if current_user is not None else None)
|
|
1679
1681
|
|
|
1680
1682
|
# get last cluster event for each row
|
|
1681
|
-
cluster_hashes = set(row_to_user_hash.keys())
|
|
1682
1683
|
if not summary_response:
|
|
1684
|
+
cluster_hashes = {row.cluster_hash for row in rows}
|
|
1683
1685
|
last_cluster_event_dict = _get_last_cluster_event_multiple(
|
|
1684
1686
|
cluster_hashes, ClusterEventType.STATUS_CHANGE)
|
|
1685
1687
|
|
|
1686
|
-
# get user for each row
|
|
1687
1688
|
for row in rows:
|
|
1688
|
-
user_hash = row_to_user_hash[row.cluster_hash]
|
|
1689
|
-
user = user_hash_to_user.get(user_hash, None)
|
|
1690
|
-
user_name = user.name if user is not None else None
|
|
1691
1689
|
# TODO: use namedtuple instead of dict
|
|
1692
1690
|
record = {
|
|
1693
1691
|
'name': row.name,
|
|
@@ -1704,8 +1702,10 @@ def get_clusters(
|
|
|
1704
1702
|
row.storage_mounts_metadata),
|
|
1705
1703
|
'cluster_ever_up': bool(row.cluster_ever_up),
|
|
1706
1704
|
'status_updated_at': row.status_updated_at,
|
|
1707
|
-
'user_hash': user_hash
|
|
1708
|
-
|
|
1705
|
+
'user_hash': (row.user_hash
|
|
1706
|
+
if row.user_hash is not None else current_user_hash),
|
|
1707
|
+
'user_name': (row.user_name
|
|
1708
|
+
if row.user_name is not None else current_user_name),
|
|
1709
1709
|
'workspace': row.workspace,
|
|
1710
1710
|
'is_managed': bool(row.is_managed),
|
|
1711
1711
|
'config_hash': row.config_hash,
|
sky/jobs/server/server.py
CHANGED
|
@@ -109,6 +109,7 @@ async def logs(
|
|
|
109
109
|
schedule_type=schedule_type,
|
|
110
110
|
request_cluster_name=common.JOB_CONTROLLER_NAME,
|
|
111
111
|
)
|
|
112
|
+
kill_request_on_disconnect = False
|
|
112
113
|
if schedule_type == api_requests.ScheduleType.SHORT:
|
|
113
114
|
# For short request, run in the coroutine to avoid blocking
|
|
114
115
|
# short workers.
|
|
@@ -117,11 +118,15 @@ async def logs(
|
|
|
117
118
|
background_tasks.add_task(task.cancel)
|
|
118
119
|
else:
|
|
119
120
|
executor.schedule_prepared_request(request_task)
|
|
121
|
+
# When runs in long executor process, we should kill the request on
|
|
122
|
+
# disconnect to cancel the running routine.
|
|
123
|
+
kill_request_on_disconnect = True
|
|
120
124
|
|
|
121
125
|
return stream_utils.stream_response_for_long_request(
|
|
122
126
|
request_id=request_task.request_id,
|
|
123
127
|
logs_path=request_task.log_path,
|
|
124
128
|
background_tasks=background_tasks,
|
|
129
|
+
kill_request_on_disconnect=kill_request_on_disconnect,
|
|
125
130
|
)
|
|
126
131
|
|
|
127
132
|
|
|
@@ -201,12 +206,16 @@ async def pool_tail_logs(
|
|
|
201
206
|
request_cluster_name=common.JOB_CONTROLLER_NAME,
|
|
202
207
|
)
|
|
203
208
|
|
|
204
|
-
request_task = api_requests.get_request(request.state.request_id
|
|
209
|
+
request_task = api_requests.get_request(request.state.request_id,
|
|
210
|
+
fields=['request_id'])
|
|
205
211
|
|
|
206
212
|
return stream_utils.stream_response_for_long_request(
|
|
207
213
|
request_id=request_task.request_id,
|
|
214
|
+
# req.log_path is derived from request_id,
|
|
215
|
+
# so it's ok to just grab the request_id in the above query.
|
|
208
216
|
logs_path=request_task.log_path,
|
|
209
217
|
background_tasks=background_tasks,
|
|
218
|
+
kill_request_on_disconnect=True,
|
|
210
219
|
)
|
|
211
220
|
|
|
212
221
|
|
sky/provision/kubernetes/network.py
CHANGED
|
@@ -48,8 +48,10 @@ def _open_ports_using_loadbalancer(
|
|
|
48
48
|
service_name = _LOADBALANCER_SERVICE_NAME.format(
|
|
49
49
|
cluster_name_on_cloud=cluster_name_on_cloud)
|
|
50
50
|
context = kubernetes_utils.get_context_from_config(provider_config)
|
|
51
|
+
namespace = kubernetes_utils.get_namespace_from_config(provider_config)
|
|
52
|
+
|
|
51
53
|
content = network_utils.fill_loadbalancer_template(
|
|
52
|
-
namespace=
|
|
54
|
+
namespace=namespace,
|
|
53
55
|
context=context,
|
|
54
56
|
service_name=service_name,
|
|
55
57
|
ports=ports,
|
|
@@ -103,7 +105,7 @@ def _open_ports_using_ingress(
|
|
|
103
105
|
# To avoid this, we change ingress creation into one object containing
|
|
104
106
|
# multiple rules.
|
|
105
107
|
content = network_utils.fill_ingress_template(
|
|
106
|
-
namespace=
|
|
108
|
+
namespace=namespace,
|
|
107
109
|
context=context,
|
|
108
110
|
service_details=service_details,
|
|
109
111
|
ingress_name=f'{cluster_name_on_cloud}-skypilot-ingress',
|
|
@@ -165,9 +167,10 @@ def _cleanup_ports_for_loadbalancer(
|
|
|
165
167
|
# TODO(aylei): test coverage
|
|
166
168
|
context = provider_config.get(
|
|
167
169
|
'context', kubernetes_utils.get_current_kube_config_context_name())
|
|
170
|
+
namespace = kubernetes_utils.get_namespace_from_config(provider_config)
|
|
168
171
|
network_utils.delete_namespaced_service(
|
|
169
172
|
context=context,
|
|
170
|
-
namespace=
|
|
173
|
+
namespace=namespace,
|
|
171
174
|
service_name=service_name,
|
|
172
175
|
)
|
|
173
176
|
|
|
@@ -180,19 +183,19 @@ def _cleanup_ports_for_ingress(
|
|
|
180
183
|
# Delete services for each port
|
|
181
184
|
context = provider_config.get(
|
|
182
185
|
'context', kubernetes_utils.get_current_kube_config_context_name())
|
|
186
|
+
namespace = kubernetes_utils.get_namespace_from_config(provider_config)
|
|
183
187
|
for port in ports:
|
|
184
188
|
service_name = f'{cluster_name_on_cloud}--skypilot-svc--{port}'
|
|
185
189
|
network_utils.delete_namespaced_service(
|
|
186
190
|
context=context,
|
|
187
|
-
namespace=
|
|
188
|
-
kubernetes_utils.DEFAULT_NAMESPACE),
|
|
191
|
+
namespace=namespace,
|
|
189
192
|
service_name=service_name,
|
|
190
193
|
)
|
|
191
194
|
|
|
192
195
|
# Delete the single ingress used for all ports
|
|
193
196
|
ingress_name = f'{cluster_name_on_cloud}-skypilot-ingress'
|
|
194
197
|
network_utils.delete_namespaced_ingress(
|
|
195
|
-
namespace=
|
|
198
|
+
namespace=namespace,
|
|
196
199
|
context=kubernetes_utils.get_context_from_config(provider_config),
|
|
197
200
|
ingress_name=ingress_name,
|
|
198
201
|
)
|
sky/provision/provisioner.py
CHANGED
|
@@ -442,6 +442,14 @@ def _post_provision_setup(
|
|
|
442
442
|
cluster_name.name_on_cloud,
|
|
443
443
|
provider_config=provider_config)
|
|
444
444
|
|
|
445
|
+
# Update cluster info in handle so cluster instance ids are set. This
|
|
446
|
+
# allows us to expose provision logs to debug nodes that failed during post
|
|
447
|
+
# provision setup.
|
|
448
|
+
handle = global_user_state.get_handle_from_cluster_name(
|
|
449
|
+
cluster_name.display_name)
|
|
450
|
+
handle.cached_cluster_info = cluster_info
|
|
451
|
+
global_user_state.update_cluster_handle(cluster_name.display_name, handle)
|
|
452
|
+
|
|
445
453
|
if cluster_info.num_instances > 1:
|
|
446
454
|
# Only worker nodes have logs in the per-instance log directory. Head
|
|
447
455
|
# node's log will be redirected to the main log file.
|
sky/serve/server/server.py
CHANGED
sky/server/common.py
CHANGED
|
@@ -490,6 +490,7 @@ def get_api_server_status(endpoint: Optional[str] = None) -> ApiServerInfo:
|
|
|
490
490
|
def handle_request_error(response: 'requests.Response') -> None:
|
|
491
491
|
# Keep the original HTTPError if the response code >= 400
|
|
492
492
|
response.raise_for_status()
|
|
493
|
+
|
|
493
494
|
# Other status codes are not expected neither, e.g. we do not expect to
|
|
494
495
|
# handle redirection here.
|
|
495
496
|
if response.status_code != 200:
|
|
@@ -916,12 +917,18 @@ def reload_for_new_request(client_entrypoint: Optional[str],
|
|
|
916
917
|
client_command: Optional[str],
|
|
917
918
|
using_remote_api_server: bool, user: 'models.User',
|
|
918
919
|
request_id: str) -> None:
|
|
919
|
-
"""Reload modules, global variables, and usage message for a new request.
|
|
920
|
+
"""Reload modules, global variables, and usage message for a new request.
|
|
921
|
+
|
|
922
|
+
Must be called within the request's context.
|
|
923
|
+
"""
|
|
920
924
|
# This should be called first to make sure the logger is up-to-date.
|
|
921
925
|
sky_logging.reload_logger()
|
|
922
926
|
|
|
923
927
|
# Reload the skypilot config to make sure the latest config is used.
|
|
924
|
-
|
|
928
|
+
# We don't need to grab the lock here because this function is only
|
|
929
|
+
# run once we are inside the request's context, so there shouldn't
|
|
930
|
+
# be any race conditions when reloading the config.
|
|
931
|
+
skypilot_config.reload_config()
|
|
925
932
|
|
|
926
933
|
# Reset the client entrypoint and command for the usage message.
|
|
927
934
|
common_utils.set_request_context(
|
sky/server/constants.py
CHANGED
|
@@ -10,7 +10,7 @@ from sky.skylet import constants
|
|
|
10
10
|
# based on version info is needed.
|
|
11
11
|
# For more details and code guidelines, refer to:
|
|
12
12
|
# https://docs.skypilot.co/en/latest/developers/CONTRIBUTING.html#backward-compatibility-guidelines
|
|
13
|
-
API_VERSION =
|
|
13
|
+
API_VERSION = 21
|
|
14
14
|
|
|
15
15
|
# The minimum peer API version that the code should still work with.
|
|
16
16
|
# Notes (dev):
|
sky/server/daemons.py
CHANGED
|
@@ -38,9 +38,11 @@ class InternalRequestDaemon:
|
|
|
38
38
|
try:
|
|
39
39
|
# Refresh config within the while loop.
|
|
40
40
|
# Since this is a long running daemon,
|
|
41
|
-
#
|
|
41
|
+
# reload_for_new_request()
|
|
42
42
|
# is not called in between the event runs.
|
|
43
|
-
|
|
43
|
+
# We don't need to grab the lock here because each of the daemons
|
|
44
|
+
# runs in its own process and thus has its own request context.
|
|
45
|
+
skypilot_config.reload_config()
|
|
44
46
|
# Get the configured log level for the daemon inside the event loop
|
|
45
47
|
# in case the log level changes after the API server is started.
|
|
46
48
|
level_str = skypilot_config.get_nested(
|
sky/server/requests/executor.py
CHANGED
|
@@ -214,10 +214,11 @@ class RequestWorker:
|
|
|
214
214
|
time.sleep(0.1)
|
|
215
215
|
return
|
|
216
216
|
request_id, ignore_return_value, _ = request_element
|
|
217
|
-
request = api_requests.get_request(request_id)
|
|
217
|
+
request = api_requests.get_request(request_id, fields=['status'])
|
|
218
218
|
assert request is not None, f'Request with ID {request_id} is None'
|
|
219
219
|
if request.status == api_requests.RequestStatus.CANCELLED:
|
|
220
220
|
return
|
|
221
|
+
del request
|
|
221
222
|
logger.info(f'[{self}] Submitting request: {request_id}')
|
|
222
223
|
# Start additional process to run the request, so that it can be
|
|
223
224
|
# cancelled when requested by a user.
|
|
@@ -621,8 +622,8 @@ async def _execute_request_coroutine(request: api_requests.Request):
|
|
|
621
622
|
logger.info(f'Executing request {request.request_id} in coroutine')
|
|
622
623
|
func = request.entrypoint
|
|
623
624
|
request_body = request.request_body
|
|
624
|
-
|
|
625
|
-
|
|
625
|
+
await api_requests.update_status_async(request.request_id,
|
|
626
|
+
api_requests.RequestStatus.RUNNING)
|
|
626
627
|
# Redirect stdout and stderr to the request log path.
|
|
627
628
|
original_output = ctx.redirect_log(request.log_path)
|
|
628
629
|
try:
|
|
@@ -632,7 +633,7 @@ async def _execute_request_coroutine(request: api_requests.Request):
|
|
|
632
633
|
**request_body.to_kwargs())
|
|
633
634
|
except Exception as e: # pylint: disable=broad-except
|
|
634
635
|
ctx.redirect_log(original_output)
|
|
635
|
-
api_requests.
|
|
636
|
+
await api_requests.set_request_failed_async(request.request_id, e)
|
|
636
637
|
logger.error(f'Failed to run request {request.request_id} due to '
|
|
637
638
|
f'{common_utils.format_exception(e)}')
|
|
638
639
|
return
|
|
@@ -649,14 +650,15 @@ async def _execute_request_coroutine(request: api_requests.Request):
|
|
|
649
650
|
if fut.done():
|
|
650
651
|
try:
|
|
651
652
|
result = await fut
|
|
652
|
-
api_requests.
|
|
653
|
+
await api_requests.set_request_succeeded_async(
|
|
654
|
+
request_id, result)
|
|
653
655
|
except asyncio.CancelledError:
|
|
654
656
|
# The task is cancelled by ctx.cancel(), where the status
|
|
655
657
|
# should already be set to CANCELLED.
|
|
656
658
|
pass
|
|
657
659
|
except Exception as e: # pylint: disable=broad-except
|
|
658
660
|
ctx.redirect_log(original_output)
|
|
659
|
-
api_requests.
|
|
661
|
+
await api_requests.set_request_failed_async(request_id, e)
|
|
660
662
|
logger.error(f'Request {request_id} failed due to '
|
|
661
663
|
f'{common_utils.format_exception(e)}')
|
|
662
664
|
return True
|
|
@@ -671,13 +673,13 @@ async def _execute_request_coroutine(request: api_requests.Request):
|
|
|
671
673
|
except asyncio.CancelledError:
|
|
672
674
|
# Current coroutine is cancelled due to client disconnect, set the
|
|
673
675
|
# request status for consistency.
|
|
674
|
-
api_requests.
|
|
676
|
+
await api_requests.set_request_cancelled_async(request.request_id)
|
|
675
677
|
pass
|
|
676
678
|
# pylint: disable=broad-except
|
|
677
679
|
except (Exception, KeyboardInterrupt, SystemExit) as e:
|
|
678
680
|
# Handle any other error
|
|
679
681
|
ctx.redirect_log(original_output)
|
|
680
|
-
api_requests.
|
|
682
|
+
await api_requests.set_request_failed_async(request.request_id, e)
|
|
681
683
|
logger.error(f'Request {request.request_id} interrupted due to '
|
|
682
684
|
f'unhandled exception: {common_utils.format_exception(e)}')
|
|
683
685
|
raise
|
sky/server/requests/payloads.py
CHANGED
|
@@ -363,9 +363,10 @@ class CancelBody(RequestBody):
|
|
|
363
363
|
return kwargs
|
|
364
364
|
|
|
365
365
|
|
|
366
|
-
class
|
|
366
|
+
class ProvisionLogsBody(RequestBody):
|
|
367
367
|
"""Cluster node."""
|
|
368
368
|
cluster_name: str
|
|
369
|
+
worker: Optional[int] = None
|
|
369
370
|
|
|
370
371
|
|
|
371
372
|
class ClusterJobBody(RequestBody):
|
sky/server/requests/preconditions.py
CHANGED
|
@@ -90,7 +90,7 @@ class Precondition(abc.ABC):
|
|
|
90
90
|
while True:
|
|
91
91
|
if self.timeout > 0 and time.time() - start_time > self.timeout:
|
|
92
92
|
# Cancel the request on timeout.
|
|
93
|
-
api_requests.
|
|
93
|
+
await api_requests.set_request_failed_async(
|
|
94
94
|
self.request_id,
|
|
95
95
|
exceptions.RequestCancelled(
|
|
96
96
|
f'Request {self.request_id} precondition wait timed '
|
|
@@ -98,13 +98,15 @@ class Precondition(abc.ABC):
|
|
|
98
98
|
return False
|
|
99
99
|
|
|
100
100
|
# Check if the request has been cancelled
|
|
101
|
-
request = await api_requests.get_request_async(self.request_id
|
|
101
|
+
request = await api_requests.get_request_async(self.request_id,
|
|
102
|
+
fields=['status'])
|
|
102
103
|
if request is None:
|
|
103
104
|
logger.error(f'Request {self.request_id} not found')
|
|
104
105
|
return False
|
|
105
106
|
if request.status == api_requests.RequestStatus.CANCELLED:
|
|
106
107
|
logger.debug(f'Request {self.request_id} cancelled')
|
|
107
108
|
return False
|
|
109
|
+
del request
|
|
108
110
|
|
|
109
111
|
try:
|
|
110
112
|
met, status_msg = await self.check()
|
|
@@ -116,7 +118,7 @@ class Precondition(abc.ABC):
|
|
|
116
118
|
self.request_id, status_msg)
|
|
117
119
|
last_status_msg = status_msg
|
|
118
120
|
except (Exception, SystemExit, KeyboardInterrupt) as e: # pylint: disable=broad-except
|
|
119
|
-
api_requests.
|
|
121
|
+
await api_requests.set_request_failed_async(self.request_id, e)
|
|
120
122
|
logger.info(f'Request {self.request_id} failed due to '
|
|
121
123
|
f'{common_utils.format_exception(e)}')
|
|
122
124
|
return False
|
|
@@ -166,7 +168,10 @@ class ClusterStartCompletePrecondition(Precondition):
|
|
|
166
168
|
api_requests.RequestStatus.RUNNING
|
|
167
169
|
],
|
|
168
170
|
include_request_names=['sky.launch', 'sky.start'],
|
|
169
|
-
cluster_names=[self.cluster_name]
|
|
171
|
+
cluster_names=[self.cluster_name],
|
|
172
|
+
# Only get the request ID to avoid fetching the whole request.
|
|
173
|
+
# We're only interested in the count, not the whole request.
|
|
174
|
+
fields=['request_id']))
|
|
170
175
|
if len(requests) == 0:
|
|
171
176
|
# No running or pending tasks, the start process is done.
|
|
172
177
|
return True, None
|