skypilot-nightly 1.0.0.dev20250812__py3-none-any.whl → 1.0.0.dev20250814__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in that registry.
Potentially problematic release: the registry flags this version of skypilot-nightly as possibly problematic.
- sky/__init__.py +4 -2
- sky/backends/backend_utils.py +69 -6
- sky/backends/cloud_vm_ray_backend.py +156 -25
- sky/catalog/cudo_catalog.py +1 -1
- sky/catalog/data_fetchers/fetch_cudo.py +1 -1
- sky/catalog/data_fetchers/fetch_nebius.py +6 -3
- sky/client/cli/command.py +40 -77
- sky/client/common.py +1 -1
- sky/client/sdk.py +19 -19
- sky/client/sdk_async.py +5 -4
- sky/clouds/aws.py +52 -1
- sky/clouds/kubernetes.py +14 -0
- sky/dag.py +1 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/{Fuy7OzApYTUMz2QgoP7dP → Y0eNlwi85qGRecLTin11y}/_buildManifest.js +1 -1
- sky/dashboard/out/_next/static/chunks/{6989-6129c1cfbcf51063.js → 6989-37611fe6b86d274d.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{_app-491a4d699d95e808.js → _app-c2ea34fda4f1f8c8.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-f5ccf5d39d87aebe.js → [pool]-664c36eda967b1ba.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{webpack-7fd0cf9dbecff10f.js → webpack-00c0a51d21157453.js} +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage.py +11 -1
- sky/exceptions.py +5 -0
- sky/global_user_state.py +63 -7
- sky/jobs/constants.py +1 -1
- sky/jobs/controller.py +0 -1
- sky/jobs/recovery_strategy.py +3 -3
- sky/jobs/scheduler.py +23 -68
- sky/jobs/server/core.py +18 -12
- sky/jobs/state.py +6 -2
- sky/jobs/utils.py +8 -0
- sky/provision/__init__.py +1 -0
- sky/provision/aws/config.py +9 -0
- sky/provision/aws/instance.py +36 -13
- sky/provision/azure/instance.py +2 -0
- sky/provision/cudo/cudo_wrapper.py +1 -1
- sky/provision/cudo/instance.py +2 -0
- sky/provision/do/instance.py +2 -0
- sky/provision/fluidstack/instance.py +2 -0
- sky/provision/gcp/instance.py +2 -0
- sky/provision/hyperbolic/instance.py +2 -1
- sky/provision/kubernetes/instance.py +133 -0
- sky/provision/lambda_cloud/instance.py +2 -0
- sky/provision/nebius/instance.py +2 -0
- sky/provision/oci/instance.py +2 -0
- sky/provision/paperspace/instance.py +2 -1
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/runpod/instance.py +2 -0
- sky/provision/runpod/utils.py +1 -1
- sky/provision/scp/instance.py +2 -0
- sky/provision/vast/instance.py +2 -0
- sky/provision/vsphere/instance.py +2 -0
- sky/resources.py +1 -2
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +70 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/serve/constants.py +3 -7
- sky/serve/replica_managers.py +15 -16
- sky/serve/serve_state.py +10 -0
- sky/serve/serve_utils.py +21 -20
- sky/serve/server/impl.py +15 -19
- sky/serve/service.py +31 -16
- sky/server/server.py +20 -14
- sky/setup_files/dependencies.py +11 -10
- sky/skylet/autostop_lib.py +38 -5
- sky/skylet/constants.py +3 -1
- sky/skylet/services.py +44 -0
- sky/skylet/skylet.py +49 -4
- sky/task.py +19 -16
- sky/templates/aws-ray.yml.j2 +2 -2
- sky/templates/jobs-controller.yaml.j2 +6 -0
- sky/utils/command_runner.py +1 -1
- sky/utils/config_utils.py +29 -5
- sky/utils/controller_utils.py +73 -0
- sky/utils/db/db_utils.py +17 -0
- sky/utils/schemas.py +3 -0
- sky/volumes/server/core.py +2 -2
- sky/volumes/server/server.py +2 -2
- {skypilot_nightly-1.0.0.dev20250812.dist-info → skypilot_nightly-1.0.0.dev20250814.dist-info}/METADATA +5 -7
- {skypilot_nightly-1.0.0.dev20250812.dist-info → skypilot_nightly-1.0.0.dev20250814.dist-info}/RECORD +102 -94
- /sky/dashboard/out/_next/static/{Fuy7OzApYTUMz2QgoP7dP → Y0eNlwi85qGRecLTin11y}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250812.dist-info → skypilot_nightly-1.0.0.dev20250814.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250812.dist-info → skypilot_nightly-1.0.0.dev20250814.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250812.dist-info → skypilot_nightly-1.0.0.dev20250814.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250812.dist-info → skypilot_nightly-1.0.0.dev20250814.dist-info}/top_level.txt +0 -0
sky/dashboard/out/users.html
CHANGED
@@ -1 +1 @@
-<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
+<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-00c0a51d21157453.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-c2ea34fda4f1f8c8.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/users-7ed36e44e779d5c7.js" defer=""></script><script src="/dashboard/_next/static/Y0eNlwi85qGRecLTin11y/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Y0eNlwi85qGRecLTin11y/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/users","query":{},"buildId":"Y0eNlwi85qGRecLTin11y","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>

sky/dashboard/out/volumes.html
CHANGED
@@ -1 +1 @@
-<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
+<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-00c0a51d21157453.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-c2ea34fda4f1f8c8.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/volumes-c9695d657f78b5dc.js" defer=""></script><script src="/dashboard/_next/static/Y0eNlwi85qGRecLTin11y/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Y0eNlwi85qGRecLTin11y/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/volumes","query":{},"buildId":"Y0eNlwi85qGRecLTin11y","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>

sky/dashboard/out/workspace/new.html
CHANGED
@@ -1 +1 @@
-<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
+<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-00c0a51d21157453.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-c2ea34fda4f1f8c8.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js" defer=""></script><script src="/dashboard/_next/static/Y0eNlwi85qGRecLTin11y/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Y0eNlwi85qGRecLTin11y/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspace/new","query":{},"buildId":"Y0eNlwi85qGRecLTin11y","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>

sky/dashboard/out/workspaces/[name].html
CHANGED
@@ -1 +1 @@
-<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
+<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-00c0a51d21157453.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-c2ea34fda4f1f8c8.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-b15471acd2cba716.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/1559-6c00e20454194859.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-37611fe6b86d274d.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-c9686994ddafcf01.js" defer=""></script><script src="/dashboard/_next/static/chunks/6990-0f886f16e0d55ff8.js" defer=""></script><script src="/dashboard/_next/static/chunks/8056-5bdeda81199c0def.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-85426374db04811e.js" defer=""></script><script src="/dashboard/_next/static/chunks/6601-06114c982db410b6.js" defer=""></script><script src="/dashboard/_next/static/chunks/9159-11421c0f2909236f.js" defer=""></script><script src="/dashboard/_next/static/chunks/1141-a8a8f1adba34c892.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-f72f73bcef9541dc.js" defer=""></script><script src="/dashboard/_next/static/Y0eNlwi85qGRecLTin11y/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Y0eNlwi85qGRecLTin11y/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces/[name]","query":{},"buildId":"Y0eNlwi85qGRecLTin11y","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>

sky/dashboard/out/workspaces.html
CHANGED
@@ -1 +1 @@
-<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-
+<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-00c0a51d21157453.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-c2ea34fda4f1f8c8.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-8f67be60165724cc.js" defer=""></script><script src="/dashboard/_next/static/Y0eNlwi85qGRecLTin11y/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/Y0eNlwi85qGRecLTin11y/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"Y0eNlwi85qGRecLTin11y","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
sky/data/storage.py
CHANGED
@@ -4510,9 +4510,19 @@ class R2Store(S3CompatibleStore):
             extra_cli_args=['--checksum-algorithm', 'CRC32'],  # R2 specific
             cloud_name=cloudflare.NAME,
             default_region='auto',
-            mount_cmd_factory=
+            mount_cmd_factory=cls._get_r2_mount_cmd,
         )
 
+    @classmethod
+    def _get_r2_mount_cmd(cls, bucket_name: str, mount_path: str,
+                          bucket_sub_path: Optional[str]) -> str:
+        """Factory method for R2 mount command."""
+        endpoint_url = cloudflare.create_endpoint()
+        return mounting_utils.get_r2_mount_cmd(cloudflare.R2_CREDENTIALS_PATH,
+                                               cloudflare.R2_PROFILE_NAME,
+                                               endpoint_url, bucket_name,
+                                               mount_path, bucket_sub_path)
+
     def mount_cached_command(self, mount_path: str) -> str:
         """R2-specific cached mount implementation using rclone."""
         install_cmd = mounting_utils.get_rclone_install_cmd()
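The storage.py hunk routes R2 mounting through a `mount_cmd_factory` callback instead of an inline command. Below is a minimal, self-contained sketch of that callback pattern; `StoreConfig` and `build_fuse_mount_cmd` are hypothetical stand-ins rather than SkyPilot APIs, and only the `(bucket_name, mount_path, bucket_sub_path) -> str` shape of the callback is taken from the diff.

```python
# Sketch of a mount-command factory callback (names are illustrative only).
from dataclasses import dataclass
from typing import Callable, Optional

MountCmdFactory = Callable[[str, str, Optional[str]], str]


@dataclass
class StoreConfig:
    cloud_name: str
    default_region: str
    mount_cmd_factory: MountCmdFactory


def build_fuse_mount_cmd(bucket_name: str, mount_path: str,
                         bucket_sub_path: Optional[str]) -> str:
    # A real factory (like _get_r2_mount_cmd above) would also resolve
    # credentials and an endpoint before rendering the command.
    sub = f'/{bucket_sub_path}' if bucket_sub_path else ''
    return f'mount-tool {bucket_name}{sub} {mount_path}'


config = StoreConfig(cloud_name='r2', default_region='auto',
                     mount_cmd_factory=build_fuse_mount_cmd)
print(config.mount_cmd_factory('my-bucket', '/mnt/data', None))
```

Keeping the factory as a classmethod lets each store subclass supply its own mount command while sharing the rest of the configuration.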
sky/exceptions.py
CHANGED
sky/global_user_state.py
CHANGED
@@ -645,13 +645,32 @@ def add_cluster_event(cluster_name: str,
                       new_status: Optional[status_lib.ClusterStatus],
                       reason: str,
                       event_type: ClusterEventType,
-                      nop_if_duplicate: bool = False
+                      nop_if_duplicate: bool = False,
+                      duplicate_regex: Optional[str] = None,
+                      expose_duplicate_error: bool = False,
+                      transitioned_at: Optional[int] = None) -> None:
+    """Add a cluster event.
+
+    Args:
+        cluster_name: Name of the cluster.
+        new_status: New status of the cluster.
+        reason: Reason for the event.
+        event_type: Type of the event.
+        nop_if_duplicate: If True, do not add the event if it is a duplicate.
+        duplicate_regex: If provided, do not add the event if it matches the
+            regex. Only used if nop_if_duplicate is True.
+        expose_duplicate_error: If True, raise an error if the event is a
+            duplicate. Only used if nop_if_duplicate is True.
+        transitioned_at: If provided, use this timestamp for the event.
+    """
     assert _SQLALCHEMY_ENGINE is not None
     cluster_hash = _get_hash_for_existing_cluster(cluster_name)
     if cluster_hash is None:
         logger.debug(f'Hash for cluster {cluster_name} not found. '
                      'Skipping event.')
         return
+    if transitioned_at is None:
+        transitioned_at = int(time.time())
     with orm.Session(_SQLALCHEMY_ENGINE) as session:
         if (_SQLALCHEMY_ENGINE.dialect.name ==
                 db_utils.SQLAlchemyDialect.SQLITE.value):
@@ -669,7 +688,10 @@ def add_cluster_event(cluster_name: str,
         if nop_if_duplicate:
             last_event = get_last_cluster_event(cluster_hash,
                                                 event_type=event_type)
-            if last_event
+            if duplicate_regex is not None and last_event is not None:
+                if re.search(duplicate_regex, last_event):
+                    return
+            elif last_event == reason:
                 return
         try:
             session.execute(
@@ -679,15 +701,20 @@ def add_cluster_event(cluster_name: str,
                     starting_status=last_status,
                     ending_status=new_status.value if new_status else None,
                     reason=reason,
-                    transitioned_at=
+                    transitioned_at=transitioned_at,
                     type=event_type.value,
                 ))
             session.commit()
         except sqlalchemy.exc.IntegrityError as e:
             if 'UNIQUE constraint failed' in str(e):
                 # This can happen if the cluster event is added twice.
-                # We can ignore this error
-
+                # We can ignore this error unless the caller requests
+                # to expose the error.
+                if expose_duplicate_error:
+                    raise db_utils.UniqueConstraintViolationError(
+                        value=reason, message=str(e))
+                else:
+                    pass
             else:
                 raise e
 
@@ -704,6 +731,35 @@ def get_last_cluster_event(cluster_hash: str,
         return row.reason
 
 
+def get_cluster_events(cluster_name: Optional[str], cluster_hash: Optional[str],
+                       event_type: ClusterEventType) -> List[str]:
+    """Returns the cluster events for the cluster.
+
+    Args:
+        cluster_name: Name of the cluster. Cannot be specified if cluster_hash
+            is specified.
+        cluster_hash: Hash of the cluster. Cannot be specified if cluster_name
+            is specified.
+        event_type: Type of the event.
+    """
+    assert _SQLALCHEMY_ENGINE is not None
+
+    if cluster_name is not None and cluster_hash is not None:
+        raise ValueError('Cannot specify both cluster_name and cluster_hash')
+    if cluster_name is None and cluster_hash is None:
+        raise ValueError('Must specify either cluster_name or cluster_hash')
+    if cluster_name is not None:
+        cluster_hash = _get_hash_for_existing_cluster(cluster_name)
+        if cluster_hash is None:
+            raise ValueError(f'Hash for cluster {cluster_name} not found.')
+
+    with orm.Session(_SQLALCHEMY_ENGINE) as session:
+        rows = session.query(cluster_event_table).filter_by(
+            cluster_hash=cluster_hash, type=event_type.value).order_by(
+                cluster_event_table.c.transitioned_at.asc()).all()
+        return [row.reason for row in rows]
+
+
 def _get_user_hash_or_current_user(user_hash: Optional[str]) -> str:
     """Returns the user hash or the current user hash, if user_hash is None.
 
@@ -1245,9 +1301,9 @@ def get_clusters_from_history(
 def get_cluster_names_start_with(starts_with: str) -> List[str]:
     assert _SQLALCHEMY_ENGINE is not None
     with orm.Session(_SQLALCHEMY_ENGINE) as session:
-        rows = session.query(cluster_table).filter(
+        rows = session.query(cluster_table.c.name).filter(
            cluster_table.c.name.like(f'{starts_with}%')).all()
-        return [row
+        return [row[0] for row in rows]
 
 
 @_init_db
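The add_cluster_event changes add regex-based duplicate suppression (and an opt-in UniqueConstraintViolationError for true duplicates). A standalone sketch of just the suppression decision is below, assuming `last_event` stands in for the result of `get_last_cluster_event(...)`; no database access is involved.

```python
# Sketch of the duplicate-suppression decision added in the hunk above.
import re
from typing import Optional


def should_skip_event(reason: str,
                      last_event: Optional[str],
                      nop_if_duplicate: bool = False,
                      duplicate_regex: Optional[str] = None) -> bool:
    """Return True if the new event should be dropped as a duplicate."""
    if not nop_if_duplicate:
        return False
    if duplicate_regex is not None and last_event is not None:
        # Match the regex against the previous event's reason, as in the diff.
        return re.search(duplicate_regex, last_event) is not None
    # Fall back to exact-reason comparison.
    return last_event == reason


# Example: repeated retry messages collapse into a single recorded event.
print(should_skip_event('Retrying autostop (attempt 3)',
                        'Retrying autostop (attempt 2)',
                        nop_if_duplicate=True,
                        duplicate_regex=r'Retrying autostop'))  # True
```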
sky/jobs/constants.py
CHANGED
@@ -47,7 +47,7 @@ JOBS_CLUSTER_NAME_PREFIX_LENGTH = 25
 # The version of the lib files that jobs/utils use. Whenever there is an API
 # change for the jobs/utils, we need to bump this version and update
 # job.utils.ManagedJobCodeGen to handle the version update.
-MANAGED_JOBS_VERSION =
+MANAGED_JOBS_VERSION = 8
 
 # The command for setting up the jobs dashboard on the controller. It firstly
 # checks if the systemd services are available, and if not (e.g., Kubernetes
sky/jobs/controller.py
CHANGED
@@ -30,7 +30,6 @@ from sky.jobs import recovery_strategy
 from sky.jobs import scheduler
 from sky.jobs import state as managed_job_state
 from sky.jobs import utils as managed_job_utils
-from sky.serve import serve_utils
 from sky.skylet import constants
 from sky.skylet import job_lib
 from sky.usage import usage_lib
sky/jobs/recovery_strategy.py
CHANGED
@@ -10,8 +10,8 @@ import traceback
 import typing
 from typing import Optional
 
-import sky
 from sky import backends
+from sky import dag as dag_lib
 from sky import exceptions
 from sky import execution
 from sky import global_user_state
@@ -61,7 +61,7 @@ class StrategyExecutor:
         """
         assert isinstance(backend, backends.CloudVmRayBackend), (
             'Only CloudVMRayBackend is supported.')
-        self.dag =
+        self.dag = dag_lib.Dag()
         self.dag.add(task)
         # For jobs submitted to a pool, the cluster name might change after each
         # recovery. Initially this is set to an empty string to indicate that no
@@ -447,7 +447,7 @@ class StrategyExecutor:
             # We retry immediately for worker pool, since no sky.launch()
             # is called and the overhead is minimal.
             gap_seconds = (backoff.current_backoff()
-                           if self.pool is None else
+                           if self.pool is None else 1)
             logger.info('Retrying to launch the cluster in '
                         f'{gap_seconds:.1f} seconds.')
             time.sleep(gap_seconds)
sky/jobs/scheduler.py
CHANGED
@@ -15,13 +15,14 @@ following section for more details).
 
 The scheduling logic limits #running jobs according to three limits:
 1. The number of jobs that can be launching (that is, STARTING or RECOVERING) at
-   once, based on the number of CPUs.
-
-
+   once, based on the number of CPUs. This the most compute-intensive part of
+   the job lifecycle, which is why we have an additional limit.
+   See sky/utils/controller_utils.py::_get_launch_parallelism.
 2. The number of jobs that can be running at any given time, based on the amount
-   of memory.
-
-
+   of memory. Since the job controller is doing very little once a job starts
+   (just checking its status periodically), the most significant resource it
+   consumes is memory.
+   See sky/utils/controller_utils.py::_get_job_parallelism.
 3. The number of jobs that can be running in a pool at any given time, based on
    the number of ready workers in the pool. (See _can_start_new_job.)
 
@@ -42,55 +43,27 @@ Nomenclature:
 
 from argparse import ArgumentParser
 import contextlib
-from functools import lru_cache
 import os
 import sys
 import time
-import typing
 from typing import Optional
 
 import filelock
 
 from sky import exceptions
 from sky import sky_logging
-from sky.adaptors import common as adaptors_common
 from sky.jobs import constants as managed_job_constants
 from sky.jobs import state
 from sky.serve import serve_utils
 from sky.skylet import constants
 from sky.utils import common_utils
+from sky.utils import controller_utils
 from sky.utils import subprocess_utils
 
-if typing.TYPE_CHECKING:
-    import psutil
-else:
-    psutil = adaptors_common.LazyImport('psutil')
-
 logger = sky_logging.init_logger('sky.jobs.controller')
 
-# The _MANAGED_JOB_SCHEDULER_LOCK should be held whenever we are checking the
-# parallelism control or updating the schedule_state of any job.
-# Any code that takes this lock must conclude by calling
-# maybe_schedule_next_jobs.
-_MANAGED_JOB_SCHEDULER_LOCK = '~/.sky/locks/managed_job_scheduler.lock'
 _ALIVE_JOB_LAUNCH_WAIT_INTERVAL = 0.5
 
-# Based on testing, assume a running job uses 350MB memory.
-JOB_MEMORY_MB = 350
-# Past 2000 simultaneous jobs, we become unstable.
-# See https://github.com/skypilot-org/skypilot/issues/4649.
-MAX_JOB_LIMIT = 2000
-# Number of ongoing launches launches allowed per CPU.
-LAUNCHES_PER_CPU = 4
-
-
-@lru_cache(maxsize=1)
-def _get_lock_path() -> str:
-    # TODO(tian): Per pool lock.
-    path = os.path.expanduser(_MANAGED_JOB_SCHEDULER_LOCK)
-    os.makedirs(os.path.dirname(path), exist_ok=True)
-    return path
-
 
 def _start_controller(job_id: int, dag_yaml_path: str, env_file_path: str,
                       pool: Optional[str]) -> None:
@@ -163,7 +136,8 @@ def maybe_schedule_next_jobs(pool: Optional[str] = None) -> None:
     # parallelism control. If we cannot obtain the lock, exit immediately.
     # The current lock holder is expected to launch any jobs it can before
     # releasing the lock.
-    with filelock.FileLock(
+    with filelock.FileLock(controller_utils.get_resources_lock_path(),
+                           blocking=False):
         while True:
             maybe_next_job = state.get_waiting_job(pool)
             if maybe_next_job is None:
@@ -184,7 +158,8 @@ def maybe_schedule_next_jobs(pool: Optional[str] = None) -> None:
             # an ALIVE_WAITING job, but we would be able to launch a WAITING
             # job.
             if current_state == state.ManagedJobScheduleState.ALIVE_WAITING:
-                if not
+                if not (controller_utils.can_provision() or
+                        actual_pool is not None):
                     # Can't schedule anything, break from scheduling loop.
                     break
             elif current_state == state.ManagedJobScheduleState.WAITING:
@@ -234,7 +209,7 @@ def submit_job(job_id: int, dag_yaml_path: str, original_user_yaml_path: str,
 
     The user hash should be set (e.g. via SKYPILOT_USER_ID) before calling this.
     """
-    with filelock.FileLock(
+    with filelock.FileLock(controller_utils.get_resources_lock_path()):
         is_resume = state.scheduler_set_waiting(job_id, dag_yaml_path,
                                                 original_user_yaml_path,
                                                 env_file_path,
@@ -286,11 +261,11 @@ def scheduled_launch(job_id: int):
     except exceptions.NoClusterLaunchedError:
         # NoClusterLaunchedError is indicates that the job is in retry backoff.
         # We should transition to ALIVE_BACKOFF instead of ALIVE.
-        with filelock.FileLock(
+        with filelock.FileLock(controller_utils.get_resources_lock_path()):
            state.scheduler_set_alive_backoff(job_id)
        raise
    else:
-        with filelock.FileLock(
+        with filelock.FileLock(controller_utils.get_resources_lock_path()):
            state.scheduler_set_alive(job_id)
    finally:
        maybe_schedule_next_jobs(pool)
@@ -310,56 +285,36 @@ def job_done(job_id: int, idempotent: bool = False) -> None:
         return
     pool = state.get_pool_from_job_id(job_id)
 
-    with filelock.FileLock(
+    with filelock.FileLock(controller_utils.get_resources_lock_path()):
         state.scheduler_set_done(job_id, idempotent)
         maybe_schedule_next_jobs(pool)
 
 
 def _set_alive_waiting(job_id: int) -> None:
     """Should use wait_until_launch_okay() to transition to this state."""
-    with filelock.FileLock(
+    with filelock.FileLock(controller_utils.get_resources_lock_path()):
         state.scheduler_set_alive_waiting(job_id)
     pool = state.get_pool_from_job_id(job_id)
     maybe_schedule_next_jobs(pool)
 
 
-def _get_job_parallelism() -> int:
-    job_memory = JOB_MEMORY_MB * 1024 * 1024
-
-    job_limit = min(psutil.virtual_memory().total // job_memory, MAX_JOB_LIMIT)
-
-    return max(job_limit, 1)
-
-
-def _get_launch_parallelism() -> int:
-    cpus = os.cpu_count()
-    return cpus * LAUNCHES_PER_CPU if cpus is not None else 1
-
-
 def _can_start_new_job(pool: Optional[str]) -> bool:
-    launching_jobs = state.get_num_launching_jobs()
-    alive_jobs = state.get_num_alive_jobs()
-
     # Check basic resource limits
-
-
+    # Pool jobs don't need to provision resources, so we skip the check.
+    if not ((controller_utils.can_provision() or pool is not None) and
+            controller_utils.can_start_new_process()):
         return False
 
-    # Check if there are available
+    # Check if there are available workers in the pool
     if pool is not None:
         alive_jobs_in_pool = state.get_num_alive_jobs(pool)
-        if alive_jobs_in_pool >= serve_utils.
-            logger.debug(f'No
+        if alive_jobs_in_pool >= len(serve_utils.get_ready_replicas(pool)):
+            logger.debug(f'No READY workers available in pool {pool}')
             return False
 
     return True
 
 
-def _can_lauch_in_alive_job() -> bool:
-    launching_jobs = state.get_num_launching_jobs()
-    return launching_jobs < _get_launch_parallelism()
-
-
 if __name__ == '__main__':
     parser = ArgumentParser()
     parser.add_argument('dag_yaml',
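The scheduler hunks replace the module-local lock path and parallelism constants with controller_utils helpers, and keep acquiring the scheduling lock non-blockingly. Below is a sketch of that non-blocking gate, assuming a filelock version that accepts `blocking=` in the constructor (as the diff itself does) and using a placeholder lock path rather than `controller_utils.get_resources_lock_path()`.

```python
# Sketch of a non-blocking scheduler gate built on filelock.
import os

import filelock

# Placeholder path; the real scheduler resolves this via controller_utils.
LOCK_PATH = os.path.expanduser('~/.sky/locks/example_scheduler.lock')
os.makedirs(os.path.dirname(LOCK_PATH), exist_ok=True)


def try_schedule() -> None:
    try:
        # blocking=False fails fast if another pass holds the lock; the
        # current holder is expected to schedule everything it can before
        # releasing, so losing the race is harmless.
        with filelock.FileLock(LOCK_PATH, blocking=False):
            print('Got the lock; schedule waiting jobs here.')
    except filelock.Timeout:
        print('Lock busy; skipping this scheduling pass.')


try_schedule()
```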
sky/jobs/server/core.py
CHANGED
@@ -93,8 +93,8 @@ def _upload_files_to_controller(dag: 'sky.Dag') -> Dict[str, str]:
     return local_to_controller_file_mounts
 
 
-def _maybe_submit_job_locally(prefix: str, dag: 'sky.Dag',
-                              num_jobs:
+def _maybe_submit_job_locally(prefix: str, dag: 'sky.Dag',
+                              num_jobs: int) -> Optional[List[int]]:
     """Submit the managed job locally if in consolidation mode.
 
     In normal mode the managed job submission is done in the ray job submission.
@@ -109,12 +109,13 @@ def _maybe_submit_job_locally(prefix: str, dag: 'sky.Dag', pool: Optional[str],
     # Create local directory for the managed job.
     pathlib.Path(prefix).expanduser().mkdir(parents=True, exist_ok=True)
     job_ids = []
+    pool = dag.pool
     pool_hash = None
     if pool is not None:
         pool_hash = serve_state.get_service_hash(pool)
         # Already checked in the sdk.
         assert pool_hash is not None, f'Pool {pool} not found'
-    for _ in range(num_jobs
+    for _ in range(num_jobs):
         # TODO(tian): We should have a separate name for each job when
         # submitting multiple jobs. Current blocker is that we are sharing
         # the same dag object for all jobs. Maybe we can do copy.copy() for
@@ -172,9 +173,6 @@ def launch(
         handle: Optional[backends.ResourceHandle]; handle to the controller VM.
             None if dryrun.
     """
-    if pool is not None and not managed_job_utils.is_consolidation_mode():
-        with ux_utils.print_exception_no_traceback():
-            raise ValueError('pool is only supported in consolidation mode.')
     entrypoint = task
     # using hasattr instead of isinstance to avoid importing sky
     if hasattr(task, 'metadata'):
@@ -295,8 +293,13 @@ def launch(
         controller=controller,
         task_resources=sum([list(t.resources) for t in dag.tasks], []))
 
+    num_jobs = num_jobs if num_jobs is not None else 1
+    # We do this assignment after applying the admin policy, so that we don't
+    # need to serialize the pool name in the dag. The dag object will be
+    # preserved. See sky/admin_policy.py::MutatedUserRequest::decode.
+    dag.pool = pool
     consolidation_mode_job_ids = _maybe_submit_job_locally(
-        prefix, dag,
+        prefix, dag, num_jobs)
 
     # This is only needed for non-consolidation mode. For consolidation
     # mode, the controller uses the same catalog as API server.
@@ -373,8 +376,8 @@ def launch(
     controller_task._metadata = metadata
 
     job_identity = ''
-    if
-        job_identity = f' (
+    if job_rank is not None:
+        job_identity = f' (rank: {job_rank})'
     logger.info(f'{colorama.Fore.YELLOW}'
                 f'Launching managed job {dag.name!r}{job_identity} '
                 f'from jobs controller...{colorama.Style.RESET_ALL}')
@@ -428,14 +431,17 @@ def launch(
         backend.run_on_head(local_handle, run_script)
         return consolidation_mode_job_id, local_handle
 
-    if consolidation_mode_job_ids is None:
-        return _submit_one()
     if pool is None:
+        if consolidation_mode_job_ids is None:
+            return _submit_one()
         assert len(consolidation_mode_job_ids) == 1
         return _submit_one(consolidation_mode_job_ids[0])
+
     ids = []
     all_handle = None
-    for job_rank
+    for job_rank in range(num_jobs):
+        job_id = (consolidation_mode_job_ids[job_rank]
+                  if consolidation_mode_job_ids is not None else None)
         jid, handle = _submit_one(job_id, job_rank)
         assert jid is not None, (job_id, handle)
         ids.append(jid)
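The launch() hunk fans out one submission per job rank when a pool is used, reusing pre-allocated consolidation-mode job IDs where available. A generic sketch of that fan-out is below; `submit_one` is a hypothetical stand-in for `_submit_one`, and only the ID-reuse logic mirrors the diff.

```python
# Sketch of rank-based fan-out with optional pre-allocated job IDs.
from typing import List, Optional, Tuple


def submit_one(job_id: Optional[int], job_rank: int) -> Tuple[int, str]:
    # Pretend submission: allocate an ID only if none was pre-created.
    return (job_id if job_id is not None else 1000 + job_rank,
            f'handle-{job_rank}')


def submit_all(num_jobs: int,
               preallocated_ids: Optional[List[int]]) -> List[int]:
    ids = []
    for job_rank in range(num_jobs):
        job_id = (preallocated_ids[job_rank]
                  if preallocated_ids is not None else None)
        jid, _handle = submit_one(job_id, job_rank)
        ids.append(jid)
    return ids


print(submit_all(3, None))    # [1000, 1001, 1002]
print(submit_all(2, [7, 8]))  # [7, 8]
```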
sky/jobs/state.py
CHANGED
@@ -441,7 +441,8 @@ class ManagedJobScheduleState(enum.Enum):
 
 # === Status transition functions ===
 @_init_db
-def set_job_info(job_id: int, name: str, workspace: str, entrypoint: str
+def set_job_info(job_id: int, name: str, workspace: str, entrypoint: str,
+                 pool: Optional[str], pool_hash: Optional[str]):
     assert _SQLALCHEMY_ENGINE is not None
     with orm.Session(_SQLALCHEMY_ENGINE) as session:
         if (_SQLALCHEMY_ENGINE.dialect.name ==
@@ -457,7 +458,10 @@ def set_job_info(job_id: int, name: str, workspace: str, entrypoint: str):
             name=name,
             schedule_state=ManagedJobScheduleState.INACTIVE.value,
             workspace=workspace,
-            entrypoint=entrypoint
+            entrypoint=entrypoint,
+            pool=pool,
+            pool_hash=pool_hash,
+        )
         session.execute(insert_stmt)
         session.commit()
sky/jobs/utils.py
CHANGED
@@ -1690,6 +1690,7 @@ class ManagedJobCodeGen:
     def set_pending(cls, job_id: int, managed_job_dag: 'dag_lib.Dag',
                     workspace: str, entrypoint: str) -> str:
         dag_name = managed_job_dag.name
+        pool = managed_job_dag.pool
         # Add the managed job to queue table.
         code = textwrap.dedent(f"""\
             set_job_info_kwargs = {{'workspace': {workspace!r}}}
@@ -1697,6 +1698,13 @@ class ManagedJobCodeGen:
                 set_job_info_kwargs = {{}}
             if managed_job_version >= 5:
                 set_job_info_kwargs['entrypoint'] = {entrypoint!r}
+            if managed_job_version >= 8:
+                from sky.serve import serve_state
+                pool_hash = None
+                if {pool!r} != None:
+                    pool_hash = serve_state.get_service_hash({pool!r})
+                set_job_info_kwargs['pool'] = {pool!r}
+                set_job_info_kwargs['pool_hash'] = pool_hash
             managed_job_state.set_job_info(
                 {job_id}, {dag_name!r}, **set_job_info_kwargs)
             """)
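ManagedJobCodeGen guards the new pool arguments behind `managed_job_version >= 8`, so newer clients can still drive controllers running older library code. Below is a minimal sketch of that version-gated codegen pattern; the generated snippet and the `remote_version` variable are illustrative, not the real controller protocol.

```python
# Sketch of version-gated code generation with textwrap.dedent and !r
# interpolation, in the style of the hunk above.
import textwrap
from typing import Optional


def generate_set_pending_code(job_id: int, dag_name: str,
                              pool: Optional[str]) -> str:
    # Interpolating with !r makes strings and None round-trip as literals
    # inside the generated source.
    return textwrap.dedent(f"""\
        kwargs = {{}}
        if remote_version >= 8:
            kwargs['pool'] = {pool!r}
        print('set_job_info', {job_id}, {dag_name!r}, kwargs)
        """)


code = generate_set_pending_code(42, 'train-dag', 'gpu-pool')
# The remote side would execute this against its own library version.
exec(code, {'remote_version': 8})
```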
sky/provision/__init__.py
CHANGED
sky/provision/aws/config.py
CHANGED
@@ -19,6 +19,7 @@ import colorama
 from sky import exceptions
 from sky import sky_logging
 from sky.adaptors import aws
+from sky.clouds import aws as aws_cloud
 from sky.provision import common
 from sky.provision.aws import utils
 from sky.utils import annotations
@@ -103,6 +104,14 @@ def bootstrap_instances(
     security_group_ids = _configure_security_group(ec2, vpc_id,
                                                    expected_sg_name,
                                                    extended_ip_rules)
+    if expected_sg_name != aws_cloud.DEFAULT_SECURITY_GROUP_NAME:
+        # Ensure the default security group is created. This is needed
+        # to enable us to use the default security group to quickly
+        # delete the cluster. If the default security group is not created,
+        # we will need to block on instance termination to delete the
+        # security group.
+        _configure_security_group(ec2, vpc_id,
+                                  aws_cloud.DEFAULT_SECURITY_GROUP_NAME, [])
     end_time = time.time()
     elapsed = end_time - start_time
     logger.info(