skypilot-nightly 1.0.0.dev20250724__py3-none-any.whl → 1.0.0.dev20250726__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (73)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +28 -2
  3. sky/backends/cloud_vm_ray_backend.py +1 -1
  4. sky/client/cli/command.py +55 -16
  5. sky/clouds/aws.py +1 -1
  6. sky/clouds/azure.py +1 -1
  7. sky/clouds/cloud.py +1 -1
  8. sky/clouds/cudo.py +1 -1
  9. sky/clouds/do.py +1 -1
  10. sky/clouds/fluidstack.py +1 -1
  11. sky/clouds/gcp.py +1 -1
  12. sky/clouds/hyperbolic.py +1 -1
  13. sky/clouds/ibm.py +1 -1
  14. sky/clouds/kubernetes.py +11 -9
  15. sky/clouds/lambda_cloud.py +1 -1
  16. sky/clouds/nebius.py +1 -1
  17. sky/clouds/oci.py +1 -1
  18. sky/clouds/paperspace.py +1 -1
  19. sky/clouds/runpod.py +1 -1
  20. sky/clouds/scp.py +1 -1
  21. sky/clouds/vast.py +1 -1
  22. sky/clouds/vsphere.py +1 -1
  23. sky/dashboard/out/404.html +1 -1
  24. sky/dashboard/out/_next/static/chunks/{webpack-b6447da22305b14a.js → webpack-a305898dc479711e.js} +1 -1
  25. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  26. sky/dashboard/out/clusters/[cluster].html +1 -1
  27. sky/dashboard/out/clusters.html +1 -1
  28. sky/dashboard/out/config.html +1 -1
  29. sky/dashboard/out/index.html +1 -1
  30. sky/dashboard/out/infra/[context].html +1 -1
  31. sky/dashboard/out/infra.html +1 -1
  32. sky/dashboard/out/jobs/[job].html +1 -1
  33. sky/dashboard/out/jobs.html +1 -1
  34. sky/dashboard/out/users.html +1 -1
  35. sky/dashboard/out/volumes.html +1 -1
  36. sky/dashboard/out/workspace/new.html +1 -1
  37. sky/dashboard/out/workspaces/[name].html +1 -1
  38. sky/dashboard/out/workspaces.html +1 -1
  39. sky/global_user_state.py +12 -23
  40. sky/jobs/state.py +12 -24
  41. sky/jobs/utils.py +5 -0
  42. sky/provision/kubernetes/utils.py +9 -0
  43. sky/provision/kubernetes/volume.py +1 -1
  44. sky/resources.py +1 -1
  45. sky/server/common.py +9 -0
  46. sky/server/server.py +3 -0
  47. sky/skylet/job_lib.py +4 -0
  48. sky/skylet/log_lib.py +5 -3
  49. sky/task.py +1 -1
  50. sky/templates/aws-ray.yml.j2 +7 -1
  51. sky/templates/azure-ray.yml.j2 +1 -1
  52. sky/templates/do-ray.yml.j2 +1 -1
  53. sky/templates/kubernetes-ray.yml.j2 +6 -0
  54. sky/templates/lambda-ray.yml.j2 +1 -1
  55. sky/templates/nebius-ray.yml.j2 +1 -1
  56. sky/templates/paperspace-ray.yml.j2 +1 -1
  57. sky/templates/runpod-ray.yml.j2 +1 -1
  58. sky/utils/db/migration_utils.py +60 -19
  59. sky/utils/schemas.py +14 -1
  60. sky/utils/volume.py +78 -0
  61. sky/volumes/__init__.py +13 -0
  62. sky/volumes/client/sdk.py +19 -2
  63. sky/volumes/server/server.py +1 -1
  64. sky/volumes/utils.py +1 -1
  65. sky/volumes/volume.py +0 -73
  66. {skypilot_nightly-1.0.0.dev20250724.dist-info → skypilot_nightly-1.0.0.dev20250726.dist-info}/METADATA +13 -2
  67. {skypilot_nightly-1.0.0.dev20250724.dist-info → skypilot_nightly-1.0.0.dev20250726.dist-info}/RECORD +73 -72
  68. /sky/dashboard/out/_next/static/{BURfWrKsQk9psMPv0OXrh → lVqpIJvikPZAX3dgsm6_q}/_buildManifest.js +0 -0
  69. /sky/dashboard/out/_next/static/{BURfWrKsQk9psMPv0OXrh → lVqpIJvikPZAX3dgsm6_q}/_ssgManifest.js +0 -0
  70. {skypilot_nightly-1.0.0.dev20250724.dist-info → skypilot_nightly-1.0.0.dev20250726.dist-info}/WHEEL +0 -0
  71. {skypilot_nightly-1.0.0.dev20250724.dist-info → skypilot_nightly-1.0.0.dev20250726.dist-info}/entry_points.txt +0 -0
  72. {skypilot_nightly-1.0.0.dev20250724.dist-info → skypilot_nightly-1.0.0.dev20250726.dist-info}/licenses/LICENSE +0 -0
  73. {skypilot_nightly-1.0.0.dev20250724.dist-info → skypilot_nightly-1.0.0.dev20250726.dist-info}/top_level.txt +0 -0
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/b3227360726f12eb.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/b3227360726f12eb.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-b6447da22305b14a.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-da491665d4289aae.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-162f3033ffcd3d31.js" defer=""></script><script src="/dashboard/_next/static/chunks/5230-df791914b54d91d9.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-5ea3ffa10fc884f2.js" defer=""></script><script src="/dashboard/_next/static/chunks/1664-d65361e92b85e786.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-2cc31dc0fdf2a9ad.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-eab0e9c16b64fd9f.js" defer=""></script><script src="/dashboard/_next/static/chunks/3698-9fa11dafb5cad4a6.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-2abbd0352f8ee061.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-8e0b2055bf5dd499.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs/%5Bjob%5D-dc0299ffefebcdbe.js" defer=""></script><script src="/dashboard/_next/static/BURfWrKsQk9psMPv0OXrh/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/BURfWrKsQk9psMPv0OXrh/_ssgManifest.js" 
defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs/[job]","query":{},"buildId":"BURfWrKsQk9psMPv0OXrh","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/b3227360726f12eb.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/b3227360726f12eb.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-a305898dc479711e.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-da491665d4289aae.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-162f3033ffcd3d31.js" defer=""></script><script src="/dashboard/_next/static/chunks/5230-df791914b54d91d9.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-5ea3ffa10fc884f2.js" defer=""></script><script src="/dashboard/_next/static/chunks/1664-d65361e92b85e786.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-2cc31dc0fdf2a9ad.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-eab0e9c16b64fd9f.js" defer=""></script><script src="/dashboard/_next/static/chunks/3698-9fa11dafb5cad4a6.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-2abbd0352f8ee061.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-8e0b2055bf5dd499.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs/%5Bjob%5D-dc0299ffefebcdbe.js" defer=""></script><script src="/dashboard/_next/static/lVqpIJvikPZAX3dgsm6_q/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/lVqpIJvikPZAX3dgsm6_q/_ssgManifest.js" 
defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs/[job]","query":{},"buildId":"lVqpIJvikPZAX3dgsm6_q","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/b3227360726f12eb.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/b3227360726f12eb.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-b6447da22305b14a.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-da491665d4289aae.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs-49f790d12a85027c.js" defer=""></script><script src="/dashboard/_next/static/BURfWrKsQk9psMPv0OXrh/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/BURfWrKsQk9psMPv0OXrh/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs","query":{},"buildId":"BURfWrKsQk9psMPv0OXrh","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/b3227360726f12eb.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/b3227360726f12eb.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-a305898dc479711e.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-da491665d4289aae.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/jobs-49f790d12a85027c.js" defer=""></script><script src="/dashboard/_next/static/lVqpIJvikPZAX3dgsm6_q/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/lVqpIJvikPZAX3dgsm6_q/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/jobs","query":{},"buildId":"lVqpIJvikPZAX3dgsm6_q","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/b3227360726f12eb.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/b3227360726f12eb.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-b6447da22305b14a.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-da491665d4289aae.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/users-6790fcefd5487b13.js" defer=""></script><script src="/dashboard/_next/static/BURfWrKsQk9psMPv0OXrh/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/BURfWrKsQk9psMPv0OXrh/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/users","query":{},"buildId":"BURfWrKsQk9psMPv0OXrh","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/b3227360726f12eb.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/b3227360726f12eb.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-a305898dc479711e.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-da491665d4289aae.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/users-6790fcefd5487b13.js" defer=""></script><script src="/dashboard/_next/static/lVqpIJvikPZAX3dgsm6_q/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/lVqpIJvikPZAX3dgsm6_q/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/users","query":{},"buildId":"lVqpIJvikPZAX3dgsm6_q","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/b3227360726f12eb.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/b3227360726f12eb.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-b6447da22305b14a.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-da491665d4289aae.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/volumes-61ea7ba7e56f8d06.js" defer=""></script><script src="/dashboard/_next/static/BURfWrKsQk9psMPv0OXrh/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/BURfWrKsQk9psMPv0OXrh/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/volumes","query":{},"buildId":"BURfWrKsQk9psMPv0OXrh","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/b3227360726f12eb.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/b3227360726f12eb.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-a305898dc479711e.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-da491665d4289aae.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/volumes-61ea7ba7e56f8d06.js" defer=""></script><script src="/dashboard/_next/static/lVqpIJvikPZAX3dgsm6_q/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/lVqpIJvikPZAX3dgsm6_q/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/volumes","query":{},"buildId":"lVqpIJvikPZAX3dgsm6_q","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/b3227360726f12eb.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/b3227360726f12eb.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-b6447da22305b14a.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-da491665d4289aae.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspace/new-5629d4e551dba1ee.js" defer=""></script><script src="/dashboard/_next/static/BURfWrKsQk9psMPv0OXrh/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/BURfWrKsQk9psMPv0OXrh/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspace/new","query":{},"buildId":"BURfWrKsQk9psMPv0OXrh","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/b3227360726f12eb.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/b3227360726f12eb.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-a305898dc479711e.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-da491665d4289aae.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspace/new-5629d4e551dba1ee.js" defer=""></script><script src="/dashboard/_next/static/lVqpIJvikPZAX3dgsm6_q/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/lVqpIJvikPZAX3dgsm6_q/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspace/new","query":{},"buildId":"lVqpIJvikPZAX3dgsm6_q","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/b3227360726f12eb.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/b3227360726f12eb.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-b6447da22305b14a.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-da491665d4289aae.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-162f3033ffcd3d31.js" defer=""></script><script src="/dashboard/_next/static/chunks/5230-df791914b54d91d9.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-5ea3ffa10fc884f2.js" defer=""></script><script src="/dashboard/_next/static/chunks/1664-d65361e92b85e786.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-2cc31dc0fdf2a9ad.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/1559-18717d96ef2fcbe9.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-eab0e9c16b64fd9f.js" defer=""></script><script src="/dashboard/_next/static/chunks/3698-9fa11dafb5cad4a6.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-2abbd0352f8ee061.js" defer=""></script><script src="/dashboard/_next/static/chunks/6990-f64e03df359e04f7.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-8e0b2055bf5dd499.js" defer=""></script><script src="/dashboard/_next/static/chunks/1043-869d9c78bf5dd3df.js" defer=""></script><script 
src="/dashboard/_next/static/chunks/6601-d4a381403a8bae91.js" defer=""></script><script src="/dashboard/_next/static/chunks/938-7ee806653aef0609.js" defer=""></script><script src="/dashboard/_next/static/chunks/1141-e49a159c30a6c4a7.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-6bcd4b20914d76c9.js" defer=""></script><script src="/dashboard/_next/static/BURfWrKsQk9psMPv0OXrh/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/BURfWrKsQk9psMPv0OXrh/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces/[name]","query":{},"buildId":"BURfWrKsQk9psMPv0OXrh","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/b3227360726f12eb.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/b3227360726f12eb.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-a305898dc479711e.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-da491665d4289aae.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-162f3033ffcd3d31.js" defer=""></script><script src="/dashboard/_next/static/chunks/5230-df791914b54d91d9.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-5ea3ffa10fc884f2.js" defer=""></script><script src="/dashboard/_next/static/chunks/1664-d65361e92b85e786.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-2cc31dc0fdf2a9ad.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/1559-18717d96ef2fcbe9.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-eab0e9c16b64fd9f.js" defer=""></script><script src="/dashboard/_next/static/chunks/3698-9fa11dafb5cad4a6.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-2abbd0352f8ee061.js" defer=""></script><script src="/dashboard/_next/static/chunks/6990-f64e03df359e04f7.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-8e0b2055bf5dd499.js" defer=""></script><script src="/dashboard/_next/static/chunks/1043-869d9c78bf5dd3df.js" defer=""></script><script 
src="/dashboard/_next/static/chunks/6601-d4a381403a8bae91.js" defer=""></script><script src="/dashboard/_next/static/chunks/938-7ee806653aef0609.js" defer=""></script><script src="/dashboard/_next/static/chunks/1141-e49a159c30a6c4a7.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-6bcd4b20914d76c9.js" defer=""></script><script src="/dashboard/_next/static/lVqpIJvikPZAX3dgsm6_q/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/lVqpIJvikPZAX3dgsm6_q/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces/[name]","query":{},"buildId":"lVqpIJvikPZAX3dgsm6_q","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/b3227360726f12eb.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/b3227360726f12eb.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-b6447da22305b14a.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-da491665d4289aae.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-5f7fe4b7d55b8612.js" defer=""></script><script src="/dashboard/_next/static/BURfWrKsQk9psMPv0OXrh/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/BURfWrKsQk9psMPv0OXrh/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"BURfWrKsQk9psMPv0OXrh","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/b3227360726f12eb.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/b3227360726f12eb.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-a305898dc479711e.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-efc06c2733009cd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-c0a4f1ea606d48d2.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-da491665d4289aae.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-5f7fe4b7d55b8612.js" defer=""></script><script src="/dashboard/_next/static/lVqpIJvikPZAX3dgsm6_q/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/lVqpIJvikPZAX3dgsm6_q/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"lVqpIJvikPZAX3dgsm6_q","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
sky/global_user_state.py CHANGED
@@ -9,7 +9,6 @@ Concepts:
9
9
  import functools
10
10
  import json
11
11
  import os
12
- import pathlib
13
12
  import pickle
14
13
  import re
15
14
  import time
@@ -237,35 +236,25 @@ def create_table(engine: sqlalchemy.engine.Engine):
237
236
  # If the database is locked, it is OK to continue, as the WAL mode
238
237
  # is not critical and is likely to be enabled by other processes.
239
238
 
240
- # Get alembic config for state db and run migrations
241
- alembic_config = migration_utils.get_alembic_config(
242
- engine, migration_utils.GLOBAL_USER_STATE_DB_NAME)
243
- # pylint: disable=line-too-long
244
- alembic_config.config_ini_section = migration_utils.GLOBAL_USER_STATE_DB_NAME
245
239
  migration_utils.safe_alembic_upgrade(
246
- engine, alembic_config, migration_utils.GLOBAL_USER_STATE_VERSION)
240
+ engine, migration_utils.GLOBAL_USER_STATE_DB_NAME,
241
+ migration_utils.GLOBAL_USER_STATE_VERSION)
247
242
 
248
243
 
249
244
  def initialize_and_get_db() -> sqlalchemy.engine.Engine:
250
245
  global _SQLALCHEMY_ENGINE
246
+
251
247
  if _SQLALCHEMY_ENGINE is not None:
252
248
  return _SQLALCHEMY_ENGINE
253
- with migration_utils.db_lock(migration_utils.GLOBAL_USER_STATE_DB_NAME):
254
- if _SQLALCHEMY_ENGINE is None:
255
- conn_string = None
256
- if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
257
- conn_string = skypilot_config.get_nested(('db',), None)
258
- if conn_string:
259
- logger.debug(f'using db URI from {conn_string}')
260
- engine = sqlalchemy.create_engine(conn_string,
261
- poolclass=sqlalchemy.NullPool)
262
- else:
263
- db_path = os.path.expanduser('~/.sky/state.db')
264
- pathlib.Path(db_path).parents[0].mkdir(parents=True,
265
- exist_ok=True)
266
- engine = sqlalchemy.create_engine('sqlite:///' + db_path)
267
- create_table(engine)
268
- _SQLALCHEMY_ENGINE = engine
249
+
250
+ # get an engine to the db
251
+ engine = migration_utils.get_engine('state')
252
+
253
+ # run migrations if needed
254
+ create_table(engine)
255
+
256
+ # return engine
257
+ _SQLALCHEMY_ENGINE = engine
269
258
  return _SQLALCHEMY_ENGINE
270
259
 
271
260
 
sky/jobs/state.py CHANGED
@@ -4,8 +4,6 @@
4
4
  import enum
5
5
  import functools
6
6
  import json
7
- import os
8
- import pathlib
9
7
  import time
10
8
  import typing
11
9
  from typing import Any, Callable, Dict, List, Optional, Tuple, Union
@@ -20,7 +18,6 @@ from sqlalchemy.ext import declarative
20
18
 
21
19
  from sky import exceptions
22
20
  from sky import sky_logging
23
- from sky import skypilot_config
24
21
  from sky.skylet import constants
25
22
  from sky.utils import common_utils
26
23
  from sky.utils.db import db_utils
@@ -129,34 +126,25 @@ def create_table(engine: sqlalchemy.engine.Engine):
129
126
  # If the database is locked, it is OK to continue, as the WAL mode
130
127
  # is not critical and is likely to be enabled by other processes.
131
128
 
132
- # Get alembic config for spot jobs db and run migrations
133
- alembic_config = migration_utils.get_alembic_config(
134
- engine, migration_utils.SPOT_JOBS_DB_NAME)
135
- alembic_config.config_ini_section = migration_utils.SPOT_JOBS_DB_NAME
136
- migration_utils.safe_alembic_upgrade(engine, alembic_config,
129
+ migration_utils.safe_alembic_upgrade(engine,
130
+ migration_utils.SPOT_JOBS_DB_NAME,
137
131
  migration_utils.SPOT_JOBS_VERSION)
138
132
 
139
133
 
140
134
  def initialize_and_get_db() -> sqlalchemy.engine.Engine:
141
135
  global _SQLALCHEMY_ENGINE
136
+
142
137
  if _SQLALCHEMY_ENGINE is not None:
143
138
  return _SQLALCHEMY_ENGINE
144
- with migration_utils.db_lock(migration_utils.SPOT_JOBS_DB_NAME):
145
- if _SQLALCHEMY_ENGINE is None:
146
- conn_string = None
147
- if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
148
- conn_string = skypilot_config.get_nested(('db',), None)
149
- if conn_string:
150
- logger.debug(f'using db URI from {conn_string}')
151
- engine = sqlalchemy.create_engine(conn_string,
152
- poolclass=sqlalchemy.NullPool)
153
- else:
154
- db_path = os.path.expanduser('~/.sky/spot_jobs.db')
155
- pathlib.Path(db_path).parents[0].mkdir(parents=True,
156
- exist_ok=True)
157
- engine = sqlalchemy.create_engine('sqlite:///' + db_path)
158
- create_table(engine)
159
- _SQLALCHEMY_ENGINE = engine
139
+
140
+ # get an engine to the db
141
+ engine = migration_utils.get_engine('spot_jobs')
142
+
143
+ # run migrations if needed
144
+ create_table(engine)
145
+
146
+ # return engine
147
+ _SQLALCHEMY_ENGINE = engine
160
148
  return _SQLALCHEMY_ENGINE
161
149
 
162
150
 
sky/jobs/utils.py CHANGED
@@ -733,6 +733,11 @@ def stream_logs_by_id(job_id: int,
733
733
  start_streaming = True
734
734
  if start_streaming:
735
735
  print(line, end='', flush=True)
736
+ # Add the "Job finished" message for terminal states
737
+ if managed_job_status.is_terminal():
738
+ print(ux_utils.finishing_message(
739
+ f'Job finished (status: {managed_job_status.value}).'),
740
+ flush=True)
736
741
  return '', exceptions.JobExitCode.from_managed_job_status(
737
742
  managed_job_status)
738
743
  return (f'{colorama.Fore.YELLOW}'
@@ -73,6 +73,7 @@ class KubernetesHighPerformanceNetworkType(enum.Enum):
73
73
  (A4/A3 Ultra instances)
74
74
  - NEBIUS: Nebius clusters with InfiniBand support for high-throughput,
75
75
  low-latency networking
76
+ - COREWEAVE: CoreWeave clusters with InfiniBand support.
76
77
  - NONE: Standard clusters without specialized networking optimizations
77
78
 
78
79
  The network configurations align with corresponding VM-based
@@ -86,6 +87,7 @@ class KubernetesHighPerformanceNetworkType(enum.Enum):
86
87
  GCP_TCPXO = 'gcp_tcpxo'
87
88
  GCP_GPUDIRECT_RDMA = 'gcp_gpudirect_rdma'
88
89
  NEBIUS = 'nebius'
90
+ COREWEAVE = 'coreweave'
89
91
  NONE = 'none'
90
92
 
91
93
  def get_network_env_vars(self) -> Dict[str, str]:
@@ -97,6 +99,13 @@ class KubernetesHighPerformanceNetworkType(enum.Enum):
97
99
  'UCX_NET_DEVICES': ('mlx5_0:1,mlx5_1:1,mlx5_2:1,mlx5_3:1,'
98
100
  'mlx5_4:1,mlx5_5:1,mlx5_6:1,mlx5_7:1')
99
101
  }
102
+ elif self == KubernetesHighPerformanceNetworkType.COREWEAVE:
103
+ return {
104
+ 'NCCL_SOCKET_IFNAME': 'eth0',
105
+ 'NCCL_IB_HCA': 'ibp',
106
+ 'UCX_NET_DEVICES': ('ibp0:1,ibp1:1,ibp2:1,ibp3:1,'
107
+ 'ibp4:1,ibp5:1,ibp6:1,ibp7:1')
108
+ }
100
109
  else:
101
110
  # GCP clusters and generic clusters - environment variables are
102
111
  # handled directly in the template
@@ -8,7 +8,7 @@ from sky.adaptors import kubernetes
8
8
  from sky.provision.kubernetes import config as config_lib
9
9
  from sky.provision.kubernetes import constants as k8s_constants
10
10
  from sky.provision.kubernetes import utils as kubernetes_utils
11
- from sky.volumes import volume as volume_lib
11
+ from sky.utils import volume as volume_lib
12
12
 
13
13
  logger = sky_logging.init_logger(__name__)
14
14
 
sky/resources.py CHANGED
@@ -32,7 +32,7 @@ from sky.utils import schemas
32
32
  from sky.utils import ux_utils
33
33
 
34
34
  if typing.TYPE_CHECKING:
35
- from sky.volumes import volume as volume_lib
35
+ from sky.utils import volume as volume_lib
36
36
 
37
37
  logger = sky_logging.init_logger(__name__)
38
38
 
sky/server/common.py CHANGED
@@ -13,12 +13,14 @@ import shutil
13
13
  import subprocess
14
14
  import sys
15
15
  import tempfile
16
+ import threading
16
17
  import time
17
18
  import typing
18
19
  from typing import Any, Dict, Literal, Optional, Tuple, Union
19
20
  from urllib import parse
20
21
  import uuid
21
22
 
23
+ import cachetools
22
24
  import colorama
23
25
  import filelock
24
26
 
@@ -276,6 +278,10 @@ def _handle_non_200_server_status(
276
278
  return ApiServerInfo(status=ApiServerStatus.UNHEALTHY)
277
279
 
278
280
 
281
+ @cachetools.cached(cache=cachetools.TTLCache(maxsize=10,
282
+ ttl=5.0,
283
+ timer=time.time),
284
+ lock=threading.RLock())
279
285
  def get_api_server_status(endpoint: Optional[str] = None) -> ApiServerInfo:
280
286
  """Retrieve the status of the API server.
281
287
 
@@ -413,6 +419,7 @@ def _start_api_server(deploy: bool = False,
413
419
  server_url = get_server_url(host)
414
420
  assert server_url in AVAILABLE_LOCAL_API_SERVER_URLS, (
415
421
  f'server url {server_url} is not a local url')
422
+
416
423
  with rich_utils.client_status('Starting SkyPilot API server, '
417
424
  f'view logs at {constants.API_SERVER_LOGS}'):
418
425
  logger.info(f'{colorama.Style.DIM}Failed to connect to '
@@ -488,6 +495,8 @@ def _start_api_server(deploy: bool = False,
488
495
  'SkyPilot API server process exited unexpectedly.\n'
489
496
  f'View logs at: {constants.API_SERVER_LOGS}')
490
497
  try:
498
+ # Clear the cache to ensure fresh checks during startup
499
+ get_api_server_status.cache_clear() # type: ignore
491
500
  check_server_healthy()
492
501
  except exceptions.APIVersionMismatchError:
493
502
  raise
sky/server/server.py CHANGED
@@ -1764,6 +1764,9 @@ if __name__ == '__main__':
1764
1764
 
1765
1765
  from sky.server import uvicorn as skyuvicorn
1766
1766
 
1767
+ # Initialize global user state db
1768
+ global_user_state.initialize_and_get_db()
1769
+ # Initialize request db
1767
1770
  requests_lib.reset_db_and_logs()
1768
1771
 
1769
1772
  parser = argparse.ArgumentParser()
sky/skylet/job_lib.py CHANGED
@@ -1185,6 +1185,10 @@ class JobLibCodeGen:
1185
1185
  # and older did not have JobExitCode, so we use 0 for those versions
1186
1186
  # TODO: Remove this special handling after 0.10.0.
1187
1187
  'exit_code = exceptions.JobExitCode.from_job_status(job_status) if getattr(constants, "SKYLET_LIB_VERSION", 1) > 2 else 0',
1188
+ # Fix for dashboard: When follow=False and job is still running (NOT_FINISHED=101),
1189
+ # exit with success (0) since fetching current logs is a successful operation.
1190
+ # This prevents shell wrappers from printing "command terminated with exit code 101".
1191
+ f'exit_code = 0 if not {follow} and exit_code == 101 else exit_code',
1188
1192
  'sys.exit(exit_code)',
1189
1193
  ]
1190
1194
  return cls._build(code)
sky/skylet/log_lib.py CHANGED
@@ -544,9 +544,11 @@ def tail_logs(job_id: Optional[int],
544
544
  if start_streaming:
545
545
  print(line, end='', flush=True)
546
546
  status_str = status.value if status is not None else 'None'
547
- print(ux_utils.finishing_message(
548
- f'Job finished (status: {status_str}).'),
549
- flush=True)
547
+ # Only show "Job finished" for actually terminal states
548
+ if status is not None and status.is_terminal():
549
+ print(ux_utils.finishing_message(
550
+ f'Job finished (status: {status_str}).'),
551
+ flush=True)
550
552
  except FileNotFoundError:
551
553
  print(f'{colorama.Fore.RED}ERROR: Logs for job {job_id} (status:'
552
554
  f' {status.value}) does not exist.{colorama.Style.RESET_ALL}')
sky/task.py CHANGED
@@ -24,7 +24,7 @@ from sky.skylet import constants
24
24
  from sky.utils import common_utils
25
25
  from sky.utils import schemas
26
26
  from sky.utils import ux_utils
27
- from sky.volumes import volume as volume_lib
27
+ from sky.utils import volume as volume_lib
28
28
 
29
29
  if typing.TYPE_CHECKING:
30
30
  import yaml
@@ -19,7 +19,7 @@ docker:
19
19
  username: |-
20
20
  {{docker_login_config.username}}
21
21
  password: |-
22
- {{docker_login_config.password}}
22
+ {{docker_login_config.password | indent(6) }}
23
23
  server: |-
24
24
  {{docker_login_config.server}}
25
25
  {%- endif %}
@@ -131,6 +131,12 @@ available_node_types:
131
131
  - systemctl disable apt-daily.timer apt-daily-upgrade.timer unattended-upgrades.service
132
132
  - systemctl mask apt-daily.service apt-daily-upgrade.service unattended-upgrades.service
133
133
  - systemctl daemon-reload
134
+ {%- if runcmd %}
135
+ runcmd:
136
+ {%- for cmd in runcmd %}
137
+ - {{cmd}}
138
+ {%- endfor %}
139
+ {%- endif %}
134
140
  TagSpecifications:
135
141
  - ResourceType: instance
136
142
  Tags:
@@ -19,7 +19,7 @@ docker:
19
19
  username: |-
20
20
  {{docker_login_config.username}}
21
21
  password: |-
22
- {{docker_login_config.password}}
22
+ {{docker_login_config.password | indent(6) }}
23
23
  server: |-
24
24
  {{docker_login_config.server}}
25
25
  {%- endif %}
@@ -19,7 +19,7 @@ docker:
19
19
  username: |-
20
20
  {{docker_login_config.username}}
21
21
  password: |-
22
- {{docker_login_config.password}}
22
+ {{docker_login_config.password | indent(6) }}
23
23
  server: |-
24
24
  {{docker_login_config.server}}
25
25
  {%- endif %}
@@ -1008,12 +1008,18 @@ available_node_types:
1008
1008
  # https://cloud.google.com/kubernetes-engine/docs/concepts/tpus#how_tpus_work
1009
1009
  {{k8s_resource_key}}: {{accelerator_count}}
1010
1010
  {% endif %}
1011
+ {% if k8s_network_type == 'coreweave' %}
1012
+ rdma/ib: 1
1013
+ {% endif %}
1011
1014
  {% if k8s_resource_key is not none %}
1012
1015
  limits:
1013
1016
  # Limits need to be defined for GPU/TPU requests
1014
1017
  {% if k8s_resource_key is not none %}
1015
1018
  {{k8s_resource_key}}: {{accelerator_count}}
1016
1019
  {% endif %}
1020
+ {% if k8s_network_type == 'coreweave' %}
1021
+ rdma/ib: 1
1022
+ {% endif %}
1017
1023
  {% endif %}
1018
1024
  {% if k8s_ipc_lock_capability %}
1019
1025
  securityContext:
@@ -19,7 +19,7 @@ docker:
19
19
  username: |-
20
20
  {{docker_login_config.username}}
21
21
  password: |-
22
- {{docker_login_config.password}}
22
+ {{docker_login_config.password | indent(6) }}
23
23
  server: |-
24
24
  {{docker_login_config.server}}
25
25
  {%- endif %}
@@ -25,7 +25,7 @@ docker:
25
25
  username: |-
26
26
  {{docker_login_config.username}}
27
27
  password: |-
28
- {{docker_login_config.password}}
28
+ {{docker_login_config.password | indent(6) }}
29
29
  server: |-
30
30
  {{docker_login_config.server}}
31
31
  {%- endif %}
@@ -19,7 +19,7 @@ docker:
19
19
  username: |-
20
20
  {{docker_login_config.username}}
21
21
  password: |-
22
- {{docker_login_config.password}}
22
+ {{docker_login_config.password | indent(6) }}
23
23
  server: |-
24
24
  {{docker_login_config.server}}
25
25
  {%- endif %}
@@ -20,7 +20,7 @@ provider:
20
20
  username: |-
21
21
  {{docker_login_config.username}}
22
22
  password: |-
23
- {{docker_login_config.password}}
23
+ {{docker_login_config.password | indent(6) }}
24
24
  server: |-
25
25
  {{docker_login_config.server}}
26
26
  {%- endif %}
@@ -3,6 +3,7 @@
3
3
  import contextlib
4
4
  import logging
5
5
  import os
6
+ import pathlib
6
7
 
7
8
  from alembic import command as alembic_command
8
9
  from alembic.config import Config
@@ -10,6 +11,12 @@ from alembic.runtime import migration
10
11
  import filelock
11
12
  import sqlalchemy
12
13
 
14
+ from sky import sky_logging
15
+ from sky import skypilot_config
16
+ from sky.skylet import constants
17
+
18
+ logger = sky_logging.init_logger(__name__)
19
+
13
20
  DB_INIT_LOCK_TIMEOUT_SECONDS = 10
14
21
 
15
22
  GLOBAL_USER_STATE_DB_NAME = 'state_db'
@@ -21,6 +28,21 @@ SPOT_JOBS_VERSION = '001'
21
28
  SPOT_JOBS_LOCK_PATH = '~/.sky/locks/.spot_jobs_db.lock'
22
29
 
23
30
 
31
+ def get_engine(db_name: str):
32
+ conn_string = None
33
+ if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
34
+ conn_string = skypilot_config.get_nested(('db',), None)
35
+ if conn_string:
36
+ logger.debug(f'using db URI from {conn_string}')
37
+ engine = sqlalchemy.create_engine(conn_string,
38
+ poolclass=sqlalchemy.NullPool)
39
+ else:
40
+ db_path = os.path.expanduser(f'~/.sky/{db_name}.db')
41
+ pathlib.Path(db_path).parents[0].mkdir(parents=True, exist_ok=True)
42
+ engine = sqlalchemy.create_engine('sqlite:///' + db_path)
43
+ return engine
44
+
45
+
24
46
  @contextlib.contextmanager
25
47
  def db_lock(db_name: str):
26
48
  lock_path = os.path.expanduser(f'~/.sky/locks/.{db_name}.lock')
@@ -37,7 +59,6 @@ def db_lock(db_name: str):
37
59
 
38
60
  def get_alembic_config(engine: sqlalchemy.engine.Engine, section: str):
39
61
  """Get Alembic configuration for the given section"""
40
- # Use the alembic.ini file from setup_files (included in wheel)
41
62
  # From sky/utils/db/migration_utils.py -> sky/setup_files/alembic.ini
42
63
  alembic_ini_path = os.path.join(
43
64
  os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
@@ -47,31 +68,29 @@ def get_alembic_config(engine: sqlalchemy.engine.Engine, section: str):
47
68
  # Override the database URL to match SkyPilot's current connection
48
69
  # Use render_as_string to get the full URL with password
49
70
  url = engine.url.render_as_string(hide_password=False)
71
+ # Replace % with %% to escape the % character in the URL
72
+ # set_section_option uses variable interpolation, which treats % as a
73
+ # special character.
74
+ # any '%' symbol not used for interpolation needs to be escaped.
75
+ url = url.replace('%', '%%')
50
76
  alembic_cfg.set_section_option(section, 'sqlalchemy.url', url)
51
77
 
52
78
  return alembic_cfg
53
79
 
54
80
 
55
- def safe_alembic_upgrade(engine: sqlalchemy.engine.Engine,
56
- alembic_config: Config, target_revision: str):
57
- """Only upgrade if current version is older than target.
58
-
59
- This handles the case where a database was created with a newer version of
60
- the code and we're now running older code. Since our migrations are purely
61
- additive, it's safe to run a newer database with older code.
81
+ def needs_upgrade(engine: sqlalchemy.engine.Engine, section: str,
82
+ target_revision: str):
83
+ """Check if the database needs to be upgraded.
62
84
 
63
85
  Args:
64
86
  engine: SQLAlchemy engine for the database
65
- alembic_config: Alembic configuration object
87
+ section: Alembic section to upgrade (e.g., 'state_db' or 'spot_jobs_db')
66
88
  target_revision: Target revision to upgrade to (e.g., '001')
67
89
  """
68
- # set alembic logger to warning level
69
- alembic_logger = logging.getLogger('alembic')
70
- alembic_logger.setLevel(logging.WARNING)
71
-
72
90
  current_rev = None
73
91
 
74
- # Get the current revision from the database
92
+ # get alembic config for the given section
93
+ alembic_config = get_alembic_config(engine, section)
75
94
  version_table = alembic_config.get_section_option(
76
95
  alembic_config.config_ini_section, 'version_table', 'alembic_version')
77
96
 
@@ -81,13 +100,35 @@ def safe_alembic_upgrade(engine: sqlalchemy.engine.Engine,
81
100
  current_rev = context.get_current_revision()
82
101
 
83
102
  if current_rev is None:
84
- alembic_command.upgrade(alembic_config, target_revision)
85
- return
103
+ return True
86
104
 
87
105
  # Compare revisions - assuming they are numeric strings like '001', '002'
88
106
  current_rev_num = int(current_rev)
89
107
  target_rev_num = int(target_revision)
90
108
 
91
- # only upgrade if current revision is older than target revision
92
- if current_rev_num < target_rev_num:
93
- alembic_command.upgrade(alembic_config, target_revision)
109
+ return current_rev_num < target_rev_num
110
+
111
+
112
+ def safe_alembic_upgrade(engine: sqlalchemy.engine.Engine, section: str,
113
+ target_revision: str):
114
+ """Upgrade the database if needed. Uses a file lock to ensure
115
+ that only one process tries to upgrade the database at a time.
116
+
117
+ Args:
118
+ engine: SQLAlchemy engine for the database
119
+ section: Alembic section to upgrade (e.g., 'state_db' or 'spot_jobs_db')
120
+ target_revision: Target revision to upgrade to (e.g., '001')
121
+ """
122
+ # set alembic logger to warning level
123
+ alembic_logger = logging.getLogger('alembic')
124
+ alembic_logger.setLevel(logging.WARNING)
125
+
126
+ alembic_config = get_alembic_config(engine, section)
127
+
128
+ # only acquire lock if db needs upgrade
129
+ if needs_upgrade(engine, section, target_revision):
130
+ with db_lock(section):
131
+ # check again if db needs upgrade in case another
132
+ # process upgraded it while we were waiting for the lock
133
+ if needs_upgrade(engine, section, target_revision):
134
+ alembic_command.upgrade(alembic_config, target_revision)