skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +25 -7
- sky/adaptors/common.py +24 -1
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +170 -17
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +167 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1299 -380
- sky/backends/cloud_vm_ray_backend.py +1715 -518
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/wheel_utils.py +37 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +89 -48
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +335 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +491 -203
- sky/cli.py +5 -6005
- sky/client/{cli.py → cli/command.py} +2477 -1885
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +320 -0
- sky/client/common.py +70 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1203 -297
- sky/client/sdk_async.py +833 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +73 -0
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +358 -93
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +127 -36
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +563 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +206 -80
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -83
- sky/clouds/seeweb.py +466 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +177 -124
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +349 -139
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
- sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1451 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +132 -2
- sky/execution.py +206 -63
- sky/global_user_state.py +2374 -586
- sky/jobs/__init__.py +5 -0
- sky/jobs/client/sdk.py +242 -65
- sky/jobs/client/sdk_async.py +143 -0
- sky/jobs/constants.py +9 -8
- sky/jobs/controller.py +839 -277
- sky/jobs/file_content_utils.py +80 -0
- sky/jobs/log_gc.py +201 -0
- sky/jobs/recovery_strategy.py +398 -152
- sky/jobs/scheduler.py +315 -189
- sky/jobs/server/core.py +829 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2092 -701
- sky/jobs/utils.py +1242 -160
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +443 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +135 -50
- sky/provision/azure/instance.py +10 -5
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +114 -23
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +93 -14
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +789 -247
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +40 -43
- sky/provision/kubernetes/utils.py +1192 -531
- sky/provision/kubernetes/volume.py +282 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +196 -91
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +110 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +180 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +531 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +807 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/vsphere/common/vim_utils.py +1 -2
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +9 -19
- sky/py.typed +0 -0
- sky/resources.py +844 -118
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +225 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +10 -8
- sky/serve/controller.py +64 -19
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +115 -1
- sky/serve/replica_managers.py +273 -162
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +554 -251
- sky/serve/serve_utils.py +733 -220
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +133 -48
- sky/serve/service_spec.py +135 -16
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +200 -0
- sky/server/common.py +475 -181
- sky/server/config.py +81 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +229 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/requests/executor.py +528 -138
- sky/server/requests/payloads.py +351 -17
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +120 -0
- sky/server/requests/requests.py +817 -224
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +106 -0
- sky/server/rest.py +417 -0
- sky/server/server.py +1290 -284
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +345 -57
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +5 -0
- sky/setup_files/alembic.ini +156 -0
- sky/setup_files/dependencies.py +136 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +102 -5
- sky/skylet/attempt_skylet.py +1 -0
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +27 -20
- sky/skylet/constants.py +171 -19
- sky/skylet/events.py +105 -21
- sky/skylet/job_lib.py +335 -104
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/services.py +564 -0
- sky/skylet/skylet.py +63 -4
- sky/skylet/subprocess_daemon.py +103 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +621 -137
- sky/templates/aws-ray.yml.j2 +10 -3
- sky/templates/azure-ray.yml.j2 +1 -1
- sky/templates/do-ray.yml.j2 +1 -1
- sky/templates/gcp-ray.yml.j2 +57 -0
- sky/templates/hyperbolic-ray.yml.j2 +67 -0
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +607 -51
- sky/templates/lambda-ray.yml.j2 +1 -1
- sky/templates/nebius-ray.yml.j2 +33 -12
- sky/templates/paperspace-ray.yml.j2 +1 -1
- sky/templates/primeintellect-ray.yml.j2 +71 -0
- sky/templates/runpod-ray.yml.j2 +9 -1
- sky/templates/scp-ray.yml.j2 +3 -50
- sky/templates/seeweb-ray.yml.j2 +108 -0
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/websocket_proxy.py +178 -18
- sky/usage/usage_lib.py +18 -11
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +387 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +34 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +16 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +310 -87
- sky/utils/config_utils.py +87 -5
- sky/utils/context.py +402 -0
- sky/utils/context_utils.py +222 -0
- sky/utils/controller_utils.py +264 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +470 -0
- sky/utils/db/migration_utils.py +133 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +13 -27
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +5 -5
- sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +368 -0
- sky/utils/log_utils.py +300 -6
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +213 -37
- sky/utils/schemas.py +905 -147
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +38 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/timeline.py +24 -52
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +86 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +149 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +258 -0
- sky/volumes/server/server.py +122 -0
- sky/volumes/volume.py +212 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/utils/yaml_utils.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""YAML utilities."""
|
|
2
|
+
import io
|
|
3
|
+
from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union
|
|
4
|
+
|
|
5
|
+
from sky.adaptors import common
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
import yaml
|
|
9
|
+
else:
|
|
10
|
+
yaml = common.LazyImport('yaml')
|
|
11
|
+
|
|
12
|
+
_c_extension_unavailable = False
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def safe_load(stream) -> Any:
    """Parses ``stream`` with ``yaml.load`` using a safe loader.

    ``yaml.CSafeLoader`` is preferred. If the ``yaml`` module does not expose
    it, ``yaml.SafeLoader`` is used instead and the module-level
    ``_c_extension_unavailable`` flag is set so that later calls skip the
    failing lookup.

    Args:
        stream: The YAML source; passed to ``yaml.load`` unchanged.

    Returns:
        The object produced by ``yaml.load``.
    """
    global _c_extension_unavailable
    if _c_extension_unavailable:
        loader = yaml.SafeLoader
    else:
        # Guard only the attribute lookup. An AttributeError raised while
        # parsing must not be mistaken for a missing C loader: that would
        # flip the process-wide flag for good and parse the stream twice.
        try:
            loader = yaml.CSafeLoader
        except AttributeError:
            _c_extension_unavailable = True
            loader = yaml.SafeLoader
    return yaml.load(stream, Loader=loader)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def safe_load_all(stream) -> Any:
    """Calls ``yaml.load_all`` on ``stream`` with a safe loader.

    ``yaml.CSafeLoader`` is tried first; when the ``yaml`` module lacks that
    attribute the module-level ``_c_extension_unavailable`` flag is set and
    ``yaml.SafeLoader`` is used for this and subsequent calls.

    Args:
        stream: The YAML source; passed to ``yaml.load_all`` unchanged.

    Returns:
        The value returned by ``yaml.load_all``.
    """
    global _c_extension_unavailable
    if not _c_extension_unavailable:
        try:
            loader_cls = yaml.CSafeLoader
        except AttributeError:
            # Remember the miss so the lookup is not retried on every call.
            _c_extension_unavailable = True
            loader_cls = yaml.SafeLoader
    else:
        loader_cls = yaml.SafeLoader
    return yaml.load_all(stream, Loader=loader_cls)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def read_yaml(path: Optional[str]) -> Dict[str, Any]:
    """Reads the UTF-8 file at ``path`` and parses it with ``safe_load``.

    Args:
        path: Path of the YAML file to read.

    Returns:
        The parsed content, exactly as returned by ``safe_load``.

    Raises:
        ValueError: If ``path`` is None.
    """
    if path is not None:
        with open(path, 'r', encoding='utf-8') as yaml_file:
            return safe_load(yaml_file)
    raise ValueError('Attempted to read a None YAML.')
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def read_yaml_str(yaml_str: str) -> Dict[str, Any]:
    """Parses a YAML string with ``safe_load``.

    Args:
        yaml_str: The YAML text to parse.

    Returns:
        The parsed content, or an empty dict when the parsed result is falsy
        (for example, when the text is empty).
    """
    # Any falsy parse result is normalized to an empty dict.
    return safe_load(io.StringIO(yaml_str)) or {}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def read_yaml_all_str(yaml_str: str) -> List[Dict[str, Any]]:
    """Parses every document in a YAML string via ``safe_load_all``.

    Args:
        yaml_str: The YAML text to parse.

    Returns:
        A list with the parsed documents; ``[{}]`` when no document was
        produced (empty YAML).
    """
    documents = list(safe_load_all(io.StringIO(yaml_str)))
    # Empty YAML: hand back a single empty config instead of an empty list.
    return documents or [{}]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def read_yaml_all(path: str) -> List[Dict[str, Any]]:
    """Reads the UTF-8 file at ``path`` and parses all its YAML documents.

    Args:
        path: Path of the YAML file to read.

    Returns:
        The result of ``read_yaml_all_str`` on the file's full text.
    """
    with open(path, 'r', encoding='utf-8') as yaml_file:
        text = yaml_file.read()
    return read_yaml_all_str(text)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def dump_yaml(path: str,
              config: Union[List[Dict[str, Any]], Dict[str, Any]],
              blank: bool = False) -> None:
    """Dumps a YAML file.

    The YAML text is produced before the file is opened, so a failure while
    serializing ``config`` does not leave ``path`` truncated.

    Args:
        path: the path to the YAML file.
        config: the configuration to dump.
        blank: if True and ``config`` is an empty dict, write an empty file
            instead of the ``{}`` an empty dict would otherwise dump as.
    """
    if blank and isinstance(config, dict) and not config:
        # when dumping to yaml, an empty dict will go in as {}.
        contents = ''
    else:
        contents = dump_yaml_str(config)
    with open(path, 'w', encoding='utf-8') as f:
        f.write(contents)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def dump_yaml_str(config: Union[List[Dict[str, Any]], Dict[str, Any]]) -> str:
    """Serializes a configuration to a YAML string.

    A list is serialized with ``yaml.dump_all``; anything else with
    ``yaml.dump``. Keys are not sorted and flow style is disabled.

    Args:
        config: the configuration (or list of configurations) to serialize.

    Returns:
        The resulting YAML text.
    """

    # https://github.com/yaml/pyyaml/issues/127
    class LineBreakDumper(yaml.SafeDumper):

        def write_line_break(self, data=None):
            super().write_line_break(data)
            if len(self.indents) != 1:
                return
            # Exactly one indent on the stack: emit an extra line break.
            super().write_line_break()

    serializer = (yaml.dump_all  # type: ignore
                  if isinstance(config, list) else yaml.dump)
    return serializer(config,
                      Dumper=LineBreakDumper,
                      sort_keys=False,
                      default_flow_style=False)
|
sky/volumes/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""SDK functions for volumes."""
|
|
2
|
+
import json
|
|
3
|
+
import typing
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
from sky import exceptions
|
|
7
|
+
from sky import sky_logging
|
|
8
|
+
from sky.adaptors import common as adaptors_common
|
|
9
|
+
from sky.schemas.api import responses
|
|
10
|
+
from sky.server import common as server_common
|
|
11
|
+
from sky.server import versions
|
|
12
|
+
from sky.server.requests import payloads
|
|
13
|
+
from sky.usage import usage_lib
|
|
14
|
+
from sky.utils import annotations
|
|
15
|
+
from sky.utils import context
|
|
16
|
+
from sky.utils import ux_utils
|
|
17
|
+
from sky.volumes import volume as volume_lib
|
|
18
|
+
|
|
19
|
+
if typing.TYPE_CHECKING:
|
|
20
|
+
import requests
|
|
21
|
+
else:
|
|
22
|
+
requests = adaptors_common.LazyImport('requests')
|
|
23
|
+
|
|
24
|
+
logger = sky_logging.init_logger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def apply(volume: volume_lib.Volume) -> server_common.RequestId[None]:
    """Sends a request to create or register a volume.

    The volume's fields are packed into a ``VolumeApplyBody`` and POSTed to
    ``/volumes/apply``.

    Example:
        .. code-block:: python

            import sky.volumes
            cfg = {
                'name': 'pvc',
                'type': 'k8s-pvc',
                'size': '100GB',
                'labels': {
                    'key': 'value',
                },
            }
            vol = sky.volumes.Volume.from_yaml_config(cfg)
            request_id = sky.volumes.apply(vol)
            sky.get(request_id)

        or

            import sky.volumes
            vol = sky.volumes.Volume(
                name='vol',
                type='runpod-network-volume',
                infra='runpod/ca/CA-MTL-1',
                size='100GB',
            )
            request_id = sky.volumes.apply(vol)
            sky.get(request_id)

    Args:
        volume: The volume to apply.

    Returns:
        The request ID of the apply request.
    """
    apply_body = payloads.VolumeApplyBody(
        name=volume.name,
        volume_type=volume.type,
        cloud=volume.cloud,
        region=volume.region,
        zone=volume.zone,
        size=volume.size,
        config=volume.config,
        labels=volume.labels,
    )
    # Round-trip through the model's JSON dump to get a plain JSON payload.
    json_payload = json.loads(apply_body.model_dump_json())
    response = server_common.make_authenticated_request('POST',
                                                        '/volumes/apply',
                                                        json=json_payload)
    return server_common.get_request_id(response)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
@versions.minimal_api_version(20)
def validate(volume: volume_lib.Volume) -> None:
    """Asks the server to validate a volume.

    The volume's fields are packed into a ``VolumeValidateBody`` and POSTed
    to ``/volumes/validate``; all validation is done on the server side.

    Args:
        volume: The volume to validate.

    Raises:
        ValueError: If the volume is invalid.
    """
    validate_body = payloads.VolumeValidateBody(
        name=volume.name,
        volume_type=volume.type,
        infra=volume.infra,
        resource_name=volume.resource_name,
        size=volume.size,
        config=volume.config,
        labels=volume.labels,
    )
    json_payload = json.loads(validate_body.model_dump_json())
    response = server_common.make_authenticated_request('POST',
                                                        '/volumes/validate',
                                                        json=json_payload)
    if response.status_code != 400:
        return
    # A 400 response carries a serialized exception in its 'detail' field;
    # re-raise it on the client without a traceback.
    with ux_utils.print_exception_no_traceback():
        raise exceptions.deserialize_exception(response.json().get('detail'))
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def ls() -> server_common.RequestId[List[responses.VolumeRecord]]:
    """Requests the list of all volumes from the API server.

    Returns:
        The request ID of the list request.
    """
    reply = server_common.make_authenticated_request('GET', '/volumes')
    return server_common.get_request_id(reply)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def delete(names: List[str]) -> server_common.RequestId[None]:
    """Asks the API server to delete the named volumes.

    Args:
        names: Names of the volumes to delete.

    Returns:
        The request ID of the delete request.
    """
    payload = payloads.VolumeDeleteBody(names=names)
    serialized = json.loads(payload.model_dump_json())
    reply = server_common.make_authenticated_request('POST',
                                                     '/volumes/delete',
                                                     json=serialized)
    return server_common.get_request_id(reply)
|
|
File without changes
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""Volume management core."""
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
import os
|
|
5
|
+
from typing import Any, Dict, Generator, List, Optional
|
|
6
|
+
import uuid
|
|
7
|
+
|
|
8
|
+
import filelock
|
|
9
|
+
|
|
10
|
+
from sky import global_user_state
|
|
11
|
+
from sky import models
|
|
12
|
+
from sky import provision
|
|
13
|
+
from sky import sky_logging
|
|
14
|
+
from sky.schemas.api import responses
|
|
15
|
+
from sky.utils import common_utils
|
|
16
|
+
from sky.utils import registry
|
|
17
|
+
from sky.utils import rich_utils
|
|
18
|
+
from sky.utils import status_lib
|
|
19
|
+
from sky.utils import ux_utils
|
|
20
|
+
|
|
21
|
+
logger = sky_logging.init_logger(__name__)
|
|
22
|
+
|
|
23
|
+
# Filelocks for the volume management.
|
|
24
|
+
VOLUME_LOCK_PATH = os.path.expanduser('~/.sky/.{volume_name}.lock')
|
|
25
|
+
VOLUME_LOCK_TIMEOUT_SECONDS = 20
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def volume_refresh():
    """Refreshes the stored status of every known volume.

    For each volume returned by ``global_user_state.get_volumes()`` the
    current users are queried via ``provision.get_volume_usedby``. The
    stored status is set to ``IN_USE`` when any pod uses the volume and to
    ``READY`` otherwise; it is only written when it differs from the
    currently stored value. Volumes without a handle are skipped with a
    warning.
    """
    volumes = global_user_state.get_volumes()
    for volume in volumes:
        volume_name = volume.get('name')
        config = volume.get('handle')
        if config is None:
            # The trailing space keeps the two literals from running together.
            logger.warning(f'Volume {volume_name} has no handle. '
                           'Skipping status refresh...')
            continue
        cloud = config.cloud
        # Only the pod list decides the status; the cluster list is unused.
        usedby_pods, _ = provision.get_volume_usedby(cloud, config)
        with _volume_lock(volume_name):
            # Re-read the record while holding the lock so the comparison is
            # made against the status stored at this point.
            latest_volume = global_user_state.get_volume_by_name(volume_name)
            if latest_volume is None:
                logger.warning(f'Volume {volume_name} not found.')
                continue
            status = latest_volume.get('status')
            if not usedby_pods:
                if status != status_lib.VolumeStatus.READY:
                    logger.info(f'Update volume {volume_name} '
                                f'status to READY')
                    global_user_state.update_volume_status(
                        volume_name, status=status_lib.VolumeStatus.READY)
            else:
                if status != status_lib.VolumeStatus.IN_USE:
                    logger.info(f'Update volume {volume_name} '
                                f'status to IN_USE, usedby: {usedby_pods}')
                    global_user_state.update_volume_status(
                        volume_name, status=status_lib.VolumeStatus.IN_USE)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def volume_list() -> List[responses.VolumeRecord]:
    """Builds a record for every stored volume that has a handle.

    Usage information is fetched once per cloud and then mapped onto each
    volume. Volumes without a handle are logged and left out of the result.

    Returns:
        A list of ``responses.VolumeRecord`` objects, each constructed from
        these fields:
        [
            {
                'name': str,
                'type': str,
                'launched_at': int timestamp of creation,
                'cloud': str,
                'region': str,
                'zone': str,
                'size': str,
                'config': Dict[str, Any],
                'name_on_cloud': str,
                'user_hash': str,
                'user_name': str,
                'workspace': str,
                'last_attached_at': int timestamp of last attachment,
                'last_use': last command,
                'status': sky.VolumeStatus,
                'usedby_pods': List[str],
                'usedby_clusters': List[str],
            }
        ]
    """
    with rich_utils.safe_status(ux_utils.spinner_message('Listing volumes')):
        volumes = global_user_state.get_volumes()

        # Group the volume handles by cloud so usage can be queried per cloud.
        configs_by_cloud: Dict[str, List[models.VolumeConfig]] = {}
        for vol in volumes:
            handle = vol.get('handle')
            if handle is None:
                volume_name = vol.get('name')
                logger.warning(f'Volume {volume_name} has no handle.')
                continue
            configs_by_cloud.setdefault(handle.cloud, []).append(handle)

        pods_by_cloud, clusters_by_cloud = {}, {}
        for cloud_name, cloud_configs in configs_by_cloud.items():
            pods, clusters = provision.get_all_volumes_usedby(
                cloud_name, cloud_configs)
            pods_by_cloud[cloud_name] = pods
            clusters_by_cloud[cloud_name] = clusters

        user_map = {
            user.id: user.name for user in global_user_state.get_all_users()
        }

        records = []
        for vol in volumes:
            volume_name = vol.get('name')
            record = {
                'name': volume_name,
                'launched_at': vol.get('launched_at'),
                'user_hash': vol.get('user_hash'),
                'user_name': user_map.get(vol.get('user_hash'), ''),
                'workspace': vol.get('workspace'),
                'last_attached_at': vol.get('last_attached_at'),
                'last_use': vol.get('last_use'),
                'usedby_pods': [],
                'usedby_clusters': [],
            }
            status = vol.get('status')
            record['status'] = status.value if status is not None else ''
            handle = vol.get('handle')
            if handle is None:
                logger.warning(f'Volume {volume_name} has no handle.')
                continue
            cloud_name = handle.cloud
            usedby_pods, usedby_clusters = provision.map_all_volumes_usedby(
                cloud_name,
                pods_by_cloud[cloud_name],
                clusters_by_cloud[cloud_name],
                handle,
            )
            record.update({
                'type': handle.type,
                'cloud': handle.cloud,
                'region': handle.region,
                'zone': handle.zone,
                'size': handle.size,
                'config': handle.config,
                'name_on_cloud': handle.name_on_cloud,
                'usedby_pods': usedby_pods,
                'usedby_clusters': usedby_clusters,
            })
            records.append(responses.VolumeRecord(**record))
        return records
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def volume_delete(names: List[str]) -> None:
    """Deletes the named volumes one after another.

    Each volume is looked up, checked for current users, and then removed
    both through ``provision.delete_volume`` and from the stored state while
    holding the per-volume lock. Processing stops at the first volume that
    fails a check.

    Args:
        names: List of volume names to delete.

    Raises:
        ValueError: If a volume does not exist, has no handle, or is in use
          by a cluster or a pod.
    """
    with rich_utils.safe_status(ux_utils.spinner_message('Deleting volumes')):
        for name in names:
            record = global_user_state.get_volume_by_name(name)
            if record is None:
                raise ValueError(f'Volume {name} not found.')
            handle = record.get('handle')
            if handle is None:
                raise ValueError(f'Volume {name} has no handle.')
            cloud_name = handle.cloud
            pods, clusters = provision.get_volume_usedby(cloud_name, handle)
            # Cluster usage is reported before pod usage.
            if clusters:
                joined_clusters = ', '.join(clusters)
                noun = 'clusters' if len(clusters) > 1 else 'cluster'
                raise ValueError(
                    f'Volume {name} is used by {noun} {joined_clusters}.')
            if pods:
                joined_pods = ', '.join(pods)
                noun = 'pods' if len(pods) > 1 else 'pod'
                raise ValueError(
                    f'Volume {name} is used by {noun} {joined_pods}.')
            logger.debug(f'Deleting volume {name} with config {handle}')
            with _volume_lock(name):
                provision.delete_volume(cloud_name, handle)
                global_user_state.delete_volume(name)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def volume_apply(
    name: str,
    volume_type: str,
    cloud: str,
    region: Optional[str],
    zone: Optional[str],
    size: Optional[str],
    config: Dict[str, Any],
    labels: Optional[Dict[str, str]] = None,
) -> None:
    """Creates or registers a volume and records it as READY.

    If a volume with the same name is already stored, nothing is created
    and the function returns after logging that fact.

    Args:
        name: The name of the volume.
        volume_type: The type of the volume.
        cloud: The cloud of the volume.
        region: The region of the volume.
        zone: The zone of the volume.
        size: The size of the volume.
        config: The configuration of the volume.
        labels: The labels of the volume.
    """
    with rich_utils.safe_status(ux_utils.spinner_message('Creating volume')):
        cloud_obj = registry.CLOUD_REGISTRY.from_str(cloud)
        assert cloud_obj is not None
        region, zone = cloud_obj.validate_region_zone(region, zone)

        # The on-cloud name reuses the cluster naming helper and gets a
        # 6-character suffix taken from a fresh UUID appended after it.
        suffix = str(uuid.uuid4())[:6]
        base_name = common_utils.make_cluster_name_on_cloud(
            name, max_length=cloud_obj.max_cluster_name_length())
        name_on_cloud = base_name + '-' + suffix

        volume_config = models.VolumeConfig(
            name=name,
            type=volume_type,
            cloud=str(cloud_obj),
            region=region,
            zone=zone,
            size=size,
            config=config,
            name_on_cloud=name_on_cloud,
            labels=labels,
        )
        logger.debug(f'Creating volume {name} on cloud {cloud} with config '
                     f'{volume_config}')
        with _volume_lock(name):
            existing = global_user_state.get_volume_by_name(name)
            if existing is not None:
                logger.info(f'Volume {name} already exists.')
                return
            applied_config = provision.apply_volume(cloud, volume_config)
            global_user_state.add_volume(name, applied_config,
                                         status_lib.VolumeStatus.READY)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
@contextlib.contextmanager
def _volume_lock(volume_name: str) -> Generator[None, None, None]:
    """Context manager holding the file lock of a single volume.

    The lock file path is derived from ``VOLUME_LOCK_PATH`` and acquisition
    gives up after ``VOLUME_LOCK_TIMEOUT_SECONDS``.

    Args:
        volume_name: Name of the volume whose lock should be held.

    Raises:
        RuntimeError: If a ``filelock.Timeout`` is raised while acquiring
          the lock or inside the managed block.
    """
    lock_path = VOLUME_LOCK_PATH.format(volume_name=volume_name)
    try:
        with filelock.FileLock(lock_path, VOLUME_LOCK_TIMEOUT_SECONDS):
            yield
    except filelock.Timeout as e:
        # The message names the volume; it previously said "user".
        raise RuntimeError(
            f'Failed to update volume {volume_name} due to a timeout '
            f'when trying to acquire the lock at '
            f'{lock_path}. '
            'Please try again or manually remove the lock '
            f'file if you believe it is stale.') from e
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""REST API for volume management."""
|
|
2
|
+
|
|
3
|
+
import fastapi
|
|
4
|
+
|
|
5
|
+
from sky import clouds
|
|
6
|
+
from sky import exceptions
|
|
7
|
+
from sky import sky_logging
|
|
8
|
+
from sky.server.requests import executor
|
|
9
|
+
from sky.server.requests import payloads
|
|
10
|
+
from sky.server.requests import request_names
|
|
11
|
+
from sky.server.requests import requests as requests_lib
|
|
12
|
+
from sky.utils import registry
|
|
13
|
+
from sky.utils import volume as volume_utils
|
|
14
|
+
from sky.volumes.server import core
|
|
15
|
+
|
|
16
|
+
logger = sky_logging.init_logger(__name__)
|
|
17
|
+
|
|
18
|
+
router = fastapi.APIRouter()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@router.get('')
async def volume_list(request: fastapi.Request) -> None:
    """Schedules a short request that lists the volumes.

    When the request carries an authenticated user, that user's environment
    variables are forwarded in the request body.
    """
    body_kwargs = {}
    user = request.state.auth_user
    if user:
        body_kwargs['env_vars'] = user.to_env_vars()
    list_body = payloads.RequestBody(**body_kwargs)
    await executor.schedule_request_async(
        request_id=request.state.request_id,
        request_name=request_names.RequestName.VOLUME_LIST,
        request_body=list_body,
        func=core.volume_list,
        schedule_type=requests_lib.ScheduleType.SHORT,
    )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@router.post('/delete')
async def volume_delete(request: fastapi.Request,
                        volume_delete_body: payloads.VolumeDeleteBody) -> None:
    """Schedules a long request that deletes the volumes named in the body."""
    schedule_kwargs = {
        'request_id': request.state.request_id,
        'request_name': request_names.RequestName.VOLUME_DELETE,
        'request_body': volume_delete_body,
        'func': core.volume_delete,
        'schedule_type': requests_lib.ScheduleType.LONG,
    }
    await executor.schedule_request_async(**schedule_kwargs)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@router.post('/validate')
async def volume_validate(
        _: fastapi.Request,
        volume_validate_body: payloads.VolumeValidateBody) -> None:
    """Validates a volume definition synchronously.

    The body is converted into a volume config, a ``Volume`` is built from
    it and its ``validate()`` is called. Any exception raised on the way is
    serialized and returned to the client as an HTTP 400 error.
    """
    # pylint: disable=import-outside-toplevel
    from sky.volumes import volume as volume_lib

    body = volume_validate_body
    try:
        candidate = volume_lib.Volume.from_yaml_config({
            'name': body.name,
            'type': body.volume_type,
            'infra': body.infra,
            'size': body.size,
            'labels': body.labels,
            'config': body.config,
            'resource_name': body.resource_name,
        })
        candidate.validate()
    except Exception as e:
        serialized_error = exceptions.serialize_exception(e)
        raise fastapi.HTTPException(status_code=400, detail=serialized_error)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@router.post('/apply')
async def volume_apply(request: fastapi.Request,
                       volume_apply_body: payloads.VolumeApplyBody) -> None:
    """Checks an apply request and schedules the volume creation.

    The volume type and cloud are validated first. PVC volumes must target
    Kubernetes and get a default access mode of READ_WRITE_ONCE when none is
    given; RunPod network volumes must target RunPod. A long request running
    ``core.volume_apply`` is then scheduled with the request body.

    Raises:
        fastapi.HTTPException: With status 400 if the volume type, cloud or
          access mode is invalid, or the volume type does not match the
          cloud.
    """
    volume_cloud = volume_apply_body.cloud
    volume_type = volume_apply_body.volume_type
    volume_config = volume_apply_body.config

    supported_volume_types = [
        supported_type.value for supported_type in volume_utils.VolumeType
    ]
    if volume_type not in supported_volume_types:
        raise fastapi.HTTPException(
            status_code=400, detail=f'Invalid volume type: {volume_type}')
    cloud = registry.CLOUD_REGISTRY.from_str(volume_cloud)
    if cloud is None:
        raise fastapi.HTTPException(status_code=400,
                                    detail=f'Invalid cloud: {volume_cloud}')
    if volume_type == volume_utils.VolumeType.PVC.value:
        if not cloud.is_same_cloud(clouds.Kubernetes()):
            raise fastapi.HTTPException(
                status_code=400,
                detail='PVC storage is only supported on Kubernetes')
        supported_access_modes = [
            access_mode.value for access_mode in volume_utils.VolumeAccessMode
        ]
        if volume_config is None:
            volume_config = {}
        access_mode = volume_config.get('access_mode')
        if access_mode is None:
            volume_config['access_mode'] = (
                volume_utils.VolumeAccessMode.READ_WRITE_ONCE.value)
            # Store the dict back on the body: when the request had no
            # config, the dict above is a new local object and the default
            # would otherwise never reach the scheduled request.
            volume_apply_body.config = volume_config
        elif access_mode not in supported_access_modes:
            raise fastapi.HTTPException(
                status_code=400, detail=f'Invalid access mode: {access_mode}')
    elif volume_type == volume_utils.VolumeType.RUNPOD_NETWORK_VOLUME.value:
        if not cloud.is_same_cloud(clouds.RunPod()):
            raise fastapi.HTTPException(
                status_code=400,
                detail='Runpod network volume is only supported on Runpod')
    await executor.schedule_request_async(
        request_id=request.state.request_id,
        request_name=request_names.RequestName.VOLUME_APPLY,
        request_body=volume_apply_body,
        func=core.volume_apply,
        schedule_type=requests_lib.ScheduleType.LONG,
    )
|