skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/workspaces/core.py
ADDED
|
@@ -0,0 +1,655 @@
|
|
|
1
|
+
"""Workspace management core."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any, Callable, Dict, List, Tuple
|
|
5
|
+
|
|
6
|
+
import filelock
|
|
7
|
+
|
|
8
|
+
from sky import check as sky_check
|
|
9
|
+
from sky import exceptions
|
|
10
|
+
from sky import models
|
|
11
|
+
from sky import sky_logging
|
|
12
|
+
from sky import skypilot_config
|
|
13
|
+
from sky.backends import backend_utils
|
|
14
|
+
from sky.skylet import constants
|
|
15
|
+
from sky.usage import usage_lib
|
|
16
|
+
from sky.users import permission
|
|
17
|
+
from sky.users import rbac
|
|
18
|
+
from sky.utils import annotations
|
|
19
|
+
from sky.utils import common_utils
|
|
20
|
+
from sky.utils import config_utils
|
|
21
|
+
from sky.utils import locks
|
|
22
|
+
from sky.utils import resource_checker
|
|
23
|
+
from sky.utils import schemas
|
|
24
|
+
from sky.workspaces import utils as workspaces_utils
|
|
25
|
+
|
|
26
|
+
logger = sky_logging.init_logger(__name__)
|
|
27
|
+
|
|
28
|
+
# Lock for workspace configuration updates to prevent race conditions
|
|
29
|
+
_WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS = 60
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class WorkspaceConfigComparison:
    """Outcome of diffing a workspace's current config against a new one.

    Bundles the differences found between two workspace configurations,
    with an emphasis on changes to user access and what they imply for
    resource validation.

    Attributes:
        only_user_access_changes: True when nothing but ``allowed_users``
            or ``private`` differs between the two configurations.
        private_changed: True when the ``private`` setting differs.
        private_old: Value of ``private`` in the current configuration.
        private_new: Value of ``private`` in the new configuration.
        allowed_users_changed: True when the allowed users differ.
        allowed_users_old: Allowed users in the current configuration.
        allowed_users_new: Allowed users in the new configuration.
        removed_users: Users present in the old list but not the new one.
        added_users: Users present in the new list but not the old one.
    """
    # NOTE: field order is part of the positional constructor signature.
    only_user_access_changes: bool
    private_changed: bool
    private_old: bool
    private_new: bool
    allowed_users_changed: bool
    allowed_users_old: List[str]
    allowed_users_new: List[str]
    removed_users: List[str]
    added_users: List[str]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# =========================
|
|
63
|
+
# = Workspace Management =
|
|
64
|
+
# =========================
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def get_workspaces() -> Dict[str, Any]:
    """Returns the workspace config for the current user.

    Resolves the current user's ID and delegates to ``workspaces_for_user``.
    """
    current_user_id = common_utils.get_current_user().id
    return workspaces_for_user(current_user_id)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _update_workspaces_config(
        workspace_modifier_fn: Callable[[Dict[str, Any]],
                                        None]) -> Dict[str, Any]:
    """Apply a modification to the ``workspaces`` section of the config.

    The read-modify-write cycle runs while holding the SkyPilot config file
    lock, so that concurrent processes updating the workspace configuration
    do not race with each other.

    Args:
        workspace_modifier_fn: Callback that receives a shallow copy of the
            current workspaces dict and modifies it in-place. Running it
            inside the lock keeps the whole update atomic.

    Returns:
        The workspaces dict after the modification has been applied.

    Raises:
        RuntimeError: If the lock cannot be acquired within
            ``_WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS`` seconds.
    """
    lock_path = skypilot_config.get_skypilot_config_lock_path()
    config_lock = filelock.FileLock(lock_path,
                                    _WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS)
    try:
        with config_lock:
            # Load the config only once the lock is held, so the state we
            # modify is the latest one.
            full_config = skypilot_config.to_dict()
            workspaces = full_config.get('workspaces', {}).copy()

            # Let the caller mutate the workspaces while still locked.
            workspace_modifier_fn(workspaces)

            # Put the modified workspaces back and persist the whole config.
            full_config['workspaces'] = workspaces
            skypilot_config.update_api_server_config_no_lock(full_config)

            return workspaces
    except filelock.Timeout as e:
        timeout_message = (
            f'Failed to update workspace configuration due to a timeout '
            f'when trying to acquire the lock at {lock_path}. This may '
            'indicate another SkyPilot process is currently updating the '
            'configuration. Please try again or manually remove the lock '
            f'file if you believe it is stale.')
        raise RuntimeError(timeout_message) from e
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _validate_workspace_config(workspace_name: str,
                               workspace_config: Dict[str, Any]) -> None:
    """Validate one workspace's configuration against the config schema.

    Args:
        workspace_name: Name of the workspace; used in the error message.
        workspace_config: The workspace configuration to validate.

    Raises:
        ValueError: Raised in place of
            ``exceptions.InvalidSkyPilotConfigError`` when schema validation
            fails.
    """
    config_schema = schemas.get_config_schema()
    per_workspace_schema = (
        config_schema['properties']['workspaces']['additionalProperties'])
    error_prefix = f'Invalid configuration for workspace {workspace_name!r}: '
    try:
        common_utils.validate_schema(workspace_config, per_workspace_schema,
                                     error_prefix)
    except exceptions.InvalidSkyPilotConfigError as e:
        # Re-raise as a ValueError because: a) it is more user-friendly, and
        # b) it will not be caught by a try-except in the caller, which
        # could otherwise cause confusion.
        raise ValueError(str(e)) from e
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _compare_workspace_configs(
    current_config: Dict[str, Any],
    new_config: Dict[str, Any],
) -> WorkspaceConfigComparison:
    """Compare current and new workspace configurations.

    Args:
        current_config: The current workspace configuration.
        new_config: The new workspace configuration.

    Returns:
        WorkspaceConfigComparison describing whether the ``private`` flag
        changed, the old/new allowed user lists (admins appended to both),
        the users added/removed between them, and whether ``private`` /
        ``allowed_users`` are the only keys that differ.
    """
    # Keys that only affect who may access the workspace.
    access_keys = ('private', 'allowed_users')

    private_old = current_config.get('private', False)
    private_new = new_config.get('private', False)
    private_changed = private_old != private_new

    admin_user_ids = permission.permission_service.get_users_for_role(
        rbac.RoleName.ADMIN.value)

    # Resolve the explicitly allowed users; a non-private workspace
    # contributes none. New lists are built here (rather than extending with
    # `+=`) so the list object returned by get_workspace_users is never
    # mutated in place.
    restricted_old = (workspaces_utils.get_workspace_users(current_config)
                      if private_old else [])
    allowed_users_old = [*restricted_old, *admin_user_ids]
    restricted_new = (workspaces_utils.get_workspace_users(new_config)
                      if private_new else [])
    allowed_users_new = [*restricted_new, *admin_user_ids]

    # Sets make the added/removed computation straightforward.
    old_users_set = set(allowed_users_old)
    new_users_set = set(allowed_users_new)

    allowed_users_changed = old_users_set != new_users_set
    removed_users = list(old_users_set - new_users_set)
    added_users = list(new_users_set - old_users_set)

    # Compare everything except the access-related keys to find out whether
    # the change is limited to user access settings.
    current_without_access = {
        k: v for k, v in current_config.items() if k not in access_keys
    }
    new_without_access = {
        k: v for k, v in new_config.items() if k not in access_keys
    }
    only_user_access_changes = current_without_access == new_without_access

    return WorkspaceConfigComparison(
        only_user_access_changes=only_user_access_changes,
        private_changed=private_changed,
        private_old=private_old,
        private_new=private_new,
        allowed_users_changed=allowed_users_changed,
        allowed_users_old=allowed_users_old,
        allowed_users_new=allowed_users_new,
        removed_users=removed_users,
        added_users=added_users)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _validate_workspace_config_changes_with_lock(
        workspace_name: str, current_config: Dict[str, Any],
        new_config: Dict[str, Any]) -> None:
    """Run `_validate_workspace_config_changes` under the workspace lock.

    Args:
        workspace_name: The name of the workspace being changed.
        current_config: The current workspace configuration.
        new_config: The new workspace configuration.

    Raises:
        RuntimeError: If the workspace lock cannot be acquired within
            ``backend_utils.WORKSPACE_LOCK_TIMEOUT_SECONDS``.
        ValueError: Propagated from the validation when the change is not
            allowed.
    """
    lock_id = backend_utils.workspace_lock_id(workspace_name)
    timeout_seconds = backend_utils.WORKSPACE_LOCK_TIMEOUT_SECONDS
    try:
        workspace_lock = locks.get_lock(lock_id, timeout_seconds)
        with workspace_lock:
            # The resource-based validation happens while the lock is held.
            _validate_workspace_config_changes(workspace_name, current_config,
                                               new_config)
    except locks.LockTimeout as lock_error:
        message = (f'Failed to validate workspace {workspace_name!r} due to '
                   'a timeout when trying to access database. Please '
                   f'try again or manually remove the lock at {lock_id}. '
                   f'{common_utils.format_exception(lock_error)}')
        # `from None` drops the lock-timeout traceback from the chained
        # context; its text is already embedded in the message.
        raise RuntimeError(message) from None
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _validate_workspace_config_changes(workspace_name: str,
                                       current_config: Dict[str, Any],
                                       new_config: Dict[str, Any]) -> None:
    """Reject workspace config changes that conflict with active resources.

    Decision table:
      * Something other than ``private`` / ``allowed_users`` changed: the
        workspace must have no active resources.
      * Only access settings changed and ``private`` flipped:
          - private -> public: always accepted.
          - public -> private: active resources must belong to users in the
            new allowed list.
      * Only access settings changed and ``private`` is unchanged: when
        users were removed, the active-resource check is run against the new
        allowed list and the removed users are named in the error.

    Args:
        workspace_name: The name of the workspace.
        current_config: The current workspace configuration.
        new_config: The new workspace configuration.

    Raises:
        ValueError: If the configuration change is not allowed due to active
            resources.
    """
    diff = _compare_workspace_configs(current_config, new_config)

    if not diff.only_user_access_changes:
        # Non-access settings changed: require an idle workspace.
        logger.info(
            f'Non-user-access configuration changes detected for'
            f' workspace {workspace_name!r}. Checking that workspace has'
            f' no active resources.')
        resource_checker.check_no_active_resources_for_workspaces([
            (workspace_name, 'update')
        ])
        return

    if diff.private_changed:
        if diff.private_old and not diff.private_new:
            # private -> public never locks anyone out.
            logger.info(
                f'Workspace {workspace_name!r} changed from private to'
                f' public.')
            return
        if diff.private_new and not diff.private_old:
            # public -> private: resources of users outside the new allowed
            # list block the change.
            logger.info(
                f'Workspace {workspace_name!r} changed from public to'
                f' private. Checking that all active resources belong'
                f' to allowed users.')
            error_summary, missed_users_names, _ = (
                resource_checker.check_users_workspaces_active_resources(
                    diff.allowed_users_new, [workspace_name]))
            if error_summary:
                message_parts = [
                    f'Cannot change workspace {workspace_name!r} to private '
                ]
                if missed_users_names:
                    joined_names = ', '.join(missed_users_names)
                    noun, verb = (('user', 'has')
                                  if len(missed_users_names) == 1 else
                                  ('users', 'have'))
                    message_parts.append(
                        f'because the {noun} {joined_names!r} {verb} '
                        f'{error_summary}')
                message_parts.append(
                    ' but not in the allowed_users list. Please either add '
                    'the users to allowed_users or ask them to terminate '
                    'their resources.')
                raise ValueError(''.join(message_parts))
        return

    # `private` is unchanged; only a removal of users needs checking.
    if not (diff.allowed_users_changed and diff.removed_users):
        return

    logger.info(
        f'Checking that removed users'
        f' {diff.removed_users} do not have'
        f' active resources in workspace {workspace_name!r}.')
    error_summary, _, missed_user_dict = (
        resource_checker.check_users_workspaces_active_resources(
            diff.allowed_users_new, [workspace_name]))
    if not error_summary:
        return

    # Name only the removed users that appear in the returned mapping
    # (presumably user id -> display name; verify against resource_checker).
    blocking_names = [
        missed_user_dict[user_id]
        for user_id in diff.removed_users
        if user_id in missed_user_dict
    ]
    joined_names = ', '.join(blocking_names)
    noun, verb = (('user', 'has') if len(blocking_names) == 1 else
                  ('users', 'have'))
    raise ValueError(
        f'Cannot remove {noun} {joined_names!r} from workspace '
        f'{workspace_name!r} because the {noun} {verb} {error_summary}'
        ', but not in the allowed_users list. Please either add the '
        'users to allowed_users or ask them to terminate their '
        'resources.')
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
@usage_lib.entrypoint
def update_workspace(workspace_name: str, config: Dict[str,
                                                       Any]) -> Dict[str, Any]:
    """Updates the configuration of one workspace.

    The new configuration is schema-validated, then compared with the
    currently stored one to decide whether active resources block the
    change, and finally written back together with the workspace's
    permission policy.

    Args:
        workspace_name: The name of the workspace to update.
        config: The new configuration for the workspace.

    Returns:
        The updated workspaces configuration.

    Raises:
        ValueError: If the workspace configuration is invalid, or if there are
            active clusters or managed jobs that prevent the configuration
            change. What is checked depends on what changed:
            - If only allowed_users or private changed:
              - Private true->false: Always allowed
              - Private false->true: All active resources must belong to
                allowed_users
              - allowed_users changes: Removed users must not have active
                resources
            - Other changes: Workspace must have no active resources
        FileNotFoundError: If the config file cannot be found.
        PermissionError: If the config file cannot be written.
    """
    _validate_workspace_config(workspace_name, config)

    # Look up what is stored today so the change can be classified.
    existing_workspaces = skypilot_config.get_nested(('workspaces',),
                                                     default_value={})
    existing_config = existing_workspaces.get(workspace_name, {})
    _validate_workspace_config_changes_with_lock(workspace_name,
                                                 existing_config, config)

    def _apply_update(workspaces: Dict[str, Any]) -> None:
        """Replace the workspace entry and refresh its permission policy."""
        workspaces[workspace_name] = config
        allowed_users = workspaces_utils.get_workspace_users(config)
        permission.permission_service.update_workspace_policy(
            workspace_name, allowed_users)

    # The helper applies the modifier and persists the result.
    updated_workspaces = _update_workspaces_config(_apply_update)

    # Best-effort `sky check` for the workspace: a failure is only logged,
    # the already-saved update is kept.
    try:
        sky_check.check(quiet=True, workspace=workspace_name)
    except Exception as check_error:  # pylint: disable=broad-except
        logger.warning(f'Workspace {workspace_name} configuration saved but '
                       f'validation check failed: {check_error}')

    return updated_workspaces
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
@usage_lib.entrypoint
def create_workspace(workspace_name: str, config: Dict[str,
                                                       Any]) -> Dict[str, Any]:
    """Creates a new workspace entry in the configuration.

    Args:
        workspace_name: The name of the workspace to create.
        config: The configuration for the new workspace.

    Returns:
        The updated workspaces configuration.

    Raises:
        ValueError: If the workspace already exists or configuration is invalid.
        FileNotFoundError: If the config file cannot be found.
        PermissionError: If the config file cannot be written.
    """
    # The name must be a non-empty string.
    if not (workspace_name and isinstance(workspace_name, str)):
        raise ValueError('Workspace name must be a non-empty string.')

    _validate_workspace_config(workspace_name, config)

    def _apply_create(workspaces: Dict[str, Any]) -> None:
        """Insert the workspace and register its permission policy."""
        if workspace_name in workspaces:
            raise ValueError(f'Workspace {workspace_name!r} already exists. '
                             'Use update instead.')
        workspaces[workspace_name] = config
        # Register the policy for the workspace and its allowed users.
        allowed_users = workspaces_utils.get_workspace_users(config)
        permission.permission_service.add_workspace_policy(
            workspace_name, allowed_users)

    # The helper applies the modifier and persists the result.
    updated_workspaces = _update_workspaces_config(_apply_create)

    # Best-effort `sky check` for the new workspace: a failure is only
    # logged, the already-saved configuration is kept.
    try:
        sky_check.check(quiet=True, workspace=workspace_name)
    except Exception as check_error:  # pylint: disable=broad-except
        logger.warning(f'Workspace {workspace_name} configuration saved but '
                       f'validation check failed: {check_error}')

    return updated_workspaces
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
@usage_lib.entrypoint
def delete_workspace(workspace_name: str) -> Dict[str, Any]:
    """Removes a workspace from the configuration.

    Args:
        workspace_name: The name of the workspace to delete.

    Returns:
        The updated workspaces configuration.

    Raises:
        ValueError: If the workspace doesn't exist, is the default workspace,
            or has active clusters or managed jobs.
        FileNotFoundError: If the config file cannot be found.
        PermissionError: If the config file cannot be written.
    """
    # The default workspace can never be deleted.
    default_workspace = constants.SKYPILOT_DEFAULT_WORKSPACE
    if workspace_name == default_workspace:
        raise ValueError(f'Cannot delete the default workspace '
                         f'{default_workspace!r}.')

    # Fail early when the workspace is unknown.
    if workspace_name not in get_workspaces():
        raise ValueError(f'Workspace {workspace_name!r} does not exist.')

    # Active clusters or managed jobs in the workspace block the deletion.
    resource_checker.check_no_active_resources_for_workspaces([
        (workspace_name, 'delete')
    ])

    def _apply_delete(workspaces: Dict[str, Any]) -> None:
        """Drop the workspace entry and its permission policy."""
        # Checked again here because this runs later, on the workspaces
        # mapping handed over by the update helper.
        if workspace_name not in workspaces:
            raise ValueError(f'Workspace {workspace_name!r} does not exist.')
        del workspaces[workspace_name]
        permission.permission_service.remove_workspace_policy(workspace_name)

    # The helper applies the modifier and persists the result.
    return _update_workspaces_config(_apply_delete)
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
# =========================
|
|
475
|
+
# = Config Management =
|
|
476
|
+
# =========================
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
@usage_lib.entrypoint
def get_config() -> Dict[str, Any]:
    """Returns the whole SkyPilot configuration.

    Returns:
        The dictionary produced by ``skypilot_config.to_dict()``.
    """
    full_config = skypilot_config.to_dict()
    return full_config
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
@usage_lib.entrypoint
def update_config(config: Dict[str, Any]) -> Dict[str, Any]:
    """Replaces the entire SkyPilot configuration.

    Steps: schema validation, early return when nothing changed, rejection
    of API server endpoint changes, per-workspace validation of added /
    changed / removed workspaces, then saving the config and updating the
    workspace permission policies while holding the config file lock.

    Args:
        config: The new configuration to save.

    Returns:
        The updated configuration (the ``config`` argument itself).

    Raises:
        ValueError: If the configuration is invalid, or if there are
            active clusters or managed jobs in workspaces being modified.
        RuntimeError: If the config file lock cannot be acquired in time.
        FileNotFoundError: If the config file cannot be found.
        PermissionError: If the config file cannot be written.
    """
    # Schema validation; reported as ValueError.
    try:
        common_utils.validate_schema(config, schemas.get_config_schema(),
                                     'Invalid SkyPilot configuration: ')
    except exceptions.InvalidSkyPilotConfigError as schema_error:
        raise ValueError(str(schema_error)) from schema_error

    existing_config = skypilot_config.to_dict()
    # Nothing to do when the configuration is unchanged.
    if existing_config == config:
        return config

    # The API server endpoint must stay the same.
    old_endpoint = existing_config.get('api_server', {}).get('endpoint')
    requested_endpoint = config.get('api_server', {}).get('endpoint')
    if old_endpoint != requested_endpoint:
        raise ValueError('API server endpoint should not be changed to avoid '
                         'unexpected behavior.')

    existing_workspaces = existing_config.get('workspaces', {})
    requested_workspaces = config.get('workspaces', {})

    # Permission policy changes to apply once the config is saved.
    policies_to_add: Dict[str, List[str]] = {}
    policies_to_update: Dict[str, List[str]] = {}
    workspaces_to_delete: List[str] = []

    # Added and modified workspaces.
    for name, requested in requested_workspaces.items():
        if name not in existing_workspaces:
            policies_to_add[name] = workspaces_utils.get_workspace_users(
                requested)
            continue

        existing = existing_workspaces[name]
        if existing == requested:
            continue
        # The workspace changed: validate the new entry and make sure
        # active resources do not block the change.
        _validate_workspace_config(name, requested)
        _validate_workspace_config_changes_with_lock(name, existing, requested)
        policies_to_update[name] = workspaces_utils.get_workspace_users(
            requested)

    # Removed workspaces.
    for name in existing_workspaces:
        if name in requested_workspaces:
            continue
        if name == constants.SKYPILOT_DEFAULT_WORKSPACE:
            raise ValueError(f'Cannot delete the default workspace '
                             f'{constants.SKYPILOT_DEFAULT_WORKSPACE!r}.')
        workspaces_to_delete.append(name)

    # One call checks every workspace that is about to be deleted.
    workspaces_to_check: List[Tuple[str, str]] = [
        (name, 'delete') for name in workspaces_to_delete
    ]
    resource_checker.check_no_active_resources_for_workspaces(
        workspaces_to_check)

    # Save under the config file lock to prevent race conditions.
    lock_path = skypilot_config.get_skypilot_config_lock_path()
    try:
        with filelock.FileLock(lock_path,
                               _WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS):
            skypilot_config.update_api_server_config_no_lock(
                config_utils.Config.from_dict(config))
            permission_service = permission.permission_service
            # Policies are applied in the order: add, update, delete.
            for name, users in policies_to_add.items():
                permission_service.add_workspace_policy(name, users)
            for name, users in policies_to_update.items():
                permission_service.update_workspace_policy(name, users)
            for name in workspaces_to_delete:
                permission_service.remove_workspace_policy(name)
    except filelock.Timeout as e:
        raise RuntimeError(
            f'Failed to update configuration due to a timeout '
            f'when trying to acquire the lock at {lock_path}. This may '
            'indicate another SkyPilot process is currently updating the '
            'configuration. Please try again or manually remove the lock '
            f'file if you believe it is stale.') from e

    # Best-effort `sky check`: a failure is only logged, the already-saved
    # configuration is kept.
    try:
        sky_check.check(quiet=True)
    except Exception as check_error:  # pylint: disable=broad-except
        logger.warning(f'Configuration saved but '
                       f'validation check failed: {check_error}')

    return config
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
def reject_request_for_unauthorized_workspace(user: models.User) -> None:
    """Raises if the user may not access the currently active workspace.

    Args:
        user: The user making the request.

    Raises:
        PermissionDeniedError: If the permission service reports that the
            user has no permission for the active workspace.
    """
    active_workspace = skypilot_config.get_active_workspace()
    has_access = permission.permission_service.check_workspace_permission(
        user.id, active_workspace)
    if has_access:
        return
    raise exceptions.PermissionDeniedError(
        f'User {user.name} ({user.id}) does not have '
        f'permission to access workspace {active_workspace!r}')
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
def is_workspace_private(workspace_config: Dict[str, Any]) -> bool:
    """Tell whether a workspace configuration marks the workspace private.

    Args:
        workspace_config: The workspace configuration dictionary.

    Returns:
        The value stored under the 'private' key, or False when the key is
        absent (i.e. the workspace is public).
    """
    private_flag = workspace_config.get('private', False)
    return private_flag
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
@annotations.lru_cache(scope='request', maxsize=1)
def workspaces_for_user(user_id: str) -> Dict[str, Any]:
    """Returns the workspaces the given user is permitted to access.

    The default workspace is always considered, with an empty configuration
    when it is not explicitly configured.

    Args:
        user_id: The user id to check.

    Returns:
        A map from workspace name to workspace configuration, restricted to
        the workspaces for which the permission service grants access.
    """
    all_workspaces = skypilot_config.get_nested(('workspaces',),
                                                default_value={})
    # Make sure the default workspace is part of the candidates.
    all_workspaces.setdefault(constants.SKYPILOT_DEFAULT_WORKSPACE, {})

    permission_service = permission.permission_service
    return {
        name: workspace_config
        for name, workspace_config in all_workspaces.items()
        if permission_service.check_workspace_permission(user_id, name)
    }
|