skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +10 -2
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +64 -0
- sky/adaptors/nebius.py +3 -1
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +20 -0
- sky/authentication.py +157 -263
- sky/backends/__init__.py +3 -2
- sky/backends/backend.py +11 -3
- sky/backends/backend_utils.py +588 -184
- sky/backends/cloud_vm_ray_backend.py +1088 -904
- sky/backends/local_docker_backend.py +9 -5
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +18 -0
- sky/catalog/__init__.py +8 -0
- sky/catalog/aws_catalog.py +4 -0
- sky/catalog/common.py +19 -1
- sky/catalog/data_fetchers/fetch_aws.py +102 -80
- sky/catalog/data_fetchers/fetch_gcp.py +30 -3
- sky/catalog/data_fetchers/fetch_nebius.py +9 -6
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/kubernetes_catalog.py +24 -28
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/catalog/runpod_catalog.py +5 -1
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/check.py +73 -43
- sky/client/cli/command.py +675 -412
- sky/client/cli/flags.py +4 -2
- sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +12 -2
- sky/client/sdk.py +132 -63
- sky/client/sdk_async.py +34 -33
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +6 -0
- sky/clouds/aws.py +337 -129
- sky/clouds/azure.py +24 -18
- sky/clouds/cloud.py +40 -13
- sky/clouds/cudo.py +16 -13
- sky/clouds/do.py +9 -7
- sky/clouds/fluidstack.py +12 -5
- sky/clouds/gcp.py +14 -7
- sky/clouds/hyperbolic.py +12 -5
- sky/clouds/ibm.py +12 -5
- sky/clouds/kubernetes.py +80 -45
- sky/clouds/lambda_cloud.py +12 -5
- sky/clouds/nebius.py +23 -9
- sky/clouds/oci.py +19 -12
- sky/clouds/paperspace.py +4 -1
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +85 -24
- sky/clouds/scp.py +12 -8
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +4 -2
- sky/clouds/utils/scp_utils.py +61 -50
- sky/clouds/vast.py +33 -27
- sky/clouds/vsphere.py +14 -16
- sky/core.py +174 -165
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-7b45f9fbb6308557.js} +1 -1
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-c0b5935149902e6f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-aed0ea19df7cf961.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-84a40f8c7c627fe4.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-531b2f8c4bf89f82.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +92 -1
- sky/data/mounting_utils.py +162 -29
- sky/data/storage.py +200 -19
- sky/data/storage_utils.py +10 -45
- sky/exceptions.py +18 -7
- sky/execution.py +74 -31
- sky/global_user_state.py +605 -191
- sky/jobs/__init__.py +2 -0
- sky/jobs/client/sdk.py +101 -4
- sky/jobs/client/sdk_async.py +31 -5
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +726 -284
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +250 -100
- sky/jobs/scheduler.py +271 -173
- sky/jobs/server/core.py +367 -114
- sky/jobs/server/server.py +81 -35
- sky/jobs/server/utils.py +89 -35
- sky/jobs/state.py +1498 -620
- sky/jobs/utils.py +771 -306
- sky/logs/agent.py +40 -5
- sky/logs/aws.py +9 -19
- sky/metrics/utils.py +282 -39
- sky/optimizer.py +1 -1
- sky/provision/__init__.py +37 -1
- sky/provision/aws/config.py +34 -13
- sky/provision/aws/instance.py +5 -2
- sky/provision/azure/instance.py +5 -3
- sky/provision/common.py +2 -0
- sky/provision/cudo/instance.py +4 -3
- sky/provision/do/instance.py +4 -3
- sky/provision/docker_utils.py +97 -26
- sky/provision/fluidstack/instance.py +6 -5
- sky/provision/gcp/config.py +6 -1
- sky/provision/gcp/instance.py +4 -2
- sky/provision/hyperbolic/instance.py +4 -2
- sky/provision/instance_setup.py +66 -20
- sky/provision/kubernetes/__init__.py +2 -0
- sky/provision/kubernetes/config.py +7 -44
- sky/provision/kubernetes/constants.py +0 -1
- sky/provision/kubernetes/instance.py +609 -213
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +12 -8
- sky/provision/kubernetes/network_utils.py +8 -25
- sky/provision/kubernetes/utils.py +382 -418
- sky/provision/kubernetes/volume.py +150 -18
- sky/provision/lambda_cloud/instance.py +16 -13
- sky/provision/nebius/instance.py +6 -2
- sky/provision/nebius/utils.py +103 -86
- sky/provision/oci/instance.py +4 -2
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +30 -9
- sky/provision/runpod/__init__.py +2 -0
- sky/provision/runpod/instance.py +4 -3
- sky/provision/runpod/volume.py +69 -13
- sky/provision/scp/instance.py +307 -130
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/vast/instance.py +5 -3
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +3 -2
- sky/provision/vsphere/instance.py +8 -6
- sky/provision/vsphere/vsphere_utils.py +8 -1
- sky/resources.py +11 -3
- sky/schemas/api/responses.py +107 -6
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +2 -0
- sky/serve/client/impl.py +55 -21
- sky/serve/constants.py +4 -3
- sky/serve/controller.py +17 -11
- sky/serve/load_balancing_policies.py +1 -1
- sky/serve/replica_managers.py +219 -142
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +63 -54
- sky/serve/serve_utils.py +145 -109
- sky/serve/server/core.py +46 -25
- sky/serve/server/impl.py +311 -162
- sky/serve/server/server.py +21 -19
- sky/serve/service.py +84 -68
- sky/serve/service_spec.py +45 -7
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +12 -7
- sky/server/common.py +47 -24
- sky/server/config.py +62 -28
- sky/server/constants.py +9 -1
- sky/server/daemons.py +109 -38
- sky/server/metrics.py +76 -96
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +381 -145
- sky/server/requests/payloads.py +71 -18
- sky/server/requests/preconditions.py +15 -13
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +507 -157
- sky/server/requests/serializers/decoders.py +48 -17
- sky/server/requests/serializers/encoders.py +85 -20
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +116 -24
- sky/server/server.py +420 -172
- sky/server/stream_utils.py +219 -45
- sky/server/uvicorn.py +30 -19
- sky/setup_files/MANIFEST.in +6 -1
- sky/setup_files/alembic.ini +8 -0
- sky/setup_files/dependencies.py +62 -19
- sky/setup_files/setup.py +44 -44
- sky/sky_logging.py +13 -5
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/configs.py +3 -1
- sky/skylet/constants.py +111 -26
- sky/skylet/events.py +64 -10
- sky/skylet/job_lib.py +141 -104
- sky/skylet/log_lib.py +233 -5
- sky/skylet/log_lib.pyi +40 -2
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +524 -0
- sky/skylet/skylet.py +22 -1
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +99 -79
- sky/ssh_node_pools/server.py +9 -8
- sky/task.py +221 -104
- sky/templates/aws-ray.yml.j2 +1 -0
- sky/templates/azure-ray.yml.j2 +1 -0
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +1 -0
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +1 -0
- sky/templates/hyperbolic-ray.yml.j2 +1 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +3 -0
- sky/templates/kubernetes-ray.yml.j2 +196 -55
- sky/templates/lambda-ray.yml.j2 +1 -0
- sky/templates/nebius-ray.yml.j2 +3 -0
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +1 -0
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +1 -0
- sky/templates/scp-ray.yml.j2 +1 -0
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +188 -43
- sky/usage/usage_lib.py +16 -4
- sky/users/permission.py +60 -43
- sky/utils/accelerator_registry.py +6 -3
- sky/utils/admin_policy_utils.py +18 -5
- sky/utils/annotations.py +22 -0
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/cli_utils/status_utils.py +12 -7
- sky/utils/cluster_utils.py +28 -6
- sky/utils/command_runner.py +88 -27
- sky/utils/command_runner.pyi +36 -3
- sky/utils/common.py +3 -1
- sky/utils/common_utils.py +37 -4
- sky/utils/config_utils.py +1 -14
- sky/utils/context.py +127 -40
- sky/utils/context_utils.py +73 -18
- sky/utils/controller_utils.py +229 -70
- sky/utils/db/db_utils.py +95 -18
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +24 -7
- sky/utils/env_options.py +4 -0
- sky/utils/git.py +559 -1
- sky/utils/kubernetes/create_cluster.sh +15 -30
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/{deploy_remote_cluster.py → deploy_ssh_node_pools.py} +258 -380
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/gpu_labeler.py +13 -3
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +213 -194
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes_enums.py +7 -15
- sky/utils/lock_events.py +4 -4
- sky/utils/locks.py +128 -31
- sky/utils/log_utils.py +0 -319
- sky/utils/resource_checker.py +13 -10
- sky/utils/resources_utils.py +53 -29
- sky/utils/rich_utils.py +8 -4
- sky/utils/schemas.py +107 -52
- sky/utils/subprocess_utils.py +17 -4
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +2 -1
- sky/utils/ux_utils.py +35 -1
- sky/utils/volume.py +88 -4
- sky/utils/yaml_utils.py +9 -0
- sky/volumes/client/sdk.py +48 -10
- sky/volumes/server/core.py +59 -22
- sky/volumes/server/server.py +46 -17
- sky/volumes/volume.py +54 -42
- sky/workspaces/core.py +57 -21
- sky/workspaces/server.py +13 -12
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/METADATA +331 -65
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/client/cli/git.py +0 -549
- sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
- sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
- sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
- sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
- sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
- sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
- sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
- sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
- sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
- sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
- sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
- sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
- sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
- sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
- sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
- sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
- sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
- skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
- /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/users/permission.py
CHANGED
|
@@ -3,7 +3,7 @@ import contextlib
|
|
|
3
3
|
import hashlib
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
|
-
from typing import Generator, List
|
|
6
|
+
from typing import Generator, List, Optional
|
|
7
7
|
|
|
8
8
|
import casbin
|
|
9
9
|
import filelock
|
|
@@ -14,6 +14,7 @@ from sky import models
|
|
|
14
14
|
from sky import sky_logging
|
|
15
15
|
from sky.skylet import constants
|
|
16
16
|
from sky.users import rbac
|
|
17
|
+
from sky.utils import annotations
|
|
17
18
|
from sky.utils import common_utils
|
|
18
19
|
from sky.utils.db import db_utils
|
|
19
20
|
|
|
@@ -27,14 +28,14 @@ logger = sky_logging.init_logger(__name__)
|
|
|
27
28
|
POLICY_UPDATE_LOCK_PATH = os.path.expanduser('~/.sky/.policy_update.lock')
|
|
28
29
|
POLICY_UPDATE_LOCK_TIMEOUT_SECONDS = 20
|
|
29
30
|
|
|
30
|
-
_enforcer_instance = None
|
|
31
|
+
_enforcer_instance: Optional['PermissionService'] = None
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
class PermissionService:
|
|
34
35
|
"""Permission service for SkyPilot API Server."""
|
|
35
36
|
|
|
36
37
|
def __init__(self):
|
|
37
|
-
self.enforcer = None
|
|
38
|
+
self.enforcer: Optional[casbin.Enforcer] = None
|
|
38
39
|
|
|
39
40
|
def _lazy_initialize(self):
|
|
40
41
|
if self.enforcer is not None:
|
|
@@ -42,7 +43,6 @@ class PermissionService:
|
|
|
42
43
|
with _policy_lock():
|
|
43
44
|
global _enforcer_instance
|
|
44
45
|
if _enforcer_instance is None:
|
|
45
|
-
_enforcer_instance = self
|
|
46
46
|
engine = global_user_state.initialize_and_get_db()
|
|
47
47
|
db_utils.add_all_tables_to_db_sqlalchemy(
|
|
48
48
|
sqlalchemy_adapter.Base.metadata, engine)
|
|
@@ -52,11 +52,23 @@ class PermissionService:
|
|
|
52
52
|
'model.conf')
|
|
53
53
|
enforcer = casbin.Enforcer(model_path, adapter)
|
|
54
54
|
self.enforcer = enforcer
|
|
55
|
+
# Only set the enforcer instance once the enforcer
|
|
56
|
+
# is successfully initialized, if we change it and then fail
|
|
57
|
+
# we will set it to None and all subsequent calls will fail.
|
|
58
|
+
_enforcer_instance = self
|
|
55
59
|
self._maybe_initialize_policies()
|
|
56
60
|
self._maybe_initialize_basic_auth_user()
|
|
57
61
|
else:
|
|
62
|
+
assert _enforcer_instance is not None
|
|
58
63
|
self.enforcer = _enforcer_instance.enforcer
|
|
59
64
|
|
|
65
|
+
def _ensure_enforcer(self) -> casbin.Enforcer:
|
|
66
|
+
"""Ensure enforcer is initialized and return it."""
|
|
67
|
+
self._lazy_initialize()
|
|
68
|
+
assert self.enforcer is not None, (
|
|
69
|
+
'Enforcer should be initialized after _lazy_initialize()')
|
|
70
|
+
return self.enforcer
|
|
71
|
+
|
|
60
72
|
def _maybe_initialize_basic_auth_user(self) -> None:
|
|
61
73
|
"""Initialize basic auth user if it is enabled."""
|
|
62
74
|
basic_auth = os.environ.get(constants.SKYPILOT_INITIAL_BASIC_AUTH)
|
|
@@ -72,9 +84,9 @@ class PermissionService:
|
|
|
72
84
|
return
|
|
73
85
|
global_user_state.add_or_update_user(
|
|
74
86
|
models.User(id=user_hash, name=username, password=password))
|
|
75
|
-
self.
|
|
76
|
-
|
|
77
|
-
|
|
87
|
+
enforcer = self._ensure_enforcer()
|
|
88
|
+
enforcer.add_grouping_policy(user_hash, rbac.RoleName.ADMIN.value)
|
|
89
|
+
enforcer.save_policy()
|
|
78
90
|
logger.info(f'Basic auth user {username} initialized')
|
|
79
91
|
|
|
80
92
|
def _maybe_initialize_policies(self) -> None:
|
|
@@ -86,7 +98,8 @@ class PermissionService:
|
|
|
86
98
|
|
|
87
99
|
# Check if policies are already initialized by looking for existing
|
|
88
100
|
# permission policies in the enforcer
|
|
89
|
-
|
|
101
|
+
enforcer = self._ensure_enforcer()
|
|
102
|
+
existing_policies = enforcer.get_policy()
|
|
90
103
|
|
|
91
104
|
# If we already have policies for the expected roles, skip
|
|
92
105
|
# initialization
|
|
@@ -123,7 +136,7 @@ class PermissionService:
|
|
|
123
136
|
logger.debug('Policies not found or incomplete, initializing...')
|
|
124
137
|
# Only clear p policies (permission policies),
|
|
125
138
|
# keep g policies (role policies)
|
|
126
|
-
|
|
139
|
+
enforcer.remove_filtered_policy(0)
|
|
127
140
|
for role, permissions in role_permissions.items():
|
|
128
141
|
if permissions['permissions'] and 'blocklist' in permissions[
|
|
129
142
|
'permissions']:
|
|
@@ -133,14 +146,14 @@ class PermissionService:
|
|
|
133
146
|
method = item['method']
|
|
134
147
|
logger.debug(f'Adding role policy: role={role}, '
|
|
135
148
|
f'path={path}, method={method}')
|
|
136
|
-
|
|
149
|
+
enforcer.add_policy(role, path, method)
|
|
137
150
|
policy_updated = True
|
|
138
151
|
|
|
139
152
|
for workspace_name, users in workspace_policy_permissions.items():
|
|
140
153
|
for user in users:
|
|
141
154
|
logger.debug(f'Initializing workspace policy: user={user}, '
|
|
142
155
|
f'workspace={workspace_name}')
|
|
143
|
-
|
|
156
|
+
enforcer.add_policy(user, workspace_name, '*')
|
|
144
157
|
policy_updated = True
|
|
145
158
|
logger.debug('Policies initialized successfully')
|
|
146
159
|
else:
|
|
@@ -153,7 +166,7 @@ class PermissionService:
|
|
|
153
166
|
policy_updated = policy_updated or user_added
|
|
154
167
|
|
|
155
168
|
if policy_updated:
|
|
156
|
-
|
|
169
|
+
enforcer.save_policy()
|
|
157
170
|
|
|
158
171
|
def add_user_if_not_exists(self, user_id: str) -> None:
|
|
159
172
|
"""Add user role relationship."""
|
|
@@ -167,34 +180,35 @@ class PermissionService:
|
|
|
167
180
|
Returns:
|
|
168
181
|
True if the user was added, False otherwise.
|
|
169
182
|
"""
|
|
170
|
-
|
|
183
|
+
enforcer = self._ensure_enforcer()
|
|
184
|
+
user_roles = enforcer.get_roles_for_user(user_id)
|
|
171
185
|
if not user_roles:
|
|
172
|
-
|
|
186
|
+
enforcer.add_grouping_policy(user_id, rbac.get_default_role())
|
|
173
187
|
return True
|
|
174
188
|
return False
|
|
175
189
|
|
|
176
190
|
def delete_user(self, user_id: str) -> None:
|
|
177
191
|
"""Delete user role relationship."""
|
|
178
|
-
self._lazy_initialize()
|
|
179
192
|
with _policy_lock():
|
|
180
193
|
# Get current roles
|
|
181
194
|
self._load_policy_no_lock()
|
|
182
195
|
# Avoid calling get_user_roles, as it will require the lock.
|
|
183
|
-
|
|
196
|
+
enforcer = self._ensure_enforcer()
|
|
197
|
+
current_roles = enforcer.get_roles_for_user(user_id)
|
|
184
198
|
if not current_roles:
|
|
185
199
|
logger.debug(f'User {user_id} has no roles')
|
|
186
200
|
return
|
|
187
|
-
|
|
188
|
-
|
|
201
|
+
enforcer.remove_grouping_policy(user_id, current_roles[0])
|
|
202
|
+
enforcer.save_policy()
|
|
189
203
|
|
|
190
204
|
def update_role(self, user_id: str, new_role: str) -> None:
|
|
191
205
|
"""Update user role relationship."""
|
|
192
|
-
self._lazy_initialize()
|
|
193
206
|
with _policy_lock():
|
|
194
207
|
# Get current roles
|
|
195
208
|
self._load_policy_no_lock()
|
|
196
209
|
# Avoid calling get_user_roles, as it will require the lock.
|
|
197
|
-
|
|
210
|
+
enforcer = self._ensure_enforcer()
|
|
211
|
+
current_roles = enforcer.get_roles_for_user(user_id)
|
|
198
212
|
if not current_roles:
|
|
199
213
|
logger.debug(f'User {user_id} has no roles')
|
|
200
214
|
else:
|
|
@@ -203,11 +217,11 @@ class PermissionService:
|
|
|
203
217
|
if current_role == new_role:
|
|
204
218
|
logger.debug(f'User {user_id} already has role {new_role}')
|
|
205
219
|
return
|
|
206
|
-
|
|
220
|
+
enforcer.remove_grouping_policy(user_id, current_role)
|
|
207
221
|
|
|
208
222
|
# Update user role
|
|
209
|
-
|
|
210
|
-
|
|
223
|
+
enforcer.add_grouping_policy(user_id, new_role)
|
|
224
|
+
enforcer.save_policy()
|
|
211
225
|
|
|
212
226
|
def get_user_roles(self, user_id: str) -> List[str]:
|
|
213
227
|
"""Get all roles for a user.
|
|
@@ -222,15 +236,15 @@ class PermissionService:
|
|
|
222
236
|
Returns:
|
|
223
237
|
A list of role names that the user has.
|
|
224
238
|
"""
|
|
225
|
-
self._lazy_initialize()
|
|
226
239
|
self._load_policy_no_lock()
|
|
227
|
-
|
|
240
|
+
enforcer = self._ensure_enforcer()
|
|
241
|
+
return enforcer.get_roles_for_user(user_id)
|
|
228
242
|
|
|
229
243
|
def get_users_for_role(self, role: str) -> List[str]:
|
|
230
244
|
"""Get all users for a role."""
|
|
231
|
-
self._lazy_initialize()
|
|
232
245
|
self._load_policy_no_lock()
|
|
233
|
-
|
|
246
|
+
enforcer = self._ensure_enforcer()
|
|
247
|
+
return enforcer.get_users_for_role(role)
|
|
234
248
|
|
|
235
249
|
def check_endpoint_permission(self, user_id: str, path: str,
|
|
236
250
|
method: str) -> bool:
|
|
@@ -241,19 +255,22 @@ class PermissionService:
|
|
|
241
255
|
# it is a hot path in every request. It is ok to have a stale policy,
|
|
242
256
|
# as long as it is eventually consistent.
|
|
243
257
|
# self._load_policy_no_lock()
|
|
244
|
-
self.
|
|
245
|
-
return
|
|
258
|
+
enforcer = self._ensure_enforcer()
|
|
259
|
+
return enforcer.enforce(user_id, path, method)
|
|
246
260
|
|
|
247
261
|
def _load_policy_no_lock(self):
|
|
248
262
|
"""Load policy from storage."""
|
|
249
|
-
self.
|
|
263
|
+
enforcer = self._ensure_enforcer()
|
|
264
|
+
enforcer.load_policy()
|
|
250
265
|
|
|
251
266
|
def load_policy(self):
|
|
252
267
|
"""Load policy from storage with lock."""
|
|
253
|
-
self._lazy_initialize()
|
|
254
268
|
with _policy_lock():
|
|
255
269
|
self._load_policy_no_lock()
|
|
256
270
|
|
|
271
|
+
# Right now, not a lot of users are using multiple workspaces,
|
|
272
|
+
# so 5 should be more than enough.
|
|
273
|
+
@annotations.lru_cache(scope='request', maxsize=5)
|
|
257
274
|
def check_workspace_permission(self, user_id: str,
|
|
258
275
|
workspace_name: str) -> bool:
|
|
259
276
|
"""Check workspace permission.
|
|
@@ -266,7 +283,6 @@ class PermissionService:
|
|
|
266
283
|
For public workspaces, the permission is granted via a wildcard policy
|
|
267
284
|
('*').
|
|
268
285
|
"""
|
|
269
|
-
self._lazy_initialize()
|
|
270
286
|
if os.getenv(constants.ENV_VAR_IS_SKYPILOT_SERVER) is None:
|
|
271
287
|
# When it is not on API server, we allow all users to access all
|
|
272
288
|
# workspaces, as the workspace check has been done on API server.
|
|
@@ -279,7 +295,8 @@ class PermissionService:
|
|
|
279
295
|
# r.act == p.act
|
|
280
296
|
# This means if there's a policy ('*', workspace_name, '*'), it will
|
|
281
297
|
# match any user
|
|
282
|
-
|
|
298
|
+
enforcer = self._ensure_enforcer()
|
|
299
|
+
result = enforcer.enforce(user_id, workspace_name, '*')
|
|
283
300
|
logger.debug(f'Workspace permission check: user={user_id}, '
|
|
284
301
|
f'workspace={workspace_name}, result={result}')
|
|
285
302
|
return result
|
|
@@ -323,13 +340,13 @@ class PermissionService:
|
|
|
323
340
|
For public workspaces, this should be ['*'].
|
|
324
341
|
For private workspaces, this should be specific user IDs.
|
|
325
342
|
"""
|
|
326
|
-
self._lazy_initialize()
|
|
327
343
|
with _policy_lock():
|
|
344
|
+
enforcer = self._ensure_enforcer()
|
|
328
345
|
for user in users:
|
|
329
346
|
logger.debug(f'Adding workspace policy: user={user}, '
|
|
330
347
|
f'workspace={workspace_name}')
|
|
331
|
-
|
|
332
|
-
|
|
348
|
+
enforcer.add_policy(user, workspace_name, '*')
|
|
349
|
+
enforcer.save_policy()
|
|
333
350
|
|
|
334
351
|
def update_workspace_policy(self, workspace_name: str,
|
|
335
352
|
users: List[str]) -> None:
|
|
@@ -341,24 +358,24 @@ class PermissionService:
|
|
|
341
358
|
For public workspaces, this should be ['*'].
|
|
342
359
|
For private workspaces, this should be specific user IDs.
|
|
343
360
|
"""
|
|
344
|
-
self._lazy_initialize()
|
|
345
361
|
with _policy_lock():
|
|
346
362
|
self._load_policy_no_lock()
|
|
363
|
+
enforcer = self._ensure_enforcer()
|
|
347
364
|
# Remove all existing policies for this workspace
|
|
348
|
-
|
|
365
|
+
enforcer.remove_filtered_policy(1, workspace_name)
|
|
349
366
|
# Add new policies
|
|
350
367
|
for user in users:
|
|
351
368
|
logger.debug(f'Updating workspace policy: user={user}, '
|
|
352
369
|
f'workspace={workspace_name}')
|
|
353
|
-
|
|
354
|
-
|
|
370
|
+
enforcer.add_policy(user, workspace_name, '*')
|
|
371
|
+
enforcer.save_policy()
|
|
355
372
|
|
|
356
373
|
def remove_workspace_policy(self, workspace_name: str) -> None:
|
|
357
374
|
"""Remove workspace policy."""
|
|
358
|
-
self._lazy_initialize()
|
|
359
375
|
with _policy_lock():
|
|
360
|
-
self.
|
|
361
|
-
|
|
376
|
+
enforcer = self._ensure_enforcer()
|
|
377
|
+
enforcer.remove_filtered_policy(1, workspace_name)
|
|
378
|
+
enforcer.save_policy()
|
|
362
379
|
|
|
363
380
|
|
|
364
381
|
@contextlib.contextmanager
|
|
sky/utils/accelerator_registry.py
CHANGED
|
@@ -3,6 +3,7 @@ import typing
|
|
|
3
3
|
from typing import List, Optional
|
|
4
4
|
|
|
5
5
|
from sky import catalog
|
|
6
|
+
from sky.catalog import common as catalog_common
|
|
6
7
|
from sky.utils import rich_utils
|
|
7
8
|
from sky.utils import ux_utils
|
|
8
9
|
|
|
@@ -34,8 +35,8 @@ if typing.TYPE_CHECKING:
|
|
|
34
35
|
|
|
35
36
|
# Use a cached version of accelerators to cloud mapping, so that we don't have
|
|
36
37
|
# to download and read the catalog file for every cloud locally.
|
|
37
|
-
_accelerator_df =
|
|
38
|
-
_memory_df =
|
|
38
|
+
_accelerator_df = catalog_common.read_catalog('common/accelerators.csv')
|
|
39
|
+
_memory_df = catalog_common.read_catalog('common/metadata.csv')
|
|
39
40
|
|
|
40
41
|
# List of non-GPU accelerators that are supported by our backend for job queue
|
|
41
42
|
# scheduling.
|
|
@@ -107,10 +108,12 @@ def canonicalize_accelerator_name(accelerator: str,
|
|
|
107
108
|
if not names and cloud_str in ['Kubernetes', None]:
|
|
108
109
|
with rich_utils.safe_status(
|
|
109
110
|
ux_utils.spinner_message('Listing accelerators on Kubernetes')):
|
|
111
|
+
# Only search for Kubernetes to reduce the lookup cost.
|
|
112
|
+
# For other clouds, the catalog has been searched in previous steps.
|
|
110
113
|
searched = catalog.list_accelerators(
|
|
111
114
|
name_filter=accelerator,
|
|
112
115
|
case_sensitive=False,
|
|
113
|
-
clouds=
|
|
116
|
+
clouds='Kubernetes',
|
|
114
117
|
)
|
|
115
118
|
names = list(searched.keys())
|
|
116
119
|
if accelerator in names:
|
sky/utils/admin_policy_utils.py
CHANGED
|
@@ -2,8 +2,9 @@
|
|
|
2
2
|
import contextlib
|
|
3
3
|
import copy
|
|
4
4
|
import importlib
|
|
5
|
+
import typing
|
|
5
6
|
from typing import Iterator, Optional, Tuple, Union
|
|
6
|
-
import
|
|
7
|
+
from urllib import parse as urlparse
|
|
7
8
|
|
|
8
9
|
import colorama
|
|
9
10
|
|
|
@@ -13,17 +14,21 @@ from sky import exceptions
|
|
|
13
14
|
from sky import sky_logging
|
|
14
15
|
from sky import skypilot_config
|
|
15
16
|
from sky import task as task_lib
|
|
17
|
+
from sky.server.requests import request_names
|
|
16
18
|
from sky.utils import common_utils
|
|
17
19
|
from sky.utils import config_utils
|
|
18
20
|
from sky.utils import ux_utils
|
|
19
21
|
|
|
20
22
|
logger = sky_logging.init_logger(__name__)
|
|
21
23
|
|
|
24
|
+
if typing.TYPE_CHECKING:
|
|
25
|
+
from sky import models
|
|
26
|
+
|
|
22
27
|
|
|
23
28
|
def _is_url(policy_string: str) -> bool:
|
|
24
29
|
"""Check if the policy string is a URL."""
|
|
25
30
|
try:
|
|
26
|
-
parsed =
|
|
31
|
+
parsed = urlparse.urlparse(policy_string)
|
|
27
32
|
return parsed.scheme in ('http', 'https')
|
|
28
33
|
except Exception: # pylint: disable=broad-except
|
|
29
34
|
return False
|
|
@@ -73,6 +78,7 @@ def _get_policy_impl(
|
|
|
73
78
|
@contextlib.contextmanager
|
|
74
79
|
def apply_and_use_config_in_current_request(
|
|
75
80
|
entrypoint: Union['dag_lib.Dag', 'task_lib.Task'],
|
|
81
|
+
request_name: request_names.AdminPolicyRequestName,
|
|
76
82
|
request_options: Optional[admin_policy.RequestOptions] = None,
|
|
77
83
|
at_client_side: bool = False,
|
|
78
84
|
) -> Iterator['dag_lib.Dag']:
|
|
@@ -86,7 +92,8 @@ def apply_and_use_config_in_current_request(
|
|
|
86
92
|
Refer to `apply()` for more details.
|
|
87
93
|
"""
|
|
88
94
|
original_config = skypilot_config.to_dict()
|
|
89
|
-
dag, mutated_config = apply(entrypoint, request_options,
|
|
95
|
+
dag, mutated_config = apply(entrypoint, request_name, request_options,
|
|
96
|
+
at_client_side)
|
|
90
97
|
if mutated_config != original_config:
|
|
91
98
|
with skypilot_config.replace_skypilot_config(mutated_config):
|
|
92
99
|
yield dag
|
|
@@ -96,6 +103,7 @@ def apply_and_use_config_in_current_request(
|
|
|
96
103
|
|
|
97
104
|
def apply(
|
|
98
105
|
entrypoint: Union['dag_lib.Dag', 'task_lib.Task'],
|
|
106
|
+
request_name: request_names.AdminPolicyRequestName,
|
|
99
107
|
request_options: Optional[admin_policy.RequestOptions] = None,
|
|
100
108
|
at_client_side: bool = False,
|
|
101
109
|
) -> Tuple['dag_lib.Dag', config_utils.Config]:
|
|
@@ -126,9 +134,13 @@ def apply(
|
|
|
126
134
|
if policy is None:
|
|
127
135
|
return dag, skypilot_config.to_dict()
|
|
128
136
|
|
|
137
|
+
user = None
|
|
129
138
|
if at_client_side:
|
|
130
139
|
logger.info(f'Applying client admin policy: {policy}')
|
|
131
140
|
else:
|
|
141
|
+
# When being called by the server, the middleware has set the
|
|
142
|
+
# current user and this information is available at this point.
|
|
143
|
+
user = common_utils.get_current_user()
|
|
132
144
|
logger.info(f'Applying server admin policy: {policy}')
|
|
133
145
|
config = copy.deepcopy(skypilot_config.to_dict())
|
|
134
146
|
mutated_dag = dag_lib.Dag()
|
|
@@ -136,8 +148,9 @@ def apply(
|
|
|
136
148
|
|
|
137
149
|
mutated_config = None
|
|
138
150
|
for task in dag.tasks:
|
|
139
|
-
user_request = admin_policy.UserRequest(task, config,
|
|
140
|
-
at_client_side
|
|
151
|
+
user_request = admin_policy.UserRequest(task, config, request_name,
|
|
152
|
+
request_options, at_client_side,
|
|
153
|
+
user)
|
|
141
154
|
try:
|
|
142
155
|
mutated_user_request = policy.apply(user_request)
|
|
143
156
|
# Avoid duplicate exception wrapping.
|
sky/utils/annotations.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
import functools
|
|
4
4
|
from typing import Callable, Literal, TypeVar
|
|
5
5
|
|
|
6
|
+
import cachetools
|
|
6
7
|
from typing_extensions import ParamSpec
|
|
7
8
|
|
|
8
9
|
# Whether the current process is a SkyPilot API server process.
|
|
@@ -56,6 +57,27 @@ def lru_cache(scope: Literal['global', 'request'], *lru_cache_args,
|
|
|
56
57
|
return decorator
|
|
57
58
|
|
|
58
59
|
|
|
60
|
+
def ttl_cache(scope: Literal['global', 'request'], *ttl_cache_args,
|
|
61
|
+
**ttl_cache_kwargs) -> Callable:
|
|
62
|
+
"""TTLCache decorator for functions.
|
|
63
|
+
|
|
64
|
+
This decorator allows us to track which functions need to be reloaded for a
|
|
65
|
+
new request using the scope argument.
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
def decorator(func: Callable[P, T]) -> Callable[P, T]:
|
|
69
|
+
if scope == 'global':
|
|
70
|
+
return cachetools.cached(
|
|
71
|
+
cachetools.TTLCache(*ttl_cache_args, **ttl_cache_kwargs))(func)
|
|
72
|
+
else:
|
|
73
|
+
cached_func = cachetools.cached(
|
|
74
|
+
cachetools.TTLCache(*ttl_cache_args, **ttl_cache_kwargs))(func)
|
|
75
|
+
_FUNCTIONS_NEED_RELOAD_CACHE.append(cached_func)
|
|
76
|
+
return cached_func
|
|
77
|
+
|
|
78
|
+
return decorator
|
|
79
|
+
|
|
80
|
+
|
|
59
81
|
def clear_request_level_cache():
|
|
60
82
|
"""Clear the request-level cache."""
|
|
61
83
|
for func in _FUNCTIONS_NEED_RELOAD_CACHE:
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Asyncio utilities."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import functools
|
|
5
|
+
from typing import Set
|
|
6
|
+
|
|
7
|
+
_background_tasks: Set[asyncio.Task] = set()
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def shield(func):
|
|
11
|
+
"""Shield the decorated async function from cancellation.
|
|
12
|
+
|
|
13
|
+
If the outer coroutine is cancelled, the inner decorated function
|
|
14
|
+
will be protected from cancellation by asyncio.shield(). And we will
|
|
15
|
+
maintain a reference to the inner task to prevent it from being GCed before
|
|
16
|
+
it is done.
|
|
17
|
+
|
|
18
|
+
For example, filelock.AsyncFileLock is not cancellation safe. The
|
|
19
|
+
following code:
|
|
20
|
+
|
|
21
|
+
async def fn_with_lock():
|
|
22
|
+
async with filelock.AsyncFileLock('lock'):
|
|
23
|
+
await asyncio.sleep(1)
|
|
24
|
+
|
|
25
|
+
is equivalent to:
|
|
26
|
+
|
|
27
|
+
# The lock may leak if the cancellation happens in
|
|
28
|
+
# lock.acquire() or lock.release()
|
|
29
|
+
async def fn_with_lock():
|
|
30
|
+
lock = filelock.AsyncFileLock('lock')
|
|
31
|
+
await lock.acquire()
|
|
32
|
+
try:
|
|
33
|
+
await asyncio.sleep(1)
|
|
34
|
+
finally:
|
|
35
|
+
await lock.release()
|
|
36
|
+
|
|
37
|
+
Shielding the function ensures that no cancellation will happen in the
|
|
38
|
+
function, thus the lock will be released properly:
|
|
39
|
+
|
|
40
|
+
@shield
|
|
41
|
+
async def fn_with_lock()
|
|
42
|
+
|
|
43
|
+
Note that the resource acquisition and release should usually be protected
|
|
44
|
+
in one @shield block but not separately, e.g.:
|
|
45
|
+
|
|
46
|
+
lock = filelock.AsyncFileLock('lock')
|
|
47
|
+
|
|
48
|
+
@shield
|
|
49
|
+
async def acquire():
|
|
50
|
+
await lock.acquire()
|
|
51
|
+
|
|
52
|
+
@shield
|
|
53
|
+
async def release():
|
|
54
|
+
await lock.release()
|
|
55
|
+
|
|
56
|
+
async def fn_with_lock():
|
|
57
|
+
await acquire()
|
|
58
|
+
try:
|
|
59
|
+
do_something()
|
|
60
|
+
finally:
|
|
61
|
+
await release()
|
|
62
|
+
|
|
63
|
+
The above code is not safe because if `fn_with_lock` is cancelled,
|
|
64
|
+
`acquire()` and `release()` will be executed in the background
|
|
65
|
+
concurrently and cause race conditions.
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
@functools.wraps(func)
|
|
69
|
+
async def async_wrapper(*args, **kwargs):
|
|
70
|
+
task = asyncio.create_task(func(*args, **kwargs))
|
|
71
|
+
try:
|
|
72
|
+
return await asyncio.shield(task)
|
|
73
|
+
except asyncio.CancelledError:
|
|
74
|
+
_background_tasks.add(task)
|
|
75
|
+
task.add_done_callback(lambda _: _background_tasks.discard(task))
|
|
76
|
+
raise
|
|
77
|
+
|
|
78
|
+
return async_wrapper
|
sky/utils/atomic.py
CHANGED
sky/utils/auth_utils.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Utils for managing SkyPilot SSH key pairs."""
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
import os
|
|
5
|
+
from typing import Tuple
|
|
6
|
+
|
|
7
|
+
import filelock
|
|
8
|
+
|
|
9
|
+
from sky import global_user_state
|
|
10
|
+
from sky import sky_logging
|
|
11
|
+
from sky.utils import common_utils
|
|
12
|
+
|
|
13
|
+
logger = sky_logging.init_logger(__name__)
|
|
14
|
+
|
|
15
|
+
MAX_TRIALS = 64
|
|
16
|
+
# TODO(zhwu): Support user specified key pair.
|
|
17
|
+
# We intentionally do not store the ssh key pair in
|
|
18
|
+
# ~/.sky/api_server/clients, i.e. sky.server.common.API_SERVER_CLIENT_DIR,
|
|
19
|
+
# because the ssh key pair needs to persist across API server restarts, while
|
|
20
|
+
# the former dir is ephemeral.
|
|
21
|
+
_SSH_KEY_PATH_PREFIX = '~/.sky/clients/{user_hash}/ssh'
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_ssh_key_and_lock_path(user_hash: str) -> Tuple[str, str, str]:
|
|
25
|
+
user_ssh_key_prefix = _SSH_KEY_PATH_PREFIX.format(user_hash=user_hash)
|
|
26
|
+
|
|
27
|
+
os.makedirs(os.path.expanduser(user_ssh_key_prefix),
|
|
28
|
+
exist_ok=True,
|
|
29
|
+
mode=0o700)
|
|
30
|
+
private_key_path = os.path.join(user_ssh_key_prefix, 'sky-key')
|
|
31
|
+
public_key_path = os.path.join(user_ssh_key_prefix, 'sky-key.pub')
|
|
32
|
+
lock_path = os.path.join(user_ssh_key_prefix, '.__internal-sky-key.lock')
|
|
33
|
+
return private_key_path, public_key_path, lock_path
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _generate_rsa_key_pair() -> Tuple[str, str]:
|
|
37
|
+
# Keep the import of the cryptography local to avoid expensive
|
|
38
|
+
# third-party imports when not needed.
|
|
39
|
+
# pylint: disable=import-outside-toplevel
|
|
40
|
+
from cryptography.hazmat.backends import default_backend
|
|
41
|
+
from cryptography.hazmat.primitives import serialization
|
|
42
|
+
from cryptography.hazmat.primitives.asymmetric import rsa
|
|
43
|
+
|
|
44
|
+
key = rsa.generate_private_key(backend=default_backend(),
|
|
45
|
+
public_exponent=65537,
|
|
46
|
+
key_size=2048)
|
|
47
|
+
|
|
48
|
+
private_key = key.private_bytes(
|
|
49
|
+
encoding=serialization.Encoding.PEM,
|
|
50
|
+
format=serialization.PrivateFormat.TraditionalOpenSSL,
|
|
51
|
+
encryption_algorithm=serialization.NoEncryption()).decode(
|
|
52
|
+
'utf-8').strip()
|
|
53
|
+
|
|
54
|
+
public_key = key.public_key().public_bytes(
|
|
55
|
+
serialization.Encoding.OpenSSH,
|
|
56
|
+
serialization.PublicFormat.OpenSSH).decode('utf-8').strip()
|
|
57
|
+
|
|
58
|
+
return public_key, private_key
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _save_key_pair(private_key_path: str, public_key_path: str,
|
|
62
|
+
private_key: str, public_key: str) -> None:
|
|
63
|
+
key_dir = os.path.dirname(private_key_path)
|
|
64
|
+
os.makedirs(key_dir, exist_ok=True, mode=0o700)
|
|
65
|
+
|
|
66
|
+
with open(
|
|
67
|
+
private_key_path,
|
|
68
|
+
'w',
|
|
69
|
+
encoding='utf-8',
|
|
70
|
+
opener=functools.partial(os.open, mode=0o600),
|
|
71
|
+
) as f:
|
|
72
|
+
f.write(private_key)
|
|
73
|
+
|
|
74
|
+
with open(public_key_path,
|
|
75
|
+
'w',
|
|
76
|
+
encoding='utf-8',
|
|
77
|
+
opener=functools.partial(os.open, mode=0o644)) as f:
|
|
78
|
+
f.write(public_key)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def get_or_generate_keys() -> Tuple[str, str]:
|
|
82
|
+
"""Returns the absolute private and public key paths."""
|
|
83
|
+
user_hash = common_utils.get_user_hash()
|
|
84
|
+
private_key_path, public_key_path, lock_path = get_ssh_key_and_lock_path(
|
|
85
|
+
user_hash)
|
|
86
|
+
private_key_path = os.path.expanduser(private_key_path)
|
|
87
|
+
public_key_path = os.path.expanduser(public_key_path)
|
|
88
|
+
lock_path = os.path.expanduser(lock_path)
|
|
89
|
+
|
|
90
|
+
lock_dir = os.path.dirname(lock_path)
|
|
91
|
+
# We should have the folder ~/.sky/generated/ssh to have 0o700 permission,
|
|
92
|
+
# as the ssh configs will be written to this folder as well in
|
|
93
|
+
# backend_utils.SSHConfigHelper
|
|
94
|
+
os.makedirs(lock_dir, exist_ok=True, mode=0o700)
|
|
95
|
+
with filelock.FileLock(lock_path, timeout=10):
|
|
96
|
+
if not os.path.exists(private_key_path):
|
|
97
|
+
ssh_public_key, ssh_private_key, exists = (
|
|
98
|
+
global_user_state.get_ssh_keys(user_hash))
|
|
99
|
+
if not exists:
|
|
100
|
+
ssh_public_key, ssh_private_key = _generate_rsa_key_pair()
|
|
101
|
+
global_user_state.set_ssh_keys(user_hash, ssh_public_key,
|
|
102
|
+
ssh_private_key)
|
|
103
|
+
_save_key_pair(private_key_path, public_key_path, ssh_private_key,
|
|
104
|
+
ssh_public_key)
|
|
105
|
+
assert os.path.exists(public_key_path), (
|
|
106
|
+
'Private key found, but associated public key '
|
|
107
|
+
f'{public_key_path} does not exist.')
|
|
108
|
+
return private_key_path, public_key_path
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def create_ssh_key_files_from_db(private_key_path: str) -> bool:
|
|
112
|
+
"""Creates the ssh key files from the database.
|
|
113
|
+
|
|
114
|
+
Returns:
|
|
115
|
+
True if the ssh key files are created successfully, False otherwise.
|
|
116
|
+
"""
|
|
117
|
+
# Assume private key path is in the format of
|
|
118
|
+
# ~/.sky/clients/<user_hash>/ssh/sky-key
|
|
119
|
+
separated_path = os.path.normpath(private_key_path).split(os.path.sep)
|
|
120
|
+
assert separated_path[-1] == 'sky-key'
|
|
121
|
+
assert separated_path[-2] == 'ssh'
|
|
122
|
+
user_hash = separated_path[-3]
|
|
123
|
+
|
|
124
|
+
private_key_path_generated, public_key_path, lock_path = (
|
|
125
|
+
get_ssh_key_and_lock_path(user_hash))
|
|
126
|
+
assert private_key_path == os.path.expanduser(private_key_path_generated), (
|
|
127
|
+
f'Private key path {private_key_path} does not '
|
|
128
|
+
'match the generated path '
|
|
129
|
+
f'{os.path.expanduser(private_key_path_generated)}')
|
|
130
|
+
private_key_path = os.path.expanduser(private_key_path)
|
|
131
|
+
public_key_path = os.path.expanduser(public_key_path)
|
|
132
|
+
lock_path = os.path.expanduser(lock_path)
|
|
133
|
+
lock_dir = os.path.dirname(lock_path)
|
|
134
|
+
|
|
135
|
+
if os.path.exists(private_key_path) and os.path.exists(public_key_path):
|
|
136
|
+
return True
|
|
137
|
+
# We should have the folder ~/.sky/generated/ssh to have 0o700 permission,
|
|
138
|
+
# as the ssh configs will be written to this folder as well in
|
|
139
|
+
# backend_utils.SSHConfigHelper
|
|
140
|
+
os.makedirs(lock_dir, exist_ok=True, mode=0o700)
|
|
141
|
+
with filelock.FileLock(lock_path, timeout=10):
|
|
142
|
+
if not os.path.exists(private_key_path):
|
|
143
|
+
ssh_public_key, ssh_private_key, exists = (
|
|
144
|
+
global_user_state.get_ssh_keys(user_hash))
|
|
145
|
+
if not exists:
|
|
146
|
+
logger.debug(f'SSH keys not found for user {user_hash}')
|
|
147
|
+
return False
|
|
148
|
+
_save_key_pair(private_key_path, public_key_path, ssh_private_key,
|
|
149
|
+
ssh_public_key)
|
|
150
|
+
assert os.path.exists(public_key_path), (
|
|
151
|
+
'Private key found, but associated public key '
|
|
152
|
+
f'{public_key_path} does not exist.')
|
|
153
|
+
return True
|