skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +12 -2
- sky/adaptors/aws.py +27 -22
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +64 -0
- sky/adaptors/nebius.py +3 -1
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/adaptors/slurm.py +478 -0
- sky/admin_policy.py +20 -0
- sky/authentication.py +157 -263
- sky/backends/__init__.py +3 -2
- sky/backends/backend.py +11 -3
- sky/backends/backend_utils.py +630 -185
- sky/backends/cloud_vm_ray_backend.py +1111 -928
- sky/backends/local_docker_backend.py +9 -5
- sky/backends/task_codegen.py +971 -0
- sky/backends/wheel_utils.py +18 -0
- sky/catalog/__init__.py +8 -3
- sky/catalog/aws_catalog.py +4 -0
- sky/catalog/common.py +19 -1
- sky/catalog/data_fetchers/fetch_aws.py +102 -80
- sky/catalog/data_fetchers/fetch_gcp.py +30 -3
- sky/catalog/data_fetchers/fetch_nebius.py +9 -6
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/kubernetes_catalog.py +36 -32
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/catalog/runpod_catalog.py +5 -1
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/slurm_catalog.py +243 -0
- sky/check.py +87 -46
- sky/client/cli/command.py +1004 -434
- sky/client/cli/flags.py +4 -2
- sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +12 -2
- sky/client/sdk.py +188 -65
- sky/client/sdk_async.py +34 -33
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +8 -0
- sky/clouds/aws.py +337 -129
- sky/clouds/azure.py +24 -18
- sky/clouds/cloud.py +47 -13
- sky/clouds/cudo.py +16 -13
- sky/clouds/do.py +9 -7
- sky/clouds/fluidstack.py +12 -5
- sky/clouds/gcp.py +14 -7
- sky/clouds/hyperbolic.py +12 -5
- sky/clouds/ibm.py +12 -5
- sky/clouds/kubernetes.py +80 -45
- sky/clouds/lambda_cloud.py +12 -5
- sky/clouds/nebius.py +23 -9
- sky/clouds/oci.py +19 -12
- sky/clouds/paperspace.py +4 -1
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +85 -24
- sky/clouds/scp.py +12 -8
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/slurm.py +578 -0
- sky/clouds/ssh.py +6 -3
- sky/clouds/utils/scp_utils.py +61 -50
- sky/clouds/vast.py +43 -27
- sky/clouds/vsphere.py +14 -16
- sky/core.py +296 -195
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +11 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-b589397dc09c5b4e.js} +1 -1
- sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-8369df1cf105221c.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-9e5d47818b9bdadd.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-12c559ec4d81fdbd.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-d187cd0413d72475.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-4f46050ca065d8f8.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-ef19d49c6d0e8500.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-96e0f298308da7e2.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-cb4da3abe08ebf19.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +1 -0
- sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/plugins/[...slug].html +1 -0
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +92 -1
- sky/data/mounting_utils.py +177 -30
- sky/data/storage.py +200 -19
- sky/data/storage_utils.py +10 -45
- sky/exceptions.py +18 -7
- sky/execution.py +74 -31
- sky/global_user_state.py +605 -191
- sky/jobs/__init__.py +2 -0
- sky/jobs/client/sdk.py +101 -4
- sky/jobs/client/sdk_async.py +31 -5
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +726 -284
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +250 -100
- sky/jobs/scheduler.py +271 -173
- sky/jobs/server/core.py +367 -114
- sky/jobs/server/server.py +81 -35
- sky/jobs/server/utils.py +89 -35
- sky/jobs/state.py +1498 -620
- sky/jobs/utils.py +771 -306
- sky/logs/agent.py +40 -5
- sky/logs/aws.py +9 -19
- sky/metrics/utils.py +282 -39
- sky/models.py +2 -0
- sky/optimizer.py +7 -6
- sky/provision/__init__.py +38 -1
- sky/provision/aws/config.py +34 -13
- sky/provision/aws/instance.py +5 -2
- sky/provision/azure/instance.py +5 -3
- sky/provision/common.py +22 -0
- sky/provision/cudo/instance.py +4 -3
- sky/provision/do/instance.py +4 -3
- sky/provision/docker_utils.py +112 -28
- sky/provision/fluidstack/instance.py +6 -5
- sky/provision/gcp/config.py +6 -1
- sky/provision/gcp/instance.py +4 -2
- sky/provision/hyperbolic/instance.py +4 -2
- sky/provision/instance_setup.py +66 -20
- sky/provision/kubernetes/__init__.py +2 -0
- sky/provision/kubernetes/config.py +7 -44
- sky/provision/kubernetes/constants.py +0 -1
- sky/provision/kubernetes/instance.py +609 -213
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +12 -8
- sky/provision/kubernetes/network_utils.py +8 -25
- sky/provision/kubernetes/utils.py +422 -422
- sky/provision/kubernetes/volume.py +150 -18
- sky/provision/lambda_cloud/instance.py +16 -13
- sky/provision/nebius/instance.py +6 -2
- sky/provision/nebius/utils.py +103 -86
- sky/provision/oci/instance.py +4 -2
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +45 -15
- sky/provision/runpod/__init__.py +2 -0
- sky/provision/runpod/instance.py +4 -3
- sky/provision/runpod/volume.py +69 -13
- sky/provision/scp/instance.py +307 -130
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/slurm/__init__.py +12 -0
- sky/provision/slurm/config.py +13 -0
- sky/provision/slurm/instance.py +572 -0
- sky/provision/slurm/utils.py +583 -0
- sky/provision/vast/instance.py +9 -4
- sky/provision/vast/utils.py +10 -6
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +3 -2
- sky/provision/vsphere/instance.py +8 -6
- sky/provision/vsphere/vsphere_utils.py +8 -1
- sky/resources.py +11 -3
- sky/schemas/api/responses.py +107 -6
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +2 -0
- sky/serve/client/impl.py +55 -21
- sky/serve/constants.py +4 -3
- sky/serve/controller.py +17 -11
- sky/serve/load_balancing_policies.py +1 -1
- sky/serve/replica_managers.py +219 -142
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +63 -54
- sky/serve/serve_utils.py +145 -109
- sky/serve/server/core.py +46 -25
- sky/serve/server/impl.py +311 -162
- sky/serve/server/server.py +21 -19
- sky/serve/service.py +84 -68
- sky/serve/service_spec.py +45 -7
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +12 -7
- sky/server/common.py +47 -24
- sky/server/config.py +62 -28
- sky/server/constants.py +9 -1
- sky/server/daemons.py +109 -38
- sky/server/metrics.py +76 -96
- sky/server/middleware_utils.py +166 -0
- sky/server/plugins.py +222 -0
- sky/server/requests/executor.py +384 -145
- sky/server/requests/payloads.py +83 -19
- sky/server/requests/preconditions.py +15 -13
- sky/server/requests/request_names.py +123 -0
- sky/server/requests/requests.py +511 -157
- sky/server/requests/serializers/decoders.py +48 -17
- sky/server/requests/serializers/encoders.py +102 -20
- sky/server/requests/serializers/return_value_serializers.py +60 -0
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +116 -24
- sky/server/server.py +497 -179
- sky/server/server_utils.py +30 -0
- sky/server/stream_utils.py +219 -45
- sky/server/uvicorn.py +30 -19
- sky/setup_files/MANIFEST.in +6 -1
- sky/setup_files/alembic.ini +8 -0
- sky/setup_files/dependencies.py +64 -19
- sky/setup_files/setup.py +44 -44
- sky/sky_logging.py +13 -5
- sky/skylet/attempt_skylet.py +116 -24
- sky/skylet/configs.py +3 -1
- sky/skylet/constants.py +139 -29
- sky/skylet/events.py +74 -14
- sky/skylet/executor/__init__.py +1 -0
- sky/skylet/executor/slurm.py +189 -0
- sky/skylet/job_lib.py +143 -105
- sky/skylet/log_lib.py +252 -8
- sky/skylet/log_lib.pyi +47 -7
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +524 -0
- sky/skylet/skylet.py +27 -2
- sky/skylet/subprocess_daemon.py +104 -28
- sky/skypilot_config.py +99 -79
- sky/ssh_node_pools/constants.py +12 -0
- sky/ssh_node_pools/core.py +40 -3
- sky/ssh_node_pools/deploy/__init__.py +4 -0
- sky/ssh_node_pools/deploy/deploy.py +952 -0
- sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
- sky/ssh_node_pools/deploy/utils.py +173 -0
- sky/ssh_node_pools/server.py +20 -21
- sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
- sky/task.py +221 -104
- sky/templates/aws-ray.yml.j2 +1 -0
- sky/templates/azure-ray.yml.j2 +1 -0
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +1 -0
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +1 -0
- sky/templates/hyperbolic-ray.yml.j2 +1 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +3 -0
- sky/templates/kubernetes-ray.yml.j2 +204 -55
- sky/templates/lambda-ray.yml.j2 +1 -0
- sky/templates/nebius-ray.yml.j2 +3 -0
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +1 -0
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +1 -0
- sky/templates/scp-ray.yml.j2 +1 -0
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/slurm-ray.yml.j2 +85 -0
- sky/templates/vast-ray.yml.j2 +2 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +188 -43
- sky/usage/usage_lib.py +16 -4
- sky/users/model.conf +1 -1
- sky/users/permission.py +84 -44
- sky/users/rbac.py +31 -3
- sky/utils/accelerator_registry.py +6 -3
- sky/utils/admin_policy_utils.py +18 -5
- sky/utils/annotations.py +128 -6
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/cli_utils/status_utils.py +12 -7
- sky/utils/cluster_utils.py +28 -6
- sky/utils/command_runner.py +283 -30
- sky/utils/command_runner.pyi +63 -7
- sky/utils/common.py +3 -1
- sky/utils/common_utils.py +55 -7
- sky/utils/config_utils.py +1 -14
- sky/utils/context.py +127 -40
- sky/utils/context_utils.py +73 -18
- sky/utils/controller_utils.py +229 -70
- sky/utils/db/db_utils.py +95 -18
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +24 -7
- sky/utils/env_options.py +4 -0
- sky/utils/git.py +559 -1
- sky/utils/kubernetes/create_cluster.sh +15 -30
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/gpu_labeler.py +13 -3
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +187 -260
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +7 -376
- sky/utils/kubernetes_enums.py +7 -15
- sky/utils/lock_events.py +4 -4
- sky/utils/locks.py +128 -31
- sky/utils/log_utils.py +0 -319
- sky/utils/resource_checker.py +13 -10
- sky/utils/resources_utils.py +53 -29
- sky/utils/rich_utils.py +8 -4
- sky/utils/schemas.py +138 -52
- sky/utils/subprocess_utils.py +17 -4
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +2 -1
- sky/utils/ux_utils.py +35 -1
- sky/utils/volume.py +88 -4
- sky/utils/yaml_utils.py +9 -0
- sky/volumes/client/sdk.py +48 -10
- sky/volumes/server/core.py +59 -22
- sky/volumes/server/server.py +46 -17
- sky/volumes/volume.py +54 -42
- sky/workspaces/core.py +57 -21
- sky/workspaces/server.py +13 -12
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/METADATA +343 -65
- skypilot_nightly-1.0.0.dev20251210.dist-info/RECORD +629 -0
- skypilot_nightly-1.0.0.dev20251210.dist-info/top_level.txt +2 -0
- sky/client/cli/git.py +0 -549
- sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
- sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
- sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
- sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
- sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
- sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
- sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
- sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
- sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
- sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
- sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
- sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
- sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
- sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
- sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
- sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/_app-ce361c6959bc2001.js +0 -34
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
- sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/kubernetes/cleanup-tunnel.sh +0 -62
- sky/utils/kubernetes/deploy_remote_cluster.py +0 -1299
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
- skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
- /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → KYAhEFa3FTfq4JyKVgo-s}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/licenses/LICENSE +0 -0
sky/utils/annotations.py
CHANGED
|
@@ -1,13 +1,20 @@
|
|
|
1
1
|
"""Annotations for public APIs."""
|
|
2
2
|
|
|
3
3
|
import functools
|
|
4
|
-
|
|
4
|
+
import threading
|
|
5
|
+
import time
|
|
6
|
+
from typing import Callable, List, Literal, TypeVar
|
|
7
|
+
import weakref
|
|
5
8
|
|
|
9
|
+
import cachetools
|
|
6
10
|
from typing_extensions import ParamSpec
|
|
7
11
|
|
|
8
12
|
# Whether the current process is a SkyPilot API server process.
|
|
9
13
|
is_on_api_server = True
|
|
10
|
-
|
|
14
|
+
_FUNCTIONS_NEED_RELOAD_CACHE_LOCK = threading.Lock()
|
|
15
|
+
# Caches can be thread-local, use weakref to avoid blocking the GC when the
|
|
16
|
+
# thread is destroyed.
|
|
17
|
+
_FUNCTIONS_NEED_RELOAD_CACHE: List[weakref.ReferenceType] = []
|
|
11
18
|
|
|
12
19
|
T = TypeVar('T')
|
|
13
20
|
P = ParamSpec('P')
|
|
@@ -29,6 +36,94 @@ def client_api(func: Callable[P, T]) -> Callable[P, T]:
|
|
|
29
36
|
return wrapper
|
|
30
37
|
|
|
31
38
|
|
|
39
|
+
def _register_functions_need_reload_cache(func: Callable) -> Callable:
    """Register a cached function that needs to be reloaded for a new request.

    Only a weak reference to the callable is appended to
    ``_FUNCTIONS_NEED_RELOAD_CACHE``, so registration does not block GC of
    the function.

    Args:
        func: A cached callable exposing a ``cache_clear`` attribute.

    Returns:
        The callable the caller should use from now on: ``func`` itself when
        it can be weakly referenced, otherwise a pass-through wrapper that
        forwards the cache-management attributes of ``func``.
    """
    assert hasattr(func, 'cache_clear'), f'{func.__name__} is not cacheable'
    wrapped_fn = func
    try:
        func_ref = weakref.ref(func)
    except TypeError:
        # The function might be not weakrefable (e.g. functools.lru_cache),
        # wrap it in a plain Python function in this case.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            return func(*args, **kwargs)

        # functools.wraps only copies __dict__ entries, so cache helpers that
        # live on the cached object's type must be forwarded explicitly.
        # Without this, callers lose cache_info()/cache_parameters().
        for attr in ('cache_clear', 'cache_info', 'cache_parameters'):
            if hasattr(func, attr):
                setattr(wrapper, attr, getattr(func, attr))
        func_ref = weakref.ref(wrapper)
        wrapped_fn = wrapper
    with _FUNCTIONS_NEED_RELOAD_CACHE_LOCK:
        _FUNCTIONS_NEED_RELOAD_CACHE.append(func_ref)
    return wrapped_fn
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class ThreadLocalTTLCache(threading.local):
    """Thread-local storage backing the thread_local_ttl_cache decorator.

    Being a ``threading.local`` subclass, every thread sees its own ``cache``
    attribute, which is built lazily on first use in that thread.
    """

    def __init__(self, func, maxsize: int, ttl: int):
        super().__init__()
        self.func, self.maxsize, self.ttl = func, maxsize, ttl

    def get_cache(self):
        """Return the current thread's cached function, creating it once."""
        try:
            return self.cache
        except AttributeError:
            pass
        # First use in this thread: wrap the function in a request-scoped
        # TTL cache driven by wall-clock time.
        build = ttl_cache(scope='request',
                          maxsize=self.maxsize,
                          ttl=self.ttl,
                          timer=time.time)
        self.cache = build(self.func)
        return self.cache

    def __del__(self):
        # Nothing to release if no cache is visible on this object.
        if not hasattr(self, 'cache'):
            return
        self.cache.cache_clear()
        self.cache = None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def thread_local_ttl_cache(maxsize=32, ttl=60 * 55):
    """Decorator that caches a function in a separate TTL cache per thread.

    Args:
        maxsize: Maximum size of the cache.
        ttl: Time to live for the cache in seconds.
            Default is 55 minutes, a bit less than 1 hour
            default lifetime of an STS token.

    Returns:
        A decorator. The decorated function gains ``cache_info()`` and
        ``cache_clear()`` attributes, both acting only on the cache of the
        thread that calls them.
    """

    def decorator(func):
        # Only the thread-local holder is created here. The cache itself
        # cannot be built now, because this code runs at import time and
        # would always end up with the main thread's cache.
        per_thread = ThreadLocalTTLCache(func, maxsize, ttl)

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # This executes inside the real call, possibly on a worker
            # thread, so get_cache() resolves to that thread's own cache.
            cached_func = per_thread.get_cache()
            return cached_func(*args, **kwargs)

        def cache_info():
            # Reports statistics for the current thread's cache only.
            current = per_thread.get_cache()
            return current.cache_info()

        def cache_clear():
            # Clears the current thread's cache only.
            current = per_thread.get_cache()
            current.cache_clear()

        wrapper.cache_info = cache_info  # type: ignore[attr-defined]
        wrapper.cache_clear = cache_clear  # type: ignore[attr-defined]
        return wrapper

    return decorator
|
|
125
|
+
|
|
126
|
+
|
|
32
127
|
def lru_cache(scope: Literal['global', 'request'], *lru_cache_args,
|
|
33
128
|
**lru_cache_kwargs) -> Callable:
|
|
34
129
|
"""LRU cache decorator for functions.
|
|
@@ -50,13 +145,40 @@ def lru_cache(scope: Literal['global', 'request'], *lru_cache_args,
|
|
|
50
145
|
else:
|
|
51
146
|
cached_func = functools.lru_cache(*lru_cache_args,
|
|
52
147
|
**lru_cache_kwargs)(func)
|
|
53
|
-
|
|
54
|
-
|
|
148
|
+
return _register_functions_need_reload_cache(cached_func)
|
|
149
|
+
|
|
150
|
+
return decorator
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def ttl_cache(scope: Literal['global', 'request'], *ttl_cache_args,
              **ttl_cache_kwargs) -> Callable:
    """TTL cache decorator for functions.

    The remaining positional and keyword arguments are forwarded to
    ``cachetools.TTLCache``. The ``scope`` argument lets us track which
    functions need to be reloaded for a new request: with ``'global'`` the
    cached function is returned as is, otherwise it is also registered for
    request-level cache clearing.
    """

    def decorator(func: Callable[P, T]) -> Callable[P, T]:
        store = cachetools.TTLCache(*ttl_cache_args, **ttl_cache_kwargs)
        cached_func = cachetools.cached(store)(func)
        if scope == 'global':
            return cached_func
        return _register_functions_need_reload_cache(cached_func)

    return decorator
|
|
57
171
|
|
|
58
172
|
|
|
59
173
|
def clear_request_level_cache():
    """Clear the request-level cache.

    Calls ``cache_clear()`` on every registered function that is still alive
    and prunes the registry entries whose referent has been collected.
    """
    survivors = []
    with _FUNCTIONS_NEED_RELOAD_CACHE_LOCK:
        for ref in _FUNCTIONS_NEED_RELOAD_CACHE:
            target = ref()
            if target is not None:
                target.cache_clear()
                survivors.append(ref)
            # Otherwise the function has been GC'ed; its reference is dropped
            # by not carrying it over.
        # Replace the contents in place so the module-level list object
        # stays the same.
        _FUNCTIONS_NEED_RELOAD_CACHE[:] = survivors
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Asyncio utilities."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import functools
|
|
5
|
+
from typing import Set
|
|
6
|
+
|
|
7
|
+
_background_tasks: Set[asyncio.Task] = set()
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def shield(func):
    """Shield the decorated async function from cancellation.

    If the outer coroutine is cancelled, the inner decorated function is
    protected from cancellation by asyncio.shield(). A reference to the
    inner task is kept so that it is not garbage collected before it is
    done.

    For example, filelock.AsyncFileLock is not cancellation safe. The
    following code:

        async def fn_with_lock():
            async with filelock.AsyncFileLock('lock'):
                await asyncio.sleep(1)

    is equivalent to:

        # The lock may leak if the cancellation happens in
        # lock.acquire() or lock.release()
        async def fn_with_lock():
            lock = filelock.AsyncFileLock('lock')
            await lock.acquire()
            try:
                await asyncio.sleep(1)
            finally:
                await lock.release()

    Shielding the function ensures that no cancellation happens inside the
    function, thus the lock will be released properly:

        @shield
        async def fn_with_lock():
            ...

    Note that the resource acquisition and release should usually be
    protected in one @shield block but not separately, e.g.:

        lock = filelock.AsyncFileLock('lock')

        @shield
        async def acquire():
            await lock.acquire()

        @shield
        async def release():
            await lock.release()

        async def fn_with_lock():
            await acquire()
            try:
                do_something()
            finally:
                await release()

    The above code is not safe because if `fn_with_lock` is cancelled,
    `acquire()` and `release()` will be executed in the background
    concurrently and cause race conditions.
    """

    @functools.wraps(func)
    async def async_wrapper(*args, **kwargs):
        inner = asyncio.create_task(func(*args, **kwargs))
        try:
            result = await asyncio.shield(inner)
        except asyncio.CancelledError:
            # Keep the inner task referenced from the module-level set until
            # it completes, then drop the reference.
            _background_tasks.add(inner)
            inner.add_done_callback(_background_tasks.discard)
            raise
        return result

    return async_wrapper
|
sky/utils/atomic.py
CHANGED
sky/utils/auth_utils.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Utils for managing SkyPilot SSH key pairs."""
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
import os
|
|
5
|
+
from typing import Tuple
|
|
6
|
+
|
|
7
|
+
import filelock
|
|
8
|
+
|
|
9
|
+
from sky import global_user_state
|
|
10
|
+
from sky import sky_logging
|
|
11
|
+
from sky.utils import common_utils
|
|
12
|
+
|
|
13
|
+
logger = sky_logging.init_logger(__name__)
|
|
14
|
+
|
|
15
|
+
MAX_TRIALS = 64
|
|
16
|
+
# TODO(zhwu): Support user specified key pair.
|
|
17
|
+
# We intentionally do not store the ssh key pair in
|
|
18
|
+
# ~/.sky/api_server/clients, i.e. sky.server.common.API_SERVER_CLIENT_DIR,
|
|
19
|
+
# because the ssh key pair needs to persist across API server restarts, while
|
|
20
|
+
# the former dir is ephemeral.
|
|
21
|
+
_SSH_KEY_PATH_PREFIX = '~/.sky/clients/{user_hash}/ssh'
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_ssh_key_and_lock_path(user_hash: str) -> Tuple[str, str, str]:
    """Return the private key, public key and lock file paths for a user.

    The per-user key directory is created if it does not exist yet
    (requested mode 0o700). The returned paths are built from the unexpanded
    prefix, so they still start with ``~``.

    Args:
        user_hash: Value substituted into the key directory template.

    Returns:
        A ``(private_key_path, public_key_path, lock_path)`` tuple.
    """
    key_dir = _SSH_KEY_PATH_PREFIX.format(user_hash=user_hash)
    os.makedirs(os.path.expanduser(key_dir), mode=0o700, exist_ok=True)

    private_key_path, public_key_path, lock_path = (
        os.path.join(key_dir, file_name)
        for file_name in ('sky-key', 'sky-key.pub', '.__internal-sky-key.lock'))
    return private_key_path, public_key_path, lock_path
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _generate_rsa_key_pair() -> Tuple[str, str]:
    """Generate a fresh 2048-bit RSA key pair.

    Returns:
        A ``(public_key, private_key)`` tuple, in that order. The public key
        is in OpenSSH format and the private key is an unencrypted PEM
        (TraditionalOpenSSL); both are stripped UTF-8 strings.
    """
    # cryptography is imported lazily to avoid expensive third-party imports
    # when no key has to be generated.
    # pylint: disable=import-outside-toplevel
    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives import serialization
    from cryptography.hazmat.primitives.asymmetric import rsa

    rsa_key = rsa.generate_private_key(backend=default_backend(),
                                       public_exponent=65537,
                                       key_size=2048)

    private_pem = rsa_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.TraditionalOpenSSL,
        encryption_algorithm=serialization.NoEncryption())
    private_key = private_pem.decode('utf-8').strip()

    public_openssh = rsa_key.public_key().public_bytes(
        serialization.Encoding.OpenSSH, serialization.PublicFormat.OpenSSH)
    public_key = public_openssh.decode('utf-8').strip()

    return public_key, private_key
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _save_key_pair(private_key_path: str, public_key_path: str,
                   private_key: str, public_key: str) -> None:
    """Writes an SSH key pair to disk with explicit file permissions.

    The directory holding the private key is created (requesting mode 0o700)
    if needed. The private key file ends up with mode 0o600 and the public key
    file with mode 0o644, whether or not the files existed beforehand.

    Args:
        private_key_path: Destination path of the private key file.
        public_key_path: Destination path of the public key file.
        private_key: Private key content, written verbatim.
        public_key: Public key content, written verbatim.
    """
    key_dir = os.path.dirname(private_key_path)
    # A bare file name has an empty dirname, and os.makedirs('') would raise.
    if key_dir:
        os.makedirs(key_dir, exist_ok=True, mode=0o700)

    for path, content, mode in ((private_key_path, private_key, 0o600),
                                (public_key_path, public_key, 0o644)):
        with open(path,
                  'w',
                  encoding='utf-8',
                  opener=functools.partial(os.open, mode=mode)) as f:
            # The mode passed to os.open() is only honored when the file is
            # newly created. Set it explicitly (before writing the content) so
            # that a pre-existing file with looser permissions is tightened.
            os.chmod(path, mode)
            f.write(content)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def get_or_generate_keys() -> Tuple[str, str]:
    """Returns the absolute private and public key paths.

    If the private key file of the current user is missing, the key pair is
    fetched from global_user_state; when no pair is stored there, a new RSA
    pair is generated and stored. The pair is then written to disk. The whole
    check-and-write sequence runs under a file lock.

    Returns:
        A (private_key_path, public_key_path) tuple of expanded paths.
    """
    user_hash = common_utils.get_user_hash()
    private_key_path, public_key_path, lock_path = (
        os.path.expanduser(path)
        for path in get_ssh_key_and_lock_path(user_hash))

    # Make sure the directory holding the lock (and the keys) exists,
    # requesting owner-only permissions.
    os.makedirs(os.path.dirname(lock_path), exist_ok=True, mode=0o700)

    with filelock.FileLock(lock_path, timeout=10):
        private_key_missing = not os.path.exists(private_key_path)
        if private_key_missing:
            ssh_public_key, ssh_private_key, exists = (
                global_user_state.get_ssh_keys(user_hash))
            if not exists:
                # Nothing stored for this user yet: create a pair and record
                # it before materializing it on disk.
                ssh_public_key, ssh_private_key = _generate_rsa_key_pair()
                global_user_state.set_ssh_keys(user_hash, ssh_public_key,
                                               ssh_private_key)
            _save_key_pair(private_key_path, public_key_path, ssh_private_key,
                           ssh_public_key)

    assert os.path.exists(public_key_path), (
        'Private key found, but associated public key '
        f'{public_key_path} does not exist.')
    return private_key_path, public_key_path
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def create_ssh_key_files_from_db(private_key_path: str) -> bool:
    """Creates the ssh key files from the database.

    Args:
        private_key_path: Path of the private key, expected to follow the
            layout ~/.sky/clients/<user_hash>/ssh/sky-key. It may be given
            either with a leading `~` or already expanded.

    Returns:
        True if both key files already exist or were written from the keys
        stored in global_user_state; False if no keys are stored for the user
        hash derived from the path.

    Raises:
        AssertionError: If the path does not follow the expected layout, or
            if the public key file is still missing after the private key
            file is present.
    """
    # Assume private key path is in the format of
    # ~/.sky/clients/<user_hash>/ssh/sky-key
    separated_path = os.path.normpath(private_key_path).split(os.path.sep)
    assert separated_path[-1] == 'sky-key'
    assert separated_path[-2] == 'ssh'
    user_hash = separated_path[-3]

    private_key_path_generated, public_key_path, lock_path = (
        get_ssh_key_and_lock_path(user_hash))
    # Expand `~` on both sides before comparing. Comparing the raw argument
    # against the expanded generated path would reject the documented
    # `~/.sky/...` form of the path.
    private_key_path = os.path.expanduser(private_key_path)
    expected_private_key_path = os.path.expanduser(private_key_path_generated)
    assert private_key_path == expected_private_key_path, (
        f'Private key path {private_key_path} does not '
        'match the generated path '
        f'{expected_private_key_path}')
    public_key_path = os.path.expanduser(public_key_path)
    lock_path = os.path.expanduser(lock_path)
    lock_dir = os.path.dirname(lock_path)

    # Fast path: nothing to do, so avoid taking the lock.
    if os.path.exists(private_key_path) and os.path.exists(public_key_path):
        return True
    # Make sure the directory holding the lock (and the keys) exists,
    # requesting owner-only permissions.
    os.makedirs(lock_dir, exist_ok=True, mode=0o700)
    with filelock.FileLock(lock_path, timeout=10):
        # Re-check under the lock: another process may have written the keys
        # in the meantime.
        if not os.path.exists(private_key_path):
            ssh_public_key, ssh_private_key, exists = (
                global_user_state.get_ssh_keys(user_hash))
            if not exists:
                logger.debug(f'SSH keys not found for user {user_hash}')
                return False
            _save_key_pair(private_key_path, public_key_path, ssh_private_key,
                           ssh_public_key)
    assert os.path.exists(public_key_path), (
        'Private key found, but associated public key '
        f'{public_key_path} does not exist.')
    return True
|
|
@@ -11,6 +11,7 @@ from sky.utils import common_utils
|
|
|
11
11
|
from sky.utils import log_utils
|
|
12
12
|
from sky.utils import resources_utils
|
|
13
13
|
from sky.utils import status_lib
|
|
14
|
+
from sky.utils import ux_utils
|
|
14
15
|
|
|
15
16
|
if typing.TYPE_CHECKING:
|
|
16
17
|
from sky.provision.kubernetes import utils as kubernetes_utils
|
|
@@ -105,11 +106,9 @@ def show_status_table(cluster_records: List[responses.StatusResponse],
|
|
|
105
106
|
|
|
106
107
|
if query_clusters:
|
|
107
108
|
cluster_names = {record['name'] for record in cluster_records}
|
|
108
|
-
not_found_clusters =
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
if cluster not in cluster_names
|
|
112
|
-
]
|
|
109
|
+
not_found_clusters = ux_utils.get_non_matched_query(
|
|
110
|
+
query_clusters, cluster_names)
|
|
111
|
+
not_found_clusters = [repr(cluster) for cluster in not_found_clusters]
|
|
113
112
|
if not_found_clusters:
|
|
114
113
|
cluster_str = 'Cluster'
|
|
115
114
|
if len(not_found_clusters) > 1:
|
|
@@ -283,8 +282,14 @@ def _get_resources(cluster_record: _ClusterRecord,
|
|
|
283
282
|
if resources_str_full is not None:
|
|
284
283
|
resources_str = resources_str_full
|
|
285
284
|
if resources_str is None:
|
|
286
|
-
|
|
287
|
-
|
|
285
|
+
resources_str_simple, resources_str_full = (
|
|
286
|
+
resources_utils.get_readable_resources_repr(
|
|
287
|
+
handle, simplified_only=truncate))
|
|
288
|
+
if truncate:
|
|
289
|
+
resources_str = resources_str_simple
|
|
290
|
+
else:
|
|
291
|
+
assert resources_str_full is not None
|
|
292
|
+
resources_str = resources_str_full
|
|
288
293
|
|
|
289
294
|
return resources_str
|
|
290
295
|
return '-'
|
sky/utils/cluster_utils.py
CHANGED
|
@@ -144,6 +144,9 @@ class SSHConfigHelper(object):
|
|
|
144
144
|
username = docker_user
|
|
145
145
|
|
|
146
146
|
key_path = cls.generate_local_key_file(cluster_name, auth_config)
|
|
147
|
+
# Keep the unexpanded path for SSH config (with ~)
|
|
148
|
+
key_path_for_config = key_path
|
|
149
|
+
# Expand the path for internal operations that need absolute path
|
|
147
150
|
key_path = os.path.expanduser(key_path)
|
|
148
151
|
sky_autogen_comment = ('# Added by sky (use `sky stop/down '
|
|
149
152
|
f'{cluster_name}` to remove)')
|
|
@@ -190,11 +193,29 @@ class SSHConfigHelper(object):
|
|
|
190
193
|
proxy_command = auth_config.get('ssh_proxy_command', None)
|
|
191
194
|
|
|
192
195
|
docker_proxy_command_generator = None
|
|
196
|
+
proxy_command_for_nodes = proxy_command
|
|
193
197
|
if docker_user is not None:
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
+
|
|
199
|
+
def _docker_proxy_cmd(ip: str, port: int) -> str:
|
|
200
|
+
inner_proxy = proxy_command
|
|
201
|
+
inner_port = port or 22
|
|
202
|
+
if inner_proxy is not None:
|
|
203
|
+
inner_proxy = inner_proxy.replace('%h', ip)
|
|
204
|
+
inner_proxy = inner_proxy.replace('%p', str(inner_port))
|
|
205
|
+
return ' '.join(['ssh'] + command_runner.ssh_options_list(
|
|
206
|
+
key_path,
|
|
207
|
+
ssh_control_name=None,
|
|
208
|
+
ssh_proxy_command=inner_proxy,
|
|
209
|
+
port=inner_port,
|
|
210
|
+
# ProxyCommand (ssh -W) is a forwarding tunnel, not an
|
|
211
|
+
# interactive session. ControlMaster would cache these
|
|
212
|
+
# processes, causing them to hang and block subsequent
|
|
213
|
+
# connections. Each ProxyCommand should be ephemeral.
|
|
214
|
+
disable_control_master=True
|
|
215
|
+
) + ['-W', '%h:%p', f'{auth_config["ssh_user"]}@{ip}'])
|
|
216
|
+
|
|
217
|
+
docker_proxy_command_generator = _docker_proxy_cmd
|
|
218
|
+
proxy_command_for_nodes = None
|
|
198
219
|
|
|
199
220
|
codegen = ''
|
|
200
221
|
# Add the nodes to the codegen
|
|
@@ -208,8 +229,9 @@ class SSHConfigHelper(object):
|
|
|
208
229
|
node_name = cluster_name if i == 0 else cluster_name + f'-worker{i}'
|
|
209
230
|
# TODO(romilb): Update port number when k8s supports multinode
|
|
210
231
|
codegen += cls._get_generated_config(
|
|
211
|
-
sky_autogen_comment, node_name, ip, username,
|
|
212
|
-
|
|
232
|
+
sky_autogen_comment, node_name, ip, username,
|
|
233
|
+
key_path_for_config, proxy_command_for_nodes, port,
|
|
234
|
+
docker_proxy_command) + '\n'
|
|
213
235
|
|
|
214
236
|
cluster_config_path = os.path.expanduser(
|
|
215
237
|
cls.ssh_cluster_path.format(cluster_name))
|