skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +10 -2
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +64 -0
- sky/adaptors/nebius.py +3 -1
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +20 -0
- sky/authentication.py +157 -263
- sky/backends/__init__.py +3 -2
- sky/backends/backend.py +11 -3
- sky/backends/backend_utils.py +588 -184
- sky/backends/cloud_vm_ray_backend.py +1088 -904
- sky/backends/local_docker_backend.py +9 -5
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +18 -0
- sky/catalog/__init__.py +8 -0
- sky/catalog/aws_catalog.py +4 -0
- sky/catalog/common.py +19 -1
- sky/catalog/data_fetchers/fetch_aws.py +102 -80
- sky/catalog/data_fetchers/fetch_gcp.py +30 -3
- sky/catalog/data_fetchers/fetch_nebius.py +9 -6
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/kubernetes_catalog.py +24 -28
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/catalog/runpod_catalog.py +5 -1
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/check.py +73 -43
- sky/client/cli/command.py +675 -412
- sky/client/cli/flags.py +4 -2
- sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +12 -2
- sky/client/sdk.py +132 -63
- sky/client/sdk_async.py +34 -33
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +6 -0
- sky/clouds/aws.py +337 -129
- sky/clouds/azure.py +24 -18
- sky/clouds/cloud.py +40 -13
- sky/clouds/cudo.py +16 -13
- sky/clouds/do.py +9 -7
- sky/clouds/fluidstack.py +12 -5
- sky/clouds/gcp.py +14 -7
- sky/clouds/hyperbolic.py +12 -5
- sky/clouds/ibm.py +12 -5
- sky/clouds/kubernetes.py +80 -45
- sky/clouds/lambda_cloud.py +12 -5
- sky/clouds/nebius.py +23 -9
- sky/clouds/oci.py +19 -12
- sky/clouds/paperspace.py +4 -1
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +85 -24
- sky/clouds/scp.py +12 -8
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +4 -2
- sky/clouds/utils/scp_utils.py +61 -50
- sky/clouds/vast.py +33 -27
- sky/clouds/vsphere.py +14 -16
- sky/core.py +174 -165
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-7b45f9fbb6308557.js} +1 -1
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-c0b5935149902e6f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-aed0ea19df7cf961.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-84a40f8c7c627fe4.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-531b2f8c4bf89f82.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +92 -1
- sky/data/mounting_utils.py +162 -29
- sky/data/storage.py +200 -19
- sky/data/storage_utils.py +10 -45
- sky/exceptions.py +18 -7
- sky/execution.py +74 -31
- sky/global_user_state.py +605 -191
- sky/jobs/__init__.py +2 -0
- sky/jobs/client/sdk.py +101 -4
- sky/jobs/client/sdk_async.py +31 -5
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +726 -284
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +250 -100
- sky/jobs/scheduler.py +271 -173
- sky/jobs/server/core.py +367 -114
- sky/jobs/server/server.py +81 -35
- sky/jobs/server/utils.py +89 -35
- sky/jobs/state.py +1498 -620
- sky/jobs/utils.py +771 -306
- sky/logs/agent.py +40 -5
- sky/logs/aws.py +9 -19
- sky/metrics/utils.py +282 -39
- sky/optimizer.py +1 -1
- sky/provision/__init__.py +37 -1
- sky/provision/aws/config.py +34 -13
- sky/provision/aws/instance.py +5 -2
- sky/provision/azure/instance.py +5 -3
- sky/provision/common.py +2 -0
- sky/provision/cudo/instance.py +4 -3
- sky/provision/do/instance.py +4 -3
- sky/provision/docker_utils.py +97 -26
- sky/provision/fluidstack/instance.py +6 -5
- sky/provision/gcp/config.py +6 -1
- sky/provision/gcp/instance.py +4 -2
- sky/provision/hyperbolic/instance.py +4 -2
- sky/provision/instance_setup.py +66 -20
- sky/provision/kubernetes/__init__.py +2 -0
- sky/provision/kubernetes/config.py +7 -44
- sky/provision/kubernetes/constants.py +0 -1
- sky/provision/kubernetes/instance.py +609 -213
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +12 -8
- sky/provision/kubernetes/network_utils.py +8 -25
- sky/provision/kubernetes/utils.py +382 -418
- sky/provision/kubernetes/volume.py +150 -18
- sky/provision/lambda_cloud/instance.py +16 -13
- sky/provision/nebius/instance.py +6 -2
- sky/provision/nebius/utils.py +103 -86
- sky/provision/oci/instance.py +4 -2
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +30 -9
- sky/provision/runpod/__init__.py +2 -0
- sky/provision/runpod/instance.py +4 -3
- sky/provision/runpod/volume.py +69 -13
- sky/provision/scp/instance.py +307 -130
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/vast/instance.py +5 -3
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +3 -2
- sky/provision/vsphere/instance.py +8 -6
- sky/provision/vsphere/vsphere_utils.py +8 -1
- sky/resources.py +11 -3
- sky/schemas/api/responses.py +107 -6
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +2 -0
- sky/serve/client/impl.py +55 -21
- sky/serve/constants.py +4 -3
- sky/serve/controller.py +17 -11
- sky/serve/load_balancing_policies.py +1 -1
- sky/serve/replica_managers.py +219 -142
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +63 -54
- sky/serve/serve_utils.py +145 -109
- sky/serve/server/core.py +46 -25
- sky/serve/server/impl.py +311 -162
- sky/serve/server/server.py +21 -19
- sky/serve/service.py +84 -68
- sky/serve/service_spec.py +45 -7
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +12 -7
- sky/server/common.py +47 -24
- sky/server/config.py +62 -28
- sky/server/constants.py +9 -1
- sky/server/daemons.py +109 -38
- sky/server/metrics.py +76 -96
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +381 -145
- sky/server/requests/payloads.py +71 -18
- sky/server/requests/preconditions.py +15 -13
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +507 -157
- sky/server/requests/serializers/decoders.py +48 -17
- sky/server/requests/serializers/encoders.py +85 -20
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +116 -24
- sky/server/server.py +420 -172
- sky/server/stream_utils.py +219 -45
- sky/server/uvicorn.py +30 -19
- sky/setup_files/MANIFEST.in +6 -1
- sky/setup_files/alembic.ini +8 -0
- sky/setup_files/dependencies.py +62 -19
- sky/setup_files/setup.py +44 -44
- sky/sky_logging.py +13 -5
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/configs.py +3 -1
- sky/skylet/constants.py +111 -26
- sky/skylet/events.py +64 -10
- sky/skylet/job_lib.py +141 -104
- sky/skylet/log_lib.py +233 -5
- sky/skylet/log_lib.pyi +40 -2
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +524 -0
- sky/skylet/skylet.py +22 -1
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +99 -79
- sky/ssh_node_pools/server.py +9 -8
- sky/task.py +221 -104
- sky/templates/aws-ray.yml.j2 +1 -0
- sky/templates/azure-ray.yml.j2 +1 -0
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +1 -0
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +1 -0
- sky/templates/hyperbolic-ray.yml.j2 +1 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +3 -0
- sky/templates/kubernetes-ray.yml.j2 +196 -55
- sky/templates/lambda-ray.yml.j2 +1 -0
- sky/templates/nebius-ray.yml.j2 +3 -0
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +1 -0
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +1 -0
- sky/templates/scp-ray.yml.j2 +1 -0
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +188 -43
- sky/usage/usage_lib.py +16 -4
- sky/users/permission.py +60 -43
- sky/utils/accelerator_registry.py +6 -3
- sky/utils/admin_policy_utils.py +18 -5
- sky/utils/annotations.py +22 -0
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/cli_utils/status_utils.py +12 -7
- sky/utils/cluster_utils.py +28 -6
- sky/utils/command_runner.py +88 -27
- sky/utils/command_runner.pyi +36 -3
- sky/utils/common.py +3 -1
- sky/utils/common_utils.py +37 -4
- sky/utils/config_utils.py +1 -14
- sky/utils/context.py +127 -40
- sky/utils/context_utils.py +73 -18
- sky/utils/controller_utils.py +229 -70
- sky/utils/db/db_utils.py +95 -18
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +24 -7
- sky/utils/env_options.py +4 -0
- sky/utils/git.py +559 -1
- sky/utils/kubernetes/create_cluster.sh +15 -30
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/{deploy_remote_cluster.py → deploy_ssh_node_pools.py} +258 -380
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/gpu_labeler.py +13 -3
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +213 -194
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes_enums.py +7 -15
- sky/utils/lock_events.py +4 -4
- sky/utils/locks.py +128 -31
- sky/utils/log_utils.py +0 -319
- sky/utils/resource_checker.py +13 -10
- sky/utils/resources_utils.py +53 -29
- sky/utils/rich_utils.py +8 -4
- sky/utils/schemas.py +107 -52
- sky/utils/subprocess_utils.py +17 -4
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +2 -1
- sky/utils/ux_utils.py +35 -1
- sky/utils/volume.py +88 -4
- sky/utils/yaml_utils.py +9 -0
- sky/volumes/client/sdk.py +48 -10
- sky/volumes/server/core.py +59 -22
- sky/volumes/server/server.py +46 -17
- sky/volumes/volume.py +54 -42
- sky/workspaces/core.py +57 -21
- sky/workspaces/server.py +13 -12
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/METADATA +331 -65
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/client/cli/git.py +0 -549
- sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
- sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
- sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
- sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
- sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
- sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
- sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
- sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
- sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
- sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
- sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
- sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
- sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
- sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
- sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
- sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
- sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
- skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
- /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/authentication.py
CHANGED
|
@@ -19,34 +19,31 @@ controller. (Lambda cloud is an exception, due to the limitation of the cloud
|
|
|
19
19
|
provider. See the comments in setup_lambda_authentication)
|
|
20
20
|
"""
|
|
21
21
|
import copy
|
|
22
|
-
import functools
|
|
23
22
|
import os
|
|
24
23
|
import re
|
|
25
24
|
import socket
|
|
26
25
|
import subprocess
|
|
27
26
|
import sys
|
|
28
|
-
from typing import Any, Dict, Optional, Tuple
|
|
27
|
+
from typing import Any, Dict
|
|
29
28
|
import uuid
|
|
30
29
|
|
|
31
30
|
import colorama
|
|
32
|
-
import filelock
|
|
33
31
|
|
|
34
32
|
from sky import clouds
|
|
35
33
|
from sky import exceptions
|
|
36
|
-
from sky import global_user_state
|
|
37
34
|
from sky import sky_logging
|
|
38
|
-
from sky import skypilot_config
|
|
39
35
|
from sky.adaptors import gcp
|
|
40
36
|
from sky.adaptors import ibm
|
|
41
|
-
from sky.adaptors import kubernetes
|
|
42
37
|
from sky.adaptors import runpod
|
|
38
|
+
from sky.adaptors import seeweb as seeweb_adaptor
|
|
39
|
+
from sky.adaptors import shadeform as shadeform_adaptor
|
|
43
40
|
from sky.adaptors import vast
|
|
44
41
|
from sky.provision.fluidstack import fluidstack_utils
|
|
45
42
|
from sky.provision.kubernetes import utils as kubernetes_utils
|
|
46
43
|
from sky.provision.lambda_cloud import lambda_utils
|
|
44
|
+
from sky.provision.primeintellect import utils as primeintellect_utils
|
|
45
|
+
from sky.utils import auth_utils
|
|
47
46
|
from sky.utils import common_utils
|
|
48
|
-
from sky.utils import config_utils
|
|
49
|
-
from sky.utils import kubernetes_enums
|
|
50
47
|
from sky.utils import subprocess_utils
|
|
51
48
|
from sky.utils import ux_utils
|
|
52
49
|
from sky.utils import yaml_utils
|
|
@@ -58,144 +55,9 @@ logger = sky_logging.init_logger(__name__)
|
|
|
58
55
|
# using Cloud Client Libraries for Python, where possible, for new code
|
|
59
56
|
# development.
|
|
60
57
|
|
|
61
|
-
MAX_TRIALS = 64
|
|
62
|
-
# TODO(zhwu): Support user specified key pair.
|
|
63
|
-
# We intentionally not have the ssh key pair to be stored in
|
|
64
|
-
# ~/.sky/api_server/clients, i.e. sky.server.common.API_SERVER_CLIENT_DIR,
|
|
65
|
-
# because ssh key pair need to persist across API server restarts, while
|
|
66
|
-
# the former dir is empheral.
|
|
67
|
-
_SSH_KEY_PATH_PREFIX = '~/.sky/clients/{user_hash}/ssh'
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
def get_ssh_key_and_lock_path(
|
|
71
|
-
user_hash: Optional[str] = None) -> Tuple[str, str, str]:
|
|
72
|
-
if user_hash is None:
|
|
73
|
-
user_hash = common_utils.get_user_hash()
|
|
74
|
-
user_ssh_key_prefix = _SSH_KEY_PATH_PREFIX.format(user_hash=user_hash)
|
|
75
|
-
|
|
76
|
-
os.makedirs(os.path.expanduser(user_ssh_key_prefix),
|
|
77
|
-
exist_ok=True,
|
|
78
|
-
mode=0o700)
|
|
79
|
-
private_key_path = os.path.join(user_ssh_key_prefix, 'sky-key')
|
|
80
|
-
public_key_path = os.path.join(user_ssh_key_prefix, 'sky-key.pub')
|
|
81
|
-
lock_path = os.path.join(user_ssh_key_prefix, '.__internal-sky-key.lock')
|
|
82
|
-
return private_key_path, public_key_path, lock_path
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def _generate_rsa_key_pair() -> Tuple[str, str]:
|
|
86
|
-
# Keep the import of the cryptography local to avoid expensive
|
|
87
|
-
# third-party imports when not needed.
|
|
88
|
-
# pylint: disable=import-outside-toplevel
|
|
89
|
-
from cryptography.hazmat.backends import default_backend
|
|
90
|
-
from cryptography.hazmat.primitives import serialization
|
|
91
|
-
from cryptography.hazmat.primitives.asymmetric import rsa
|
|
92
|
-
|
|
93
|
-
key = rsa.generate_private_key(backend=default_backend(),
|
|
94
|
-
public_exponent=65537,
|
|
95
|
-
key_size=2048)
|
|
96
|
-
|
|
97
|
-
private_key = key.private_bytes(
|
|
98
|
-
encoding=serialization.Encoding.PEM,
|
|
99
|
-
format=serialization.PrivateFormat.TraditionalOpenSSL,
|
|
100
|
-
encryption_algorithm=serialization.NoEncryption()).decode(
|
|
101
|
-
'utf-8').strip()
|
|
102
|
-
|
|
103
|
-
public_key = key.public_key().public_bytes(
|
|
104
|
-
serialization.Encoding.OpenSSH,
|
|
105
|
-
serialization.PublicFormat.OpenSSH).decode('utf-8').strip()
|
|
106
|
-
|
|
107
|
-
return public_key, private_key
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
def _save_key_pair(private_key_path: str, public_key_path: str,
|
|
111
|
-
private_key: str, public_key: str) -> None:
|
|
112
|
-
key_dir = os.path.dirname(private_key_path)
|
|
113
|
-
os.makedirs(key_dir, exist_ok=True, mode=0o700)
|
|
114
|
-
|
|
115
|
-
with open(
|
|
116
|
-
private_key_path,
|
|
117
|
-
'w',
|
|
118
|
-
encoding='utf-8',
|
|
119
|
-
opener=functools.partial(os.open, mode=0o600),
|
|
120
|
-
) as f:
|
|
121
|
-
f.write(private_key)
|
|
122
|
-
|
|
123
|
-
with open(public_key_path,
|
|
124
|
-
'w',
|
|
125
|
-
encoding='utf-8',
|
|
126
|
-
opener=functools.partial(os.open, mode=0o644)) as f:
|
|
127
|
-
f.write(public_key)
|
|
128
|
-
|
|
129
|
-
global_user_state.set_ssh_keys(common_utils.get_user_hash(), public_key,
|
|
130
|
-
private_key)
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
def get_or_generate_keys() -> Tuple[str, str]:
|
|
134
|
-
"""Returns the absolute private and public key paths."""
|
|
135
|
-
private_key_path, public_key_path, lock_path = get_ssh_key_and_lock_path()
|
|
136
|
-
private_key_path = os.path.expanduser(private_key_path)
|
|
137
|
-
public_key_path = os.path.expanduser(public_key_path)
|
|
138
|
-
lock_path = os.path.expanduser(lock_path)
|
|
139
|
-
|
|
140
|
-
lock_dir = os.path.dirname(lock_path)
|
|
141
|
-
# We should have the folder ~/.sky/generated/ssh to have 0o700 permission,
|
|
142
|
-
# as the ssh configs will be written to this folder as well in
|
|
143
|
-
# backend_utils.SSHConfigHelper
|
|
144
|
-
os.makedirs(lock_dir, exist_ok=True, mode=0o700)
|
|
145
|
-
with filelock.FileLock(lock_path, timeout=10):
|
|
146
|
-
if not os.path.exists(private_key_path):
|
|
147
|
-
ssh_public_key, ssh_private_key, exists = (
|
|
148
|
-
global_user_state.get_ssh_keys(common_utils.get_user_hash()))
|
|
149
|
-
if not exists:
|
|
150
|
-
ssh_public_key, ssh_private_key = _generate_rsa_key_pair()
|
|
151
|
-
_save_key_pair(private_key_path, public_key_path, ssh_private_key,
|
|
152
|
-
ssh_public_key)
|
|
153
|
-
assert os.path.exists(public_key_path), (
|
|
154
|
-
'Private key found, but associated public key '
|
|
155
|
-
f'{public_key_path} does not exist.')
|
|
156
|
-
return private_key_path, public_key_path
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
def create_ssh_key_files_from_db(private_key_path: Optional[str] = None):
|
|
160
|
-
if private_key_path is None:
|
|
161
|
-
user_hash = common_utils.get_user_hash()
|
|
162
|
-
else:
|
|
163
|
-
# Assume private key path is in the format of
|
|
164
|
-
# ~/.sky/clients/<user_hash>/ssh/sky-key
|
|
165
|
-
separated_path = os.path.normpath(private_key_path).split(os.path.sep)
|
|
166
|
-
assert separated_path[-1] == 'sky-key'
|
|
167
|
-
assert separated_path[-2] == 'ssh'
|
|
168
|
-
user_hash = separated_path[-3]
|
|
169
|
-
|
|
170
|
-
private_key_path_generated, public_key_path, lock_path = (
|
|
171
|
-
get_ssh_key_and_lock_path(user_hash))
|
|
172
|
-
assert private_key_path == os.path.expanduser(private_key_path_generated), (
|
|
173
|
-
f'Private key path {private_key_path} does not '
|
|
174
|
-
f'match the generated path {private_key_path_generated}')
|
|
175
|
-
private_key_path = os.path.expanduser(private_key_path)
|
|
176
|
-
public_key_path = os.path.expanduser(public_key_path)
|
|
177
|
-
lock_path = os.path.expanduser(lock_path)
|
|
178
|
-
|
|
179
|
-
lock_dir = os.path.dirname(lock_path)
|
|
180
|
-
# We should have the folder ~/.sky/generated/ssh to have 0o700 permission,
|
|
181
|
-
# as the ssh configs will be written to this folder as well in
|
|
182
|
-
# backend_utils.SSHConfigHelper
|
|
183
|
-
os.makedirs(lock_dir, exist_ok=True, mode=0o700)
|
|
184
|
-
with filelock.FileLock(lock_path, timeout=10):
|
|
185
|
-
if not os.path.exists(private_key_path):
|
|
186
|
-
ssh_public_key, ssh_private_key, exists = (
|
|
187
|
-
global_user_state.get_ssh_keys(user_hash))
|
|
188
|
-
if not exists:
|
|
189
|
-
raise RuntimeError(f'SSH keys not found for user {user_hash}')
|
|
190
|
-
_save_key_pair(private_key_path, public_key_path, ssh_private_key,
|
|
191
|
-
ssh_public_key)
|
|
192
|
-
assert os.path.exists(public_key_path), (
|
|
193
|
-
'Private key found, but associated public key '
|
|
194
|
-
f'{public_key_path} does not exist.')
|
|
195
|
-
|
|
196
58
|
|
|
197
59
|
def configure_ssh_info(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
198
|
-
_, public_key_path = get_or_generate_keys()
|
|
60
|
+
_, public_key_path = auth_utils.get_or_generate_keys()
|
|
199
61
|
with open(public_key_path, 'r', encoding='utf-8') as f:
|
|
200
62
|
public_key = f.read().strip()
|
|
201
63
|
config_str = yaml_utils.dump_yaml_str(config)
|
|
@@ -207,6 +69,24 @@ def configure_ssh_info(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
207
69
|
return config
|
|
208
70
|
|
|
209
71
|
|
|
72
|
+
def parse_gcp_project_oslogin(project):
|
|
73
|
+
"""Helper function to parse GCP project metadata."""
|
|
74
|
+
common_metadata = project.get('commonInstanceMetadata', {})
|
|
75
|
+
if not isinstance(common_metadata, dict):
|
|
76
|
+
common_metadata = {}
|
|
77
|
+
|
|
78
|
+
metadata_items = common_metadata.get('items', [])
|
|
79
|
+
if not isinstance(metadata_items, list):
|
|
80
|
+
metadata_items = []
|
|
81
|
+
|
|
82
|
+
project_oslogin = next(
|
|
83
|
+
(item for item in metadata_items
|
|
84
|
+
if isinstance(item, dict) and item.get('key') == 'enable-oslogin'),
|
|
85
|
+
{}).get('value', 'False')
|
|
86
|
+
|
|
87
|
+
return project_oslogin
|
|
88
|
+
|
|
89
|
+
|
|
210
90
|
# Snippets of code inspired from
|
|
211
91
|
# https://github.com/ray-project/ray/blob/master/python/ray/autoscaler/_private/gcp/config.py
|
|
212
92
|
# Takes in config, a yaml dict and outputs a postprocessed dict
|
|
@@ -215,7 +95,7 @@ def configure_ssh_info(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
215
95
|
# Retry for the GCP as sometimes there will be connection reset by peer error.
|
|
216
96
|
@common_utils.retry
|
|
217
97
|
def setup_gcp_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
218
|
-
_, public_key_path = get_or_generate_keys()
|
|
98
|
+
_, public_key_path = auth_utils.get_or_generate_keys()
|
|
219
99
|
config = copy.deepcopy(config)
|
|
220
100
|
|
|
221
101
|
project_id = config['provider']['project_id']
|
|
@@ -264,10 +144,7 @@ def setup_gcp_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
264
144
|
'Please check your network connection.')
|
|
265
145
|
raise
|
|
266
146
|
|
|
267
|
-
project_oslogin = next(
|
|
268
|
-
(item for item in project['commonInstanceMetadata'].get('items', [])
|
|
269
|
-
if item['key'] == 'enable-oslogin'), {}).get('value', 'False')
|
|
270
|
-
|
|
147
|
+
project_oslogin = parse_gcp_project_oslogin(project)
|
|
271
148
|
if project_oslogin.lower() == 'true':
|
|
272
149
|
logger.info(
|
|
273
150
|
f'OS Login is enabled for GCP project {project_id}. Running '
|
|
@@ -343,11 +220,11 @@ def setup_gcp_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
343
220
|
|
|
344
221
|
def setup_lambda_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
345
222
|
|
|
346
|
-
get_or_generate_keys()
|
|
223
|
+
auth_utils.get_or_generate_keys()
|
|
347
224
|
|
|
348
225
|
# Ensure ssh key is registered with Lambda Cloud
|
|
349
226
|
lambda_client = lambda_utils.LambdaCloudClient()
|
|
350
|
-
_, public_key_path = get_or_generate_keys()
|
|
227
|
+
_, public_key_path = auth_utils.get_or_generate_keys()
|
|
351
228
|
with open(public_key_path, 'r', encoding='utf-8') as f:
|
|
352
229
|
public_key = f.read().strip()
|
|
353
230
|
prefix = f'sky-key-{common_utils.get_user_hash()}'
|
|
@@ -364,7 +241,7 @@ def setup_ibm_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
364
241
|
and updates config file.
|
|
365
242
|
keys default location: '~/.ssh/sky-key' and '~/.ssh/sky-key.pub'
|
|
366
243
|
"""
|
|
367
|
-
private_key_path, _ = get_or_generate_keys()
|
|
244
|
+
private_key_path, _ = auth_utils.get_or_generate_keys()
|
|
368
245
|
|
|
369
246
|
def _get_unique_key_name():
|
|
370
247
|
suffix_len = 10
|
|
@@ -373,7 +250,7 @@ def setup_ibm_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
373
250
|
client = ibm.client(region=config['provider']['region'])
|
|
374
251
|
resource_group_id = config['provider']['resource_group_id']
|
|
375
252
|
|
|
376
|
-
_, public_key_path = get_or_generate_keys()
|
|
253
|
+
_, public_key_path = auth_utils.get_or_generate_keys()
|
|
377
254
|
with open(os.path.abspath(os.path.expanduser(public_key_path)),
|
|
378
255
|
'r',
|
|
379
256
|
encoding='utf-8') as file:
|
|
@@ -414,116 +291,30 @@ def setup_ibm_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
414
291
|
|
|
415
292
|
def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
416
293
|
context = kubernetes_utils.get_context_from_config(config['provider'])
|
|
417
|
-
|
|
418
|
-
# Default ssh session is established with kubectl port-forwarding with
|
|
419
|
-
# ClusterIP service.
|
|
420
|
-
nodeport_mode = kubernetes_enums.KubernetesNetworkingMode.NODEPORT
|
|
421
|
-
port_forward_mode = kubernetes_enums.KubernetesNetworkingMode.PORTFORWARD
|
|
422
|
-
network_mode_str = skypilot_config.get_effective_region_config(
|
|
423
|
-
cloud='kubernetes',
|
|
424
|
-
region=context,
|
|
425
|
-
keys=('networking',),
|
|
426
|
-
default_value=port_forward_mode.value)
|
|
427
|
-
try:
|
|
428
|
-
network_mode = kubernetes_enums.KubernetesNetworkingMode.from_str(
|
|
429
|
-
network_mode_str)
|
|
430
|
-
except ValueError as e:
|
|
431
|
-
# Add message saying "Please check: ~/.sky/config.yaml" to the error
|
|
432
|
-
# message.
|
|
433
|
-
with ux_utils.print_exception_no_traceback():
|
|
434
|
-
raise ValueError(str(e) +
|
|
435
|
-
' Please check: ~/.sky/config.yaml.') from None
|
|
436
|
-
_, public_key_path = get_or_generate_keys()
|
|
437
|
-
|
|
438
|
-
# Add the user's public key to the SkyPilot cluster.
|
|
439
|
-
secret_name = clouds.Kubernetes.SKY_SSH_KEY_SECRET_NAME
|
|
440
|
-
secret_field_name = clouds.Kubernetes().ssh_key_secret_field_name
|
|
441
294
|
namespace = kubernetes_utils.get_namespace_from_config(config['provider'])
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
config_utils.merge_k8s_configs(secret_metadata, custom_metadata)
|
|
461
|
-
|
|
462
|
-
secret = k8s.client.V1Secret(
|
|
463
|
-
metadata=k8s.client.V1ObjectMeta(**secret_metadata),
|
|
464
|
-
string_data={secret_field_name: public_key})
|
|
465
|
-
try:
|
|
466
|
-
if kubernetes_utils.check_secret_exists(secret_name, namespace,
|
|
467
|
-
context):
|
|
468
|
-
logger.debug(f'Key {secret_name} exists in the cluster, '
|
|
469
|
-
'patching it...')
|
|
470
|
-
kubernetes.core_api(context).patch_namespaced_secret(
|
|
471
|
-
secret_name, namespace, secret)
|
|
472
|
-
else:
|
|
473
|
-
logger.debug(f'Key {secret_name} does not exist in the cluster, '
|
|
474
|
-
'creating it...')
|
|
475
|
-
kubernetes.core_api(context).create_namespaced_secret(
|
|
476
|
-
namespace, secret)
|
|
477
|
-
except kubernetes.api_exception() as e:
|
|
478
|
-
if e.status == 409 and e.reason == 'AlreadyExists':
|
|
479
|
-
logger.debug(f'Key {secret_name} was created concurrently, '
|
|
480
|
-
'patching it...')
|
|
481
|
-
kubernetes.core_api(context).patch_namespaced_secret(
|
|
482
|
-
secret_name, namespace, secret)
|
|
483
|
-
else:
|
|
484
|
-
raise e
|
|
485
|
-
|
|
486
|
-
private_key_path, _ = get_or_generate_keys()
|
|
487
|
-
if network_mode == nodeport_mode:
|
|
488
|
-
ssh_jump_name = clouds.Kubernetes.SKY_SSH_JUMP_NAME
|
|
489
|
-
service_type = kubernetes_enums.KubernetesServiceType.NODEPORT
|
|
490
|
-
# Setup service for SSH jump pod. We create the SSH jump service here
|
|
491
|
-
# because we need to know the service IP address and port to set the
|
|
492
|
-
# ssh_proxy_command in the autoscaler config.
|
|
493
|
-
kubernetes_utils.setup_ssh_jump_svc(ssh_jump_name, namespace, context,
|
|
494
|
-
service_type)
|
|
495
|
-
ssh_proxy_cmd = kubernetes_utils.get_ssh_proxy_command(
|
|
496
|
-
ssh_jump_name,
|
|
497
|
-
nodeport_mode,
|
|
498
|
-
private_key_path=private_key_path,
|
|
499
|
-
context=context,
|
|
500
|
-
namespace=namespace)
|
|
501
|
-
elif network_mode == port_forward_mode:
|
|
502
|
-
# Using `kubectl port-forward` creates a direct tunnel to the pod and
|
|
503
|
-
# does not require a ssh jump pod.
|
|
504
|
-
kubernetes_utils.check_port_forward_mode_dependencies()
|
|
505
|
-
# TODO(romilb): This can be further optimized. Instead of using the
|
|
506
|
-
# head node as a jump pod for worker nodes, we can also directly
|
|
507
|
-
# set the ssh_target to the worker node. However, that requires
|
|
508
|
-
# changes in the downstream code to return a mapping of node IPs to
|
|
509
|
-
# pod names (to be used as ssh_target) and updating the upstream
|
|
510
|
-
# SSHConfigHelper to use a different ProxyCommand for each pod.
|
|
511
|
-
# This optimization can reduce SSH time from ~0.35s to ~0.25s, tested
|
|
512
|
-
# on GKE.
|
|
513
|
-
ssh_target = config['cluster_name'] + '-head'
|
|
514
|
-
ssh_proxy_cmd = kubernetes_utils.get_ssh_proxy_command(
|
|
515
|
-
ssh_target,
|
|
516
|
-
port_forward_mode,
|
|
517
|
-
private_key_path=private_key_path,
|
|
518
|
-
context=context,
|
|
519
|
-
namespace=namespace)
|
|
520
|
-
else:
|
|
521
|
-
# This should never happen because we check for this in from_str above.
|
|
522
|
-
raise ValueError(f'Unsupported networking mode: {network_mode_str}')
|
|
295
|
+
private_key_path, _ = auth_utils.get_or_generate_keys()
|
|
296
|
+
# Using `kubectl port-forward` creates a direct tunnel to the pod and
|
|
297
|
+
# does not require a ssh jump pod.
|
|
298
|
+
kubernetes_utils.check_port_forward_mode_dependencies()
|
|
299
|
+
# TODO(romilb): This can be further optimized. Instead of using the
|
|
300
|
+
# head node as a jump pod for worker nodes, we can also directly
|
|
301
|
+
# set the ssh_target to the worker node. However, that requires
|
|
302
|
+
# changes in the downstream code to return a mapping of node IPs to
|
|
303
|
+
# pod names (to be used as ssh_target) and updating the upstream
|
|
304
|
+
# SSHConfigHelper to use a different ProxyCommand for each pod.
|
|
305
|
+
# This optimization can reduce SSH time from ~0.35s to ~0.25s, tested
|
|
306
|
+
# on GKE.
|
|
307
|
+
pod_name = config['cluster_name'] + '-head'
|
|
308
|
+
ssh_proxy_cmd = kubernetes_utils.get_ssh_proxy_command(
|
|
309
|
+
pod_name,
|
|
310
|
+
private_key_path=private_key_path,
|
|
311
|
+
context=context,
|
|
312
|
+
namespace=namespace)
|
|
523
313
|
config['auth']['ssh_proxy_command'] = ssh_proxy_cmd
|
|
524
314
|
config['auth']['ssh_private_key'] = private_key_path
|
|
525
315
|
|
|
526
|
-
|
|
316
|
+
# Add the user's public key to the SkyPilot cluster.
|
|
317
|
+
return configure_ssh_info(config)
|
|
527
318
|
|
|
528
319
|
|
|
529
320
|
# ---------------------------------- RunPod ---------------------------------- #
|
|
@@ -532,7 +323,7 @@ def setup_runpod_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
532
323
|
- Generates a new SSH key pair if one does not exist.
|
|
533
324
|
- Adds the public SSH key to the user's RunPod account.
|
|
534
325
|
"""
|
|
535
|
-
_, public_key_path = get_or_generate_keys()
|
|
326
|
+
_, public_key_path = auth_utils.get_or_generate_keys()
|
|
536
327
|
with open(public_key_path, 'r', encoding='UTF-8') as pub_key_file:
|
|
537
328
|
public_key = pub_key_file.read().strip()
|
|
538
329
|
runpod.runpod.cli.groups.ssh.functions.add_ssh_key(public_key)
|
|
@@ -545,7 +336,7 @@ def setup_vast_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
545
336
|
- Generates a new SSH key pair if one does not exist.
|
|
546
337
|
- Adds the public SSH key to the user's Vast account.
|
|
547
338
|
"""
|
|
548
|
-
_, public_key_path = get_or_generate_keys()
|
|
339
|
+
_, public_key_path = auth_utils.get_or_generate_keys()
|
|
549
340
|
with open(public_key_path, 'r', encoding='UTF-8') as pub_key_file:
|
|
550
341
|
public_key = pub_key_file.read().strip()
|
|
551
342
|
current_key_list = vast.vast().show_ssh_keys() # pylint: disable=assignment-from-no-return
|
|
@@ -559,7 +350,7 @@ def setup_vast_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
559
350
|
|
|
560
351
|
def setup_fluidstack_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
561
352
|
|
|
562
|
-
_, public_key_path = get_or_generate_keys()
|
|
353
|
+
_, public_key_path = auth_utils.get_or_generate_keys()
|
|
563
354
|
|
|
564
355
|
client = fluidstack_utils.FluidstackClient()
|
|
565
356
|
public_key = None
|
|
@@ -572,7 +363,7 @@ def setup_fluidstack_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
572
363
|
|
|
573
364
|
def setup_hyperbolic_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
574
365
|
"""Sets up SSH authentication for Hyperbolic."""
|
|
575
|
-
_, public_key_path = get_or_generate_keys()
|
|
366
|
+
_, public_key_path = auth_utils.get_or_generate_keys()
|
|
576
367
|
with open(public_key_path, 'r', encoding='utf-8') as f:
|
|
577
368
|
public_key = f.read().strip()
|
|
578
369
|
|
|
@@ -586,3 +377,106 @@ def setup_hyperbolic_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
586
377
|
config['auth']['ssh_public_key'] = public_key_path
|
|
587
378
|
|
|
588
379
|
return configure_ssh_info(config)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def setup_shadeform_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
383
|
+
"""Sets up SSH authentication for Shadeform.
|
|
384
|
+
- Generates a new SSH key pair if one does not exist.
|
|
385
|
+
- Adds the public SSH key to the user's Shadeform account.
|
|
386
|
+
|
|
387
|
+
Note: This assumes there is a Shadeform Python SDK available.
|
|
388
|
+
If no official SDK exists, this function would need to use direct API calls.
|
|
389
|
+
"""
|
|
390
|
+
|
|
391
|
+
_, public_key_path = auth_utils.get_or_generate_keys()
|
|
392
|
+
ssh_key_id = None
|
|
393
|
+
|
|
394
|
+
with open(public_key_path, 'r', encoding='utf-8') as f:
|
|
395
|
+
public_key = f.read().strip()
|
|
396
|
+
|
|
397
|
+
try:
|
|
398
|
+
# Add SSH key to Shadeform using our utility functions
|
|
399
|
+
ssh_key_id = shadeform_adaptor.add_ssh_key_to_shadeform(public_key)
|
|
400
|
+
|
|
401
|
+
except ImportError as e:
|
|
402
|
+
# If required dependencies are missing
|
|
403
|
+
logger.warning(
|
|
404
|
+
f'Failed to add Shadeform SSH key due to missing dependencies: '
|
|
405
|
+
f'{e}. Manually configure SSH keys in your Shadeform account.')
|
|
406
|
+
|
|
407
|
+
except Exception as e:
|
|
408
|
+
logger.warning(f'Failed to set up Shadeform authentication: {e}')
|
|
409
|
+
raise exceptions.CloudUserIdentityError(
|
|
410
|
+
'Failed to set up SSH authentication for Shadeform. '
|
|
411
|
+
f'Please ensure your Shadeform credentials are configured: {e}'
|
|
412
|
+
) from e
|
|
413
|
+
|
|
414
|
+
if ssh_key_id is None:
|
|
415
|
+
raise Exception('Failed to add SSH key to Shadeform')
|
|
416
|
+
|
|
417
|
+
# Configure SSH info in the config
|
|
418
|
+
config['auth']['ssh_public_key'] = public_key_path
|
|
419
|
+
config['auth']['ssh_key_id'] = ssh_key_id
|
|
420
|
+
|
|
421
|
+
return configure_ssh_info(config)
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def setup_primeintellect_authentication(
|
|
425
|
+
config: Dict[str, Any]) -> Dict[str, Any]:
|
|
426
|
+
"""Sets up SSH authentication for Prime Intellect.
|
|
427
|
+
- Generates a new SSH key pair if one does not exist.
|
|
428
|
+
- Adds the public SSH key to the user's Prime Intellect account.
|
|
429
|
+
"""
|
|
430
|
+
# Ensure local SSH keypair exists and fetch public key content
|
|
431
|
+
_, public_key_path = auth_utils.get_or_generate_keys()
|
|
432
|
+
with open(public_key_path, 'r', encoding='utf-8') as f:
|
|
433
|
+
public_key = f.read().strip()
|
|
434
|
+
|
|
435
|
+
# Register the public key with Prime Intellect (no-op if already exists)
|
|
436
|
+
client = primeintellect_utils.PrimeIntellectAPIClient()
|
|
437
|
+
client.get_or_add_ssh_key(public_key)
|
|
438
|
+
|
|
439
|
+
# Set up auth section for Ray template
|
|
440
|
+
config.setdefault('auth', {})
|
|
441
|
+
# Default username for Prime Intellect images
|
|
442
|
+
config['auth']['ssh_user'] = 'ubuntu'
|
|
443
|
+
config['auth']['ssh_public_key'] = public_key_path
|
|
444
|
+
|
|
445
|
+
return configure_ssh_info(config)
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def setup_seeweb_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
449
|
+
"""Registers the public key with Seeweb and notes the remote name."""
|
|
450
|
+
# 1. local key pair
|
|
451
|
+
auth_utils.get_or_generate_keys()
|
|
452
|
+
|
|
453
|
+
# 2. public key
|
|
454
|
+
_, public_key_path = auth_utils.get_or_generate_keys()
|
|
455
|
+
with open(public_key_path, 'r', encoding='utf-8') as f:
|
|
456
|
+
public_key = f.read().strip()
|
|
457
|
+
|
|
458
|
+
# 3. Seeweb API client
|
|
459
|
+
client = seeweb_adaptor.client()
|
|
460
|
+
|
|
461
|
+
# 4. Check if key is already registered
|
|
462
|
+
prefix = f'sky-key-{common_utils.get_user_hash()}'
|
|
463
|
+
remote_name = None
|
|
464
|
+
for k in client.fetch_ssh_keys():
|
|
465
|
+
if k.key.strip() == public_key:
|
|
466
|
+
remote_name = k.label # already present
|
|
467
|
+
break
|
|
468
|
+
|
|
469
|
+
# 5. doesn't exist, choose a unique name and create it
|
|
470
|
+
if remote_name is None:
|
|
471
|
+
suffix = 1
|
|
472
|
+
remote_name = prefix
|
|
473
|
+
existing_names = {k.label for k in client.fetch_ssh_keys()}
|
|
474
|
+
while remote_name in existing_names:
|
|
475
|
+
suffix += 1
|
|
476
|
+
remote_name = f'{prefix}-{suffix}'
|
|
477
|
+
client.create_ssh_key(label=remote_name, key=public_key)
|
|
478
|
+
|
|
479
|
+
# 6. Put the remote name in cluster-config (like for Lambda)
|
|
480
|
+
config['auth']['remote_key_name'] = remote_name
|
|
481
|
+
|
|
482
|
+
return config
|
sky/backends/__init__.py
CHANGED
|
@@ -4,11 +4,12 @@ from sky.backends.backend import ResourceHandle
|
|
|
4
4
|
from sky.backends.cloud_vm_ray_backend import CloudVmRayBackend
|
|
5
5
|
from sky.backends.cloud_vm_ray_backend import CloudVmRayResourceHandle
|
|
6
6
|
from sky.backends.cloud_vm_ray_backend import LocalResourcesHandle
|
|
7
|
+
from sky.backends.cloud_vm_ray_backend import SkyletClient
|
|
7
8
|
from sky.backends.local_docker_backend import LocalDockerBackend
|
|
8
9
|
from sky.backends.local_docker_backend import LocalDockerResourceHandle
|
|
9
10
|
|
|
10
11
|
__all__ = [
|
|
11
12
|
'Backend', 'ResourceHandle', 'CloudVmRayBackend',
|
|
12
|
-
'CloudVmRayResourceHandle', 'LocalResourcesHandle', 'LocalDockerBackend',
|
|
13
|
-
'LocalDockerResourceHandle'
|
|
13
|
+
'CloudVmRayResourceHandle', 'SkyletClient', 'LocalResourcesHandle',
|
|
14
|
+
'LocalDockerBackend', 'LocalDockerResourceHandle'
|
|
14
15
|
]
|
sky/backends/backend.py
CHANGED
|
@@ -95,6 +95,12 @@ class Backend(Generic[_ResourceHandleType]):
|
|
|
95
95
|
envs_and_secrets: Dict[str, str]) -> None:
|
|
96
96
|
return self._sync_workdir(handle, workdir, envs_and_secrets)
|
|
97
97
|
|
|
98
|
+
@timeline.event
|
|
99
|
+
@usage_lib.messages.usage.update_runtime('download_file')
|
|
100
|
+
def download_file(self, handle: _ResourceHandleType, local_file_path: str,
|
|
101
|
+
remote_file_path: str) -> None:
|
|
102
|
+
return self._download_file(handle, local_file_path, remote_file_path)
|
|
103
|
+
|
|
98
104
|
@timeline.event
|
|
99
105
|
@usage_lib.messages.usage.update_runtime('sync_file_mounts')
|
|
100
106
|
def sync_file_mounts(
|
|
@@ -120,7 +126,6 @@ class Backend(Generic[_ResourceHandleType]):
|
|
|
120
126
|
def execute(self,
|
|
121
127
|
handle: _ResourceHandleType,
|
|
122
128
|
task: 'task_lib.Task',
|
|
123
|
-
detach_run: bool,
|
|
124
129
|
dryrun: bool = False) -> Optional[int]:
|
|
125
130
|
"""Execute the task on the cluster.
|
|
126
131
|
|
|
@@ -131,7 +136,7 @@ class Backend(Generic[_ResourceHandleType]):
|
|
|
131
136
|
handle.get_cluster_name())
|
|
132
137
|
usage_lib.messages.usage.update_actual_task(task)
|
|
133
138
|
with rich_utils.safe_status(ux_utils.spinner_message('Submitting job')):
|
|
134
|
-
return self._execute(handle, task, detach_run, dryrun)
|
|
139
|
+
return self._execute(handle, task, dryrun)
|
|
135
140
|
|
|
136
141
|
@timeline.event
|
|
137
142
|
def post_execute(self, handle: _ResourceHandleType, down: bool) -> None:
|
|
@@ -172,6 +177,10 @@ class Backend(Generic[_ResourceHandleType]):
|
|
|
172
177
|
envs_and_secrets: Dict[str, str]) -> None:
|
|
173
178
|
raise NotImplementedError
|
|
174
179
|
|
|
180
|
+
def _download_file(self, handle: _ResourceHandleType, local_file_path: str,
|
|
181
|
+
remote_file_path: str) -> None:
|
|
182
|
+
raise NotImplementedError
|
|
183
|
+
|
|
175
184
|
def _sync_file_mounts(
|
|
176
185
|
self,
|
|
177
186
|
handle: _ResourceHandleType,
|
|
@@ -187,7 +196,6 @@ class Backend(Generic[_ResourceHandleType]):
|
|
|
187
196
|
def _execute(self,
|
|
188
197
|
handle: _ResourceHandleType,
|
|
189
198
|
task: 'task_lib.Task',
|
|
190
|
-
detach_run: bool,
|
|
191
199
|
dryrun: bool = False) -> Optional[int]:
|
|
192
200
|
raise NotImplementedError
|
|
193
201
|
|