skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""SSH Node Pool management package."""
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""SSH Node Pool management core functionality."""
|
|
2
|
+
import os
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any, Dict, List
|
|
5
|
+
|
|
6
|
+
import yaml
|
|
7
|
+
|
|
8
|
+
from sky.utils import yaml_utils
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SSHNodePoolManager:
    """Manager for SSH Node Pool configurations.

    Pool definitions are read from and written to
    ``~/.sky/ssh_node_pools.yaml``; uploaded private keys are stored under
    ``~/.sky/ssh_keys/``.
    """

    def __init__(self):
        """Resolve the config/key paths and create the key directory."""
        self.config_path = Path.home() / '.sky' / 'ssh_node_pools.yaml'
        self.keys_dir = Path.home() / '.sky' / 'ssh_keys'
        self.keys_dir.mkdir(parents=True, exist_ok=True)

    def get_all_pools(self) -> Dict[str, Any]:
        """Read all SSH Node Pool configurations from the YAML file.

        Returns:
            The parsed content of the config file, or an empty dict when the
            file does not exist or parses to a falsy value.

        Raises:
            RuntimeError: If the file cannot be opened or parsed.
        """
        if not self.config_path.exists():
            return {}

        try:
            with open(self.config_path, 'r', encoding='utf-8') as f:
                return yaml_utils.safe_load(f) or {}
        except Exception as e:
            raise RuntimeError(
                f'Failed to read SSH Node Pool config: {e}') from e

    def save_all_pools(self, pools_config: Dict[str, Any]) -> None:
        """Write SSH Node Pool configurations to the YAML file.

        The parent directory is created if needed and the file is
        overwritten with ``pools_config``.

        Raises:
            RuntimeError: If the directory or file cannot be written.
        """
        # NOTE(review): pool configs may carry a `password` value (see
        # _validate_pool_config); consider restricting this file's
        # permissions as is done for SSH keys.
        try:
            self.config_path.parent.mkdir(parents=True, exist_ok=True)
            with open(self.config_path, 'w', encoding='utf-8') as f:
                yaml.dump(pools_config, f, default_flow_style=False)
        except Exception as e:
            raise RuntimeError(
                f'Failed to save SSH Node Pool config: {e}') from e

    def update_pools(self, pools_config: Dict[str, Any]) -> None:
        """Merge ``pools_config`` into the stored pools and save the result.

        Entries in ``pools_config`` replace stored entries with the same
        name; other stored pools are kept. No validation is performed here.
        """
        all_pools = self.get_all_pools()
        all_pools.update(pools_config)
        self.save_all_pools(all_pools)

    def add_or_update_pool(self, pool_name: str,
                           pool_config: Dict[str, Any]) -> None:
        """Add or update a single SSH Node Pool configuration.

        Raises:
            ValueError: If ``pool_config`` fails validation.
            RuntimeError: If the config file cannot be read or written.
        """
        # Validate before touching the file so a bad config never lands on
        # disk.
        self._validate_pool_config(pool_config)

        all_pools = self.get_all_pools()
        all_pools[pool_name] = pool_config
        self.save_all_pools(all_pools)

    def delete_pool(self, pool_name: str) -> bool:
        """Delete an SSH Node Pool configuration.

        Returns:
            True if the pool existed and was removed, False otherwise.
        """
        all_pools = self.get_all_pools()
        if pool_name in all_pools:
            del all_pools[pool_name]
            self.save_all_pools(all_pools)
            return True
        return False

    def save_ssh_key(self, key_name: str, key_content: str) -> str:
        """Save an SSH private key to the ~/.sky/ssh_keys/ directory.

        The key file is created with owner-only (0600) permissions from the
        start, so the private key is never readable by other users, not even
        for the short window between writing and a later chmod.

        Args:
            key_name: File name for the key. Must be non-empty, must not
                start with '.', and must not contain a path separator.
            key_content: The key material to write.

        Returns:
            The path of the saved key file, as a string.

        Raises:
            ValueError: If ``key_name`` is invalid.
            RuntimeError: If the key cannot be written.
        """
        # Reject names that could escape the keys directory or create hidden
        # files. os.sep/os.altsep cover platforms whose separator is not '/'.
        separators = [sep for sep in ('/', os.sep, os.altsep) if sep]
        if (not key_name or key_name.startswith('.') or
                any(sep in key_name for sep in separators)):
            raise ValueError('Invalid key name')

        key_path = self.keys_dir / key_name
        try:
            # Create the file already restricted to the owner; a plain
            # open() would create it with umask-derived permissions first.
            fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
                         0o600)
            with os.fdopen(fd, 'w', encoding='utf-8') as f:
                # The mode passed to os.open() only applies to new files;
                # tighten a pre-existing file before any content is written.
                os.chmod(key_path, 0o600)
                f.write(key_content)
            return str(key_path)
        except Exception as e:
            raise RuntimeError(f'Failed to save SSH key: {e}') from e

    def list_ssh_keys(self) -> List[str]:
        """List the names of regular files in the SSH keys directory.

        Returns:
            The file names, or an empty list when the directory is missing
            or cannot be listed.
        """
        if not self.keys_dir.exists():
            return []
        try:
            return [f.name for f in self.keys_dir.iterdir() if f.is_file()]
        except Exception:  # pylint: disable=broad-except
            # Best effort: an unreadable directory is reported as "no keys".
            return []

    def _validate_pool_config(self, config: Dict[str, Any]) -> None:
        """Validate an SSH Node Pool configuration.

        Requires a non-empty ``hosts`` list, a non-blank string ``user`` and
        at least one of ``identity_file`` or ``password``.

        Raises:
            ValueError: If any of the requirements is not met.
        """
        if 'hosts' not in config:
            raise ValueError('Pool configuration must include `hosts`')

        if not isinstance(config['hosts'], list) or not config['hosts']:
            raise ValueError('`hosts` must be a non-empty list')

        # Validate user field. A missing, null or non-string user must be
        # reported as a validation error rather than failing on `.strip()`.
        user = config.get('user')
        if not isinstance(user, str) or not user.strip():
            raise ValueError('Pool configuration must include `user`')

        # Validate authentication - must have either identity_file or password
        if not config.get('identity_file') and not config.get('password'):
            raise ValueError('Pool configuration must include '
                             'either `identity_file` or `password`')
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def get_all_pools() -> Dict[str, Any]:
    """Return all SSH Node Pool configurations.

    Convenience wrapper that builds a fresh ``SSHNodePoolManager`` and
    delegates to its ``get_all_pools`` method.
    """
    return SSHNodePoolManager().get_all_pools()
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def update_pools(pools_config: Dict[str, Any]) -> None:
    """Merge ``pools_config`` into the stored SSH Node Pool configurations.

    Convenience wrapper that builds a fresh ``SSHNodePoolManager`` and
    delegates to its ``update_pools`` method.
    """
    SSHNodePoolManager().update_pools(pools_config)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def delete_pool(pool_name: str) -> bool:
    """Delete the SSH Node Pool configuration named ``pool_name``.

    Convenience wrapper that builds a fresh ``SSHNodePoolManager`` and
    returns the result of its ``delete_pool`` method.
    """
    return SSHNodePoolManager().delete_pool(pool_name)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def upload_ssh_key(key_name: str, key_content: str) -> str:
    """Store an SSH private key and return the value from the manager.

    Convenience wrapper that builds a fresh ``SSHNodePoolManager`` and
    delegates to its ``save_ssh_key`` method.
    """
    return SSHNodePoolManager().save_ssh_key(key_name, key_content)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def list_ssh_keys() -> List[str]:
    """Return the available SSH keys.

    Delegates to `list_ssh_keys` on a newly constructed
    `SSHNodePoolManager`.
    """
    return SSHNodePoolManager().list_ssh_keys()
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""SSH Node Pool management API endpoints."""
|
|
2
|
+
import re
|
|
3
|
+
from typing import Any, Dict, List
|
|
4
|
+
|
|
5
|
+
import fastapi
|
|
6
|
+
|
|
7
|
+
from sky import core as sky_core
|
|
8
|
+
from sky.server.requests import executor
|
|
9
|
+
from sky.server.requests import payloads
|
|
10
|
+
from sky.server.requests import request_names
|
|
11
|
+
from sky.server.requests import requests as requests_lib
|
|
12
|
+
from sky.ssh_node_pools import core as ssh_node_pools_core
|
|
13
|
+
from sky.utils import common_utils
|
|
14
|
+
|
|
15
|
+
# Router on which the SSH Node Pool endpoints in this module are registered.
router = fastapi.APIRouter()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@router.get('')
def get_ssh_node_pools() -> Dict[str, Any]:
    """Return all SSH Node Pool configurations.

    Raises:
        fastapi.HTTPException: With status 500 when the lookup fails.
    """
    try:
        pools = ssh_node_pools_core.get_all_pools()
    except Exception as e:
        reason = common_utils.format_exception(e)
        raise fastapi.HTTPException(
            status_code=500,
            detail=f'Failed to get SSH Node Pools: {reason}')
    return pools
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@router.post('')
def update_ssh_node_pools(pools_config: Dict[str, Any]) -> Dict[str, str]:
    """Apply the submitted SSH Node Pool configurations.

    Args:
        pools_config: Pool configurations forwarded to the core module.

    Returns:
        `{'status': 'success'}` once the update has been applied.

    Raises:
        fastapi.HTTPException: With status 400 when the update fails.
    """
    try:
        ssh_node_pools_core.update_pools(pools_config)
    except Exception as e:
        reason = common_utils.format_exception(e)
        raise fastapi.HTTPException(
            status_code=400,
            detail=f'Failed to update SSH Node Pools: {reason}')
    return {'status': 'success'}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@router.delete('/{pool_name}')
def delete_ssh_node_pool(pool_name: str) -> Dict[str, str]:
    """Delete one SSH Node Pool configuration.

    Args:
        pool_name: Name of the pool taken from the URL path.

    Returns:
        `{'status': 'success'}` when the core module reports a deletion.

    Raises:
        fastapi.HTTPException: With status 404 when the core module reports
            that nothing was deleted, or 500 when the deletion itself fails.
    """
    try:
        deleted = ssh_node_pools_core.delete_pool(pool_name)
    except fastapi.HTTPException:
        # Already an HTTP error; pass it through untouched.
        raise
    except Exception as e:
        reason = common_utils.format_exception(e)
        raise fastapi.HTTPException(
            status_code=500,
            detail=f'Failed to delete SSH Node Pool: {reason}')

    if not deleted:
        raise fastapi.HTTPException(
            status_code=404, detail=f'SSH Node Pool `{pool_name}` not found')
    return {'status': 'success'}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@router.post('/keys')
async def upload_ssh_key(request: fastapi.Request) -> Dict[str, str]:
    """Upload an SSH private key submitted as form data.

    The form must carry a text field `key_name` and a file upload
    `key_file` whose content is UTF-8 text.

    Args:
        request: Incoming request whose form data holds the key.

    Returns:
        A dict with `status` set to `success` and `key_path` set to the
        value returned by the core `upload_ssh_key`.

    Raises:
        fastapi.HTTPException: With status 400 when a field is missing, has
            the wrong kind (text vs. file), or the file is not valid UTF-8;
            with status 500 for any other failure.
    """
    try:
        form = await request.form()
        key_name = form.get('key_name')
        key_file = form.get('key_file')

        if not key_name or not key_file:
            raise fastapi.HTTPException(status_code=400,
                                        detail='Missing key_name or key_file')

        # Form values are either plain strings or uploaded files. Only an
        # upload can be read, and only a string is usable as the key name,
        # so reject swapped field kinds as a client error.
        if not isinstance(key_name, str) or isinstance(key_file, str):
            raise fastapi.HTTPException(
                status_code=400,
                detail='key_name must be a text field and key_file must be '
                'a file upload')

        key_content = await key_file.read()
        try:
            key_text = key_content.decode()
        except UnicodeDecodeError as e:
            # A binary or otherwise non-UTF-8 upload is bad client input.
            raise fastapi.HTTPException(
                status_code=400,
                detail='key_file must contain UTF-8 text') from e

        key_path = ssh_node_pools_core.upload_ssh_key(key_name, key_text)

        return {'status': 'success', 'key_path': key_path}
    except fastapi.HTTPException:
        raise
    except Exception as e:
        raise fastapi.HTTPException(
            status_code=500,
            detail=
            f'Failed to upload SSH key: {common_utils.format_exception(e)}')
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@router.get('/keys')
def list_ssh_keys() -> List[str]:
    """Return the SSH keys reported by the core module.

    Raises:
        fastapi.HTTPException: With status 500 when listing fails.
    """
    try:
        key_names = ssh_node_pools_core.list_ssh_keys()
    except Exception as e:
        detail = (
            f'Failed to list SSH keys: {common_utils.format_exception(e)}')
        raise fastapi.HTTPException(status_code=500, detail=detail)
    return key_names
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@router.post('/{pool_name}/deploy')
async def deploy_ssh_node_pool(request: fastapi.Request,
                               pool_name: str) -> Dict[str, str]:
    """Schedule deployment of one SSH Node Pool through `sky_core.ssh_up`.

    Args:
        request: Incoming request; its `state.request_id` identifies the
            scheduled work.
        pool_name: Name of the pool taken from the URL path.

    Returns:
        A dict with `status`, the `request_id` and a human-readable
        `message`.

    Raises:
        fastapi.HTTPException: With status 500 when scheduling fails.
    """
    try:
        body = payloads.SSHUpBody(infra=pool_name, cleanup=False)
        request_id = request.state.request_id
        await executor.schedule_request_async(
            request_id=request_id,
            request_name=request_names.RequestName.SSH_NODE_POOLS_UP,
            request_body=body,
            func=sky_core.ssh_up,
            schedule_type=requests_lib.ScheduleType.LONG,
        )

        message = f'SSH Node Pool `{pool_name}` deployment started'
        return {
            'status': 'success',
            'request_id': request_id,
            'message': message,
        }
    except Exception as e:
        reason = common_utils.format_exception(e)
        raise fastapi.HTTPException(
            status_code=500,
            detail=f'Failed to deploy SSH Node Pool: {reason}')
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@router.post('/deploy')
async def deploy_ssh_node_pool_general(
        request: fastapi.Request,
        ssh_up_body: payloads.SSHUpBody) -> Dict[str, str]:
    """Schedule an SSH Node Pool deployment described by `ssh_up_body`.

    The body is passed to `sky_core.ssh_up` as submitted. When
    `ssh_up_body.infra` is unset, the response message refers to the pool
    as `default`.

    Args:
        request: Incoming request; its `state.request_id` identifies the
            scheduled work.
        ssh_up_body: Request payload forwarded to the scheduler.

    Returns:
        A dict with `status`, the `request_id` and a human-readable
        `message`.

    Raises:
        fastapi.HTTPException: With status 500 when scheduling fails.
    """
    try:
        request_id = request.state.request_id
        await executor.schedule_request_async(
            request_id=request_id,
            request_name=request_names.RequestName.SSH_NODE_POOLS_UP,
            request_body=ssh_up_body,
            func=sky_core.ssh_up,
            schedule_type=requests_lib.ScheduleType.LONG,
        )

        pool_name = ssh_up_body.infra or 'default'
        message = f'SSH Node Pool `{pool_name}` deployment started'
        return {
            'status': 'success',
            'request_id': request_id,
            'message': message,
        }
    except Exception as e:
        reason = common_utils.format_exception(e)
        raise fastapi.HTTPException(
            status_code=500,
            detail=f'Failed to deploy SSH Node Pool: {reason}')
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@router.post('/{pool_name}/down')
async def down_ssh_node_pool(request: fastapi.Request,
                             pool_name: str) -> Dict[str, str]:
    """Schedule teardown of one SSH Node Pool.

    Teardown reuses `sky_core.ssh_up`, invoked with `cleanup=True`.

    Args:
        request: Incoming request; its `state.request_id` identifies the
            scheduled work.
        pool_name: Name of the pool taken from the URL path.

    Returns:
        A dict with `status`, the `request_id` and a human-readable
        `message`.

    Raises:
        fastapi.HTTPException: With status 500 when scheduling fails.
    """
    try:
        body = payloads.SSHUpBody(infra=pool_name, cleanup=True)
        request_id = request.state.request_id
        await executor.schedule_request_async(
            request_id=request_id,
            request_name=request_names.RequestName.SSH_NODE_POOLS_DOWN,
            request_body=body,
            func=sky_core.ssh_up,  # ssh_up performs cleanup when asked to
            schedule_type=requests_lib.ScheduleType.LONG,
        )

        message = f'SSH Node Pool `{pool_name}` teardown started'
        return {
            'status': 'success',
            'request_id': request_id,
            'message': message,
        }
    except Exception as e:
        reason = common_utils.format_exception(e)
        raise fastapi.HTTPException(
            status_code=500,
            detail=f'Failed to tear down SSH Node Pool: {reason}')
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
@router.post('/down')
async def down_ssh_node_pool_general(
        request: fastapi.Request,
        ssh_up_body: payloads.SSHUpBody) -> Dict[str, str]:
    """Schedule an SSH Node Pool teardown described by `ssh_up_body`.

    The body's `cleanup` flag is forced to True before it is handed to
    `sky_core.ssh_up`. When `ssh_up_body.infra` is unset, the response
    message refers to the pool as `default`.

    Args:
        request: Incoming request; its `state.request_id` identifies the
            scheduled work.
        ssh_up_body: Request payload forwarded to the scheduler.

    Returns:
        A dict with `status`, the `request_id` and a human-readable
        `message`.

    Raises:
        fastapi.HTTPException: With status 500 when scheduling fails.
    """
    try:
        # A down request always means cleanup, whatever the client sent.
        ssh_up_body.cleanup = True
        request_id = request.state.request_id
        await executor.schedule_request_async(
            request_id=request_id,
            request_name=request_names.RequestName.SSH_NODE_POOLS_DOWN,
            request_body=ssh_up_body,
            func=sky_core.ssh_up,  # ssh_up performs cleanup when asked to
            schedule_type=requests_lib.ScheduleType.LONG,
        )

        pool_name = ssh_up_body.infra or 'default'
        message = f'SSH Node Pool `{pool_name}` teardown started'
        return {
            'status': 'success',
            'request_id': request_id,
            'message': message,
        }
    except Exception as e:
        reason = common_utils.format_exception(e)
        raise fastapi.HTTPException(
            status_code=500,
            detail=f'Failed to tear down SSH Node Pool: {reason}')
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
@router.get('/{pool_name}/status')
def get_ssh_node_pool_status(pool_name: str) -> Dict[str, str]:
    """Report whether a specific SSH Node Pool is ready.

    The pool is checked via `sky_core.ssh_status` using the context name
    `ssh-<pool_name>`.

    Args:
        pool_name: Name of the pool taken from the URL path.

    Returns:
        A dict with `pool_name`, `context_name`, `status` (`Ready` or
        `Not Ready`) and `reason`. A falsy reason is passed through as-is.

    Raises:
        fastapi.HTTPException: With status 500 when the status check fails.
    """
    try:
        context_name = f'ssh-{pool_name}'
        is_ready, reason = sky_core.ssh_status(context_name)

        cleaned_reason = reason
        if reason:
            # Drop ANSI colour/formatting escape sequences from the text.
            cleaned_reason = re.sub(r'\x1b\[[0-9;]*m', '', reason)
            # Drop the 'disabled. Reason: ' prefix wherever it occurs.
            cleaned_reason = cleaned_reason.replace('disabled. Reason: ', '')

        if is_ready:
            status = 'Ready'
        else:
            status = 'Not Ready'

        return {
            'pool_name': pool_name,
            'context_name': context_name,
            'status': status,
            'reason': cleaned_reason,
        }
    except Exception as e:
        failure = common_utils.format_exception(e)
        raise fastapi.HTTPException(
            status_code=500,
            detail=f'Failed to get SSH Node Pool status: {failure}')
|