skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/utils/auth_utils.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Utils for managing SkyPilot SSH key pairs."""
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
import os
|
|
5
|
+
from typing import Tuple
|
|
6
|
+
|
|
7
|
+
import filelock
|
|
8
|
+
|
|
9
|
+
from sky import global_user_state
|
|
10
|
+
from sky import sky_logging
|
|
11
|
+
from sky.utils import common_utils
|
|
12
|
+
|
|
13
|
+
logger = sky_logging.init_logger(__name__)
|
|
14
|
+
|
|
15
|
+
MAX_TRIALS = 64
|
|
16
|
+
# TODO(zhwu): Support user specified key pair.
|
|
17
|
+
# We intentionally do not store the ssh key pair in
|
|
18
|
+
# ~/.sky/api_server/clients, i.e. sky.server.common.API_SERVER_CLIENT_DIR,
|
|
19
|
+
# because the ssh key pair needs to persist across API server restarts, while
|
|
20
|
+
# the former dir is ephemeral.
|
|
21
|
+
_SSH_KEY_PATH_PREFIX = '~/.sky/clients/{user_hash}/ssh'
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_ssh_key_and_lock_path(user_hash: str) -> Tuple[str, str, str]:
    """Builds the per-user SSH key file paths and ensures their directory.

    The key directory is derived from ``_SSH_KEY_PATH_PREFIX`` and created
    (requested mode 0o700) under the expanded home directory if it is missing.

    Args:
        user_hash: Value substituted into the ``{user_hash}`` placeholder of
            the key directory template.

    Returns:
        A ``(private_key_path, public_key_path, lock_path)`` tuple. The paths
        are built from the unexpanded template, so they still start with
        ``~``; callers must run ``os.path.expanduser`` on them before use.
    """
    key_dir = _SSH_KEY_PATH_PREFIX.format(user_hash=user_hash)
    os.makedirs(os.path.expanduser(key_dir), mode=0o700, exist_ok=True)

    file_names = ('sky-key', 'sky-key.pub', '.__internal-sky-key.lock')
    private_key_path, public_key_path, lock_path = (
        os.path.join(key_dir, file_name) for file_name in file_names)
    return private_key_path, public_key_path, lock_path
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _generate_rsa_key_pair() -> Tuple[str, str]:
    """Generates a new 2048-bit RSA key pair.

    Returns:
        A ``(public_key, private_key)`` tuple of strings. Note the order:
        the public key comes first. The public key is serialized in OpenSSH
        format; the private key is an unencrypted PEM in the
        TraditionalOpenSSL format. Both are stripped of surrounding
        whitespace.
    """
    # The cryptography imports stay local so that the expensive third-party
    # import is only paid when a key actually has to be generated.
    # pylint: disable=import-outside-toplevel
    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives import serialization
    from cryptography.hazmat.primitives.asymmetric import rsa

    rsa_key = rsa.generate_private_key(public_exponent=65537,
                                       key_size=2048,
                                       backend=default_backend())

    private_pem = rsa_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.TraditionalOpenSSL,
        encryption_algorithm=serialization.NoEncryption())
    private_key = private_pem.decode('utf-8').strip()

    public_openssh = rsa_key.public_key().public_bytes(
        encoding=serialization.Encoding.OpenSSH,
        format=serialization.PublicFormat.OpenSSH)
    public_key = public_openssh.decode('utf-8').strip()

    return public_key, private_key
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _save_key_pair(private_key_path: str, public_key_path: str,
                   private_key: str, public_key: str) -> None:
    """Writes an SSH key pair to disk.

    The parent directory of the private key is created (requested mode
    0o700) if it does not exist. The private key file always ends up with
    mode 0o600, even if a file with looser permissions already existed at
    that path. The public key file is created with requested mode 0o644.

    Args:
        private_key_path: Destination file for the private key.
        public_key_path: Destination file for the public key.
        private_key: Private key contents, written verbatim.
        public_key: Public key contents, written verbatim.
    """
    key_dir = os.path.dirname(private_key_path)
    # A bare file name has an empty dirname, and os.makedirs('') raises.
    if key_dir:
        os.makedirs(key_dir, exist_ok=True, mode=0o700)

    with open(
            private_key_path,
            'w',
            encoding='utf-8',
            opener=functools.partial(os.open, mode=0o600),
    ) as f:
        # The mode given to os.open() is only applied when the file is
        # created. Tighten the permissions explicitly, before any secret
        # material is written, so a pre-existing file with looser
        # permissions does not stay readable by other users.
        os.chmod(private_key_path, 0o600)
        f.write(private_key)

    with open(public_key_path,
              'w',
              encoding='utf-8',
              opener=functools.partial(os.open, mode=0o644)) as f:
        f.write(public_key)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def get_or_generate_keys() -> Tuple[str, str]:
    """Returns the user-expanded private and public key paths.

    If the private key file is missing, the key pair is looked up in
    ``global_user_state``; when no pair is stored there, a new RSA pair is
    generated and stored. In both cases the pair is then written to disk.
    The check and the write happen under a file lock (10 second timeout).

    Returns:
        A ``(private_key_path, public_key_path)`` tuple with ``~`` expanded.
    """
    user_hash = common_utils.get_user_hash()
    private_key_path, public_key_path, lock_path = (
        os.path.expanduser(raw_path)
        for raw_path in get_ssh_key_and_lock_path(user_hash))

    # Make sure the directory holding the lock file (the per-user key
    # directory) exists with requested mode 0o700.
    # NOTE(review): the previous comment here referred to
    # ~/.sky/generated/ssh and backend_utils.SSHConfigHelper, which does not
    # match the directory used in this function -- confirm and update.
    os.makedirs(os.path.dirname(lock_path), exist_ok=True, mode=0o700)

    with filelock.FileLock(lock_path, timeout=10):
        if not os.path.exists(private_key_path):
            stored_public, stored_private, found = (
                global_user_state.get_ssh_keys(user_hash))
            if not found:
                # Nothing stored for this user yet: create a pair and
                # store it before writing the files.
                stored_public, stored_private = _generate_rsa_key_pair()
                global_user_state.set_ssh_keys(user_hash, stored_public,
                                               stored_private)
            _save_key_pair(private_key_path, public_key_path, stored_private,
                           stored_public)

    assert os.path.exists(public_key_path), (
        'Private key found, but associated public key '
        f'{public_key_path} does not exist.')
    return private_key_path, public_key_path
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def create_ssh_key_files_from_db(private_key_path: str) -> bool:
    """Materialize a user's SSH key files from the database.

    The user hash is taken from the third-to-last component of
    ``private_key_path``, which is expected to look like
    ``~/.sky/clients/<user_hash>/ssh/sky-key``.

    Args:
        private_key_path: Path of the private key file to create.

    Returns:
        True if both key files exist when the function finishes, False if no
        keys are stored for the user.

    Raises:
        AssertionError: If the path does not have the expected layout, does
            not match the path generated for the user hash, or the public key
            file is missing afterwards.
    """
    # NOTE(review): nesting was reconstructed from a flattened diff; the final
    # assert is assumed to run after the file lock is released - confirm.
    path_parts = os.path.normpath(private_key_path).split(os.path.sep)
    assert path_parts[-1] == 'sky-key'
    assert path_parts[-2] == 'ssh'
    user_hash = path_parts[-3]

    generated_paths = get_ssh_key_and_lock_path(user_hash)
    private_key_path_generated, public_key_path, lock_path = generated_paths
    expected_private_key_path = os.path.expanduser(private_key_path_generated)
    assert private_key_path == expected_private_key_path, (
        f'Private key path {private_key_path} does not '
        'match the generated path '
        f'{expected_private_key_path}')

    private_key_path = os.path.expanduser(private_key_path)
    public_key_path = os.path.expanduser(public_key_path)
    lock_path = os.path.expanduser(lock_path)

    # Fast path: nothing to do when both files are already present.
    if os.path.exists(private_key_path) and os.path.exists(public_key_path):
        return True

    # We should have the folder ~/.sky/generated/ssh to have 0o700 permission,
    # as the ssh configs will be written to this folder as well in
    # backend_utils.SSHConfigHelper
    os.makedirs(os.path.dirname(lock_path), exist_ok=True, mode=0o700)

    with filelock.FileLock(lock_path, timeout=10):
        if not os.path.exists(private_key_path):
            stored_keys = global_user_state.get_ssh_keys(user_hash)
            ssh_public_key, ssh_private_key, exists = stored_keys
            if not exists:
                logger.debug(f'SSH keys not found for user {user_hash}')
                return False
            _save_key_pair(private_key_path, public_key_path, ssh_private_key,
                           ssh_public_key)

    assert os.path.exists(public_key_path), (
        'Private key found, but associated public key '
        f'{public_key_path} does not exist.')
    return True
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Utility functions for benchmarking."""
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
import logging
|
|
5
|
+
import time
|
|
6
|
+
from typing import Callable, Optional
|
|
7
|
+
|
|
8
|
+
from sky import sky_logging
|
|
9
|
+
|
|
10
|
+
logger = sky_logging.init_logger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def log_execution_time(func: Optional[Callable] = None,
                       *,
                       name: Optional[str] = None,
                       level: int = logging.DEBUG,
                       precision: int = 4) -> Callable:
    """Mark a function and log its execution time.

    The elapsed time is logged every time the decorated function returns or
    raises; the function's return value or exception is passed through
    unchanged.

    Args:
        func: Function to decorate. Left as None when the decorator is used
            with arguments, in which case a decorator is returned.
        name: Name to show in the log message. Defaults to the decorated
            function's ``__name__``.
        level: Logging level.
        precision: Number of decimal places (default: 4).

    Returns:
        The wrapped function, or a decorator when ``func`` is None.

    Usage:
        from sky.utils import benchmark_utils

        @benchmark_utils.log_execution_time
        def my_function():
            pass

        @benchmark_utils.log_execution_time(name='my_module.my_function2')
        def my_function2():
            pass
    """

    def decorator(f: Callable) -> Callable:
        # Resolve the label once per decorated function and keep it local.
        # Writing it back to the enclosing `name` would leak the first
        # function's name into every other function decorated through the
        # same `log_execution_time(...)` result.
        label = name or getattr(f, '__name__', repr(f))

        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            start_time = time.perf_counter()
            try:
                return f(*args, **kwargs)
            finally:
                # Runs on both normal return and exception.
                execution_time = time.perf_counter() - start_time
                log = (f'Method {label} executed in '
                       f'{execution_time:.{precision}f}')
                logger.log(level, log)

        return wrapper

    if func is None:
        return decorator
    return decorator(func)
|
|
@@ -6,11 +6,12 @@ import click
|
|
|
6
6
|
import colorama
|
|
7
7
|
|
|
8
8
|
from sky import backends
|
|
9
|
+
from sky.schemas.api import responses
|
|
9
10
|
from sky.utils import common_utils
|
|
10
|
-
from sky.utils import controller_utils
|
|
11
11
|
from sky.utils import log_utils
|
|
12
12
|
from sky.utils import resources_utils
|
|
13
13
|
from sky.utils import status_lib
|
|
14
|
+
from sky.utils import ux_utils
|
|
14
15
|
|
|
15
16
|
if typing.TYPE_CHECKING:
|
|
16
17
|
from sky.provision.kubernetes import utils as kubernetes_utils
|
|
@@ -33,24 +34,23 @@ class StatusColumn:
|
|
|
33
34
|
def __init__(self,
|
|
34
35
|
name: str,
|
|
35
36
|
calc_func: Callable,
|
|
36
|
-
|
|
37
|
+
truncate: bool = True,
|
|
37
38
|
show_by_default: bool = True):
|
|
38
39
|
self.name = name
|
|
39
40
|
self.calc_func = calc_func
|
|
40
|
-
self.
|
|
41
|
+
self.truncate: bool = truncate
|
|
41
42
|
self.show_by_default = show_by_default
|
|
42
43
|
|
|
43
44
|
def calc(self, record):
|
|
44
|
-
val = self.calc_func(record)
|
|
45
|
-
if self.trunc_length != 0:
|
|
46
|
-
val = common_utils.truncate_long_string(str(val), self.trunc_length)
|
|
45
|
+
val = self.calc_func(record, self.truncate)
|
|
47
46
|
return val
|
|
48
47
|
|
|
49
48
|
|
|
50
|
-
def show_status_table(cluster_records: List[
|
|
49
|
+
def show_status_table(cluster_records: List[responses.StatusResponse],
|
|
51
50
|
show_all: bool,
|
|
52
51
|
show_user: bool,
|
|
53
|
-
query_clusters: Optional[List[str]] = None
|
|
52
|
+
query_clusters: Optional[List[str]] = None,
|
|
53
|
+
show_workspaces: bool = False) -> int:
|
|
54
54
|
"""Compute cluster table values and display.
|
|
55
55
|
|
|
56
56
|
Returns:
|
|
@@ -58,7 +58,6 @@ def show_status_table(cluster_records: List[_ClusterRecord],
|
|
|
58
58
|
STOPPED.
|
|
59
59
|
"""
|
|
60
60
|
# TODO(zhwu): Update the information for autostop clusters.
|
|
61
|
-
|
|
62
61
|
status_columns = [
|
|
63
62
|
StatusColumn('NAME', _get_name),
|
|
64
63
|
]
|
|
@@ -68,19 +67,24 @@ def show_status_table(cluster_records: List[_ClusterRecord],
|
|
|
68
67
|
StatusColumn('USER_ID', _get_user_hash, show_by_default=False))
|
|
69
68
|
|
|
70
69
|
status_columns += [
|
|
71
|
-
StatusColumn('
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
StatusColumn('
|
|
76
|
-
StatusColumn('ZONE', _get_zone, show_by_default=False),
|
|
70
|
+
StatusColumn('WORKSPACE',
|
|
71
|
+
_get_workspace,
|
|
72
|
+
show_by_default=show_workspaces),
|
|
73
|
+
StatusColumn('INFRA', _get_infra, truncate=not show_all),
|
|
74
|
+
StatusColumn('RESOURCES', _get_resources, truncate=not show_all),
|
|
77
75
|
StatusColumn('STATUS', _get_status_colored),
|
|
78
76
|
StatusColumn('AUTOSTOP', _get_autostop),
|
|
79
|
-
StatusColumn('
|
|
80
|
-
StatusColumn('COMMAND',
|
|
81
|
-
_get_command,
|
|
82
|
-
trunc_length=COMMAND_TRUNC_LENGTH if not show_all else 0),
|
|
77
|
+
StatusColumn('LAUNCHED', _get_launched),
|
|
83
78
|
]
|
|
79
|
+
if show_all:
|
|
80
|
+
status_columns += [
|
|
81
|
+
StatusColumn('HEAD_IP', _get_head_ip, show_by_default=False),
|
|
82
|
+
StatusColumn('COMMAND',
|
|
83
|
+
_get_command,
|
|
84
|
+
truncate=not show_all,
|
|
85
|
+
show_by_default=False),
|
|
86
|
+
StatusColumn('LAST_EVENT', _get_last_event, show_by_default=False),
|
|
87
|
+
]
|
|
84
88
|
|
|
85
89
|
columns = []
|
|
86
90
|
for status_column in status_columns:
|
|
@@ -102,17 +106,16 @@ def show_status_table(cluster_records: List[_ClusterRecord],
|
|
|
102
106
|
|
|
103
107
|
if query_clusters:
|
|
104
108
|
cluster_names = {record['name'] for record in cluster_records}
|
|
105
|
-
not_found_clusters =
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
cluster_str += '
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
click.echo(f'{cluster_str} not found.')
|
|
109
|
+
not_found_clusters = ux_utils.get_non_matched_query(
|
|
110
|
+
query_clusters, cluster_names)
|
|
111
|
+
not_found_clusters = [repr(cluster) for cluster in not_found_clusters]
|
|
112
|
+
if not_found_clusters:
|
|
113
|
+
cluster_str = 'Cluster'
|
|
114
|
+
if len(not_found_clusters) > 1:
|
|
115
|
+
cluster_str += 's'
|
|
116
|
+
cluster_str += ' '
|
|
117
|
+
cluster_str += ', '.join(not_found_clusters)
|
|
118
|
+
click.echo(f'{cluster_str} not found.')
|
|
116
119
|
elif not cluster_records:
|
|
117
120
|
click.echo('No existing clusters.')
|
|
118
121
|
return num_pending_autostop
|
|
@@ -134,7 +137,8 @@ def get_total_cost_of_displayed_records(
|
|
|
134
137
|
|
|
135
138
|
def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
|
|
136
139
|
show_all: bool,
|
|
137
|
-
controller_name: Optional[str] = None
|
|
140
|
+
controller_name: Optional[str] = None,
|
|
141
|
+
days: Optional[int] = None):
|
|
138
142
|
"""Compute cluster table values and display for cost report.
|
|
139
143
|
|
|
140
144
|
For each cluster, this shows: cluster name, resources, launched time,
|
|
@@ -160,10 +164,10 @@ def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
|
|
|
160
164
|
status_columns = [
|
|
161
165
|
StatusColumn('NAME', _get_name),
|
|
162
166
|
StatusColumn('LAUNCHED', _get_launched),
|
|
163
|
-
StatusColumn('DURATION', _get_duration,
|
|
167
|
+
StatusColumn('DURATION', _get_duration, truncate=False),
|
|
164
168
|
StatusColumn('RESOURCES',
|
|
165
169
|
_get_resources_for_cost_report,
|
|
166
|
-
|
|
170
|
+
truncate=False),
|
|
167
171
|
StatusColumn('STATUS',
|
|
168
172
|
_get_status_for_cost_report,
|
|
169
173
|
show_by_default=True),
|
|
@@ -197,22 +201,21 @@ def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
|
|
|
197
201
|
cluster_table.add_row(row)
|
|
198
202
|
|
|
199
203
|
if cluster_records:
|
|
204
|
+
controller_record = cluster_records[0]
|
|
200
205
|
if controller_name is not None:
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
autostop_minutes, _ = (
|
|
205
|
-
controller_utils.get_controller_autostop_config(
|
|
206
|
-
controller=controller))
|
|
207
|
-
if autostop_minutes is not None:
|
|
206
|
+
autostop = controller_record.get('autostop', None)
|
|
207
|
+
autostop_str = ''
|
|
208
|
+
if autostop is not None:
|
|
208
209
|
autostop_str = (f'{colorama.Style.DIM} (will be autostopped if '
|
|
209
|
-
f'idle for {
|
|
210
|
+
f'idle for {autostop}min)'
|
|
210
211
|
f'{colorama.Style.RESET_ALL}')
|
|
211
212
|
click.echo(f'\n{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
|
|
212
213
|
f'{controller_name}{colorama.Style.RESET_ALL}'
|
|
213
214
|
f'{autostop_str}')
|
|
214
215
|
else:
|
|
215
|
-
|
|
216
|
+
days_str = '' if days is None else f' (last {days} days)'
|
|
217
|
+
click.echo(f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
|
|
218
|
+
f'Clusters{days_str}'
|
|
216
219
|
f'{colorama.Style.RESET_ALL}')
|
|
217
220
|
click.echo(cluster_table)
|
|
218
221
|
|
|
@@ -220,47 +223,80 @@ def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
|
|
|
220
223
|
# Some of these lambdas are invoked on both _ClusterRecord and
|
|
221
224
|
# _ClusterCostReportRecord, which is okay as we guarantee the queried fields
|
|
222
225
|
# exist in those cases.
|
|
223
|
-
_get_name = (lambda cluster_record: cluster_record['name'])
|
|
224
|
-
_get_user_hash = (lambda cluster_record: cluster_record['user_hash'])
|
|
225
|
-
_get_user_name = (
|
|
226
|
-
|
|
226
|
+
_get_name = (lambda cluster_record, _: cluster_record['name'])
|
|
227
|
+
_get_user_hash = (lambda cluster_record, _: cluster_record['user_hash'])
|
|
228
|
+
_get_user_name = (
|
|
229
|
+
lambda cluster_record, _: cluster_record.get('user_name', '-'))
|
|
230
|
+
_get_launched = (lambda cluster_record, _: log_utils.readable_time_duration(
|
|
227
231
|
cluster_record['launched_at']))
|
|
228
|
-
|
|
229
|
-
lambda clusters_status: clusters_status['handle'].launched_resources.region)
|
|
230
|
-
_get_command = (lambda cluster_record: cluster_record['last_use'])
|
|
231
|
-
_get_duration = (lambda cluster_record: log_utils.readable_time_duration(
|
|
232
|
+
_get_duration = (lambda cluster_record, _: log_utils.readable_time_duration(
|
|
232
233
|
0, cluster_record['duration'], absolute=True))
|
|
233
234
|
|
|
234
235
|
|
|
235
|
-
def
|
|
236
|
-
|
|
236
|
+
def _get_command(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
    """Return the record's 'last_use' value, shortened when truncate is set.

    Falls back to '-' when the record has no 'last_use' entry.
    """
    last_command = cluster_record.get('last_use', '-')
    if not truncate:
        return last_command
    return common_utils.truncate_long_string(last_command,
                                             COMMAND_TRUNC_LENGTH)
|
|
237
241
|
|
|
238
242
|
|
|
239
|
-
def
|
|
240
|
-
|
|
243
|
+
def _get_status(cluster_record: _ClusterRecord,
                truncate: bool = True) -> status_lib.ClusterStatus:
    """Return the record's 'status' entry; `truncate` is ignored."""
    del truncate  # Unused
    status = cluster_record['status']
    return status
|
|
241
247
|
|
|
242
248
|
|
|
243
|
-
def
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
if isinstance(handle, backends.LocalDockerResourceHandle):
|
|
248
|
-
resources_str = 'docker'
|
|
249
|
-
elif isinstance(handle, backends.CloudVmRayResourceHandle):
|
|
250
|
-
resources_str = resources_utils.get_readable_resources_repr(handle)
|
|
251
|
-
else:
|
|
252
|
-
raise ValueError(f'Unknown handle type {type(handle)} encountered.')
|
|
253
|
-
return resources_str
|
|
249
|
+
def _get_workspace(cluster_record: _ClusterRecord,
                   truncate: bool = True) -> str:
    """Return the record's 'workspace' entry; `truncate` is ignored."""
    del truncate  # Unused
    workspace = cluster_record['workspace']
    return workspace
|
|
254
253
|
|
|
255
254
|
|
|
256
|
-
def
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
return zone_str
|
|
255
|
+
def _get_status_colored(cluster_record: _ClusterRecord,
                        truncate: bool = True) -> str:
    """Return the colored string form of the record's status.

    `truncate` is ignored.
    """
    del truncate  # Unused
    status = _get_status(cluster_record)
    return status.colored_str()
|
|
261
259
|
|
|
262
260
|
|
|
263
|
-
def
|
|
261
|
+
def _get_resources(cluster_record: _ClusterRecord,
                   truncate: bool = True) -> str:
    """Get the resources information for a cluster.

    Strings already stored on the record ('resources_str', and
    'resources_str_full' when not truncating) are preferred; the
    representation is only computed from the handle when neither applies.

    Returns:
        A string in one of the following formats:
        - For cloud VMs: "Nx instance_type" (e.g., "1x m6i.2xlarge")
        - For K8S/SSH: "Nx (...)"
        - "-" if no resource information is available
    """
    handle = cluster_record['handle']
    if not isinstance(handle, backends.CloudVmRayResourceHandle):
        return '-'
    if handle.launched_resources is None:
        return '-'

    # For cloud VMs, show instance type directly
    # For K8S/SSH, show (...) as the resource type
    display = cluster_record.get('resources_str', None)
    if not truncate:
        full_from_record = cluster_record.get('resources_str_full', None)
        if full_from_record is not None:
            display = full_from_record
    if display is not None:
        return display

    # Nothing usable on the record: derive the strings from the handle.
    simple_repr, full_repr = resources_utils.get_readable_resources_repr(
        handle, simplified_only=truncate)
    if truncate:
        return simple_repr
    assert full_repr is not None
    return full_repr
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _get_autostop(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
|
|
299
|
+
del truncate
|
|
264
300
|
autostop_str = ''
|
|
265
301
|
separation = ''
|
|
266
302
|
if cluster_record['autostop'] >= 0:
|
|
@@ -275,7 +311,8 @@ def _get_autostop(cluster_record: _ClusterRecord) -> str:
|
|
|
275
311
|
return autostop_str
|
|
276
312
|
|
|
277
313
|
|
|
278
|
-
def _get_head_ip(cluster_record: _ClusterRecord) -> str:
|
|
314
|
+
def _get_head_ip(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
|
|
315
|
+
del truncate # Unused
|
|
279
316
|
handle = cluster_record['handle']
|
|
280
317
|
if not isinstance(handle, backends.CloudVmRayResourceHandle):
|
|
281
318
|
return '-'
|
|
@@ -284,17 +321,46 @@ def _get_head_ip(cluster_record: _ClusterRecord) -> str:
|
|
|
284
321
|
return handle.head_ip
|
|
285
322
|
|
|
286
323
|
|
|
324
|
+
def _get_last_event(cluster_record: _ClusterRecord,
                    truncate: bool = True) -> str:
    """Return the record's 'last_event', or a placeholder when it is unset.

    `truncate` is ignored.
    """
    del truncate  # Unused
    last_event = cluster_record.get('last_event', None)
    return 'No recorded events.' if last_event is None else last_event
|
|
330
|
+
|
|
331
|
+
|
|
287
332
|
def _is_pending_autostop(cluster_record: _ClusterRecord) -> bool:
    """Return whether the cluster has an autostop scheduled and is not STOPPED."""
    # autostop < 0 means nothing scheduled.
    if not cluster_record['autostop'] >= 0:
        return False
    status = _get_status(cluster_record)
    return status != status_lib.ClusterStatus.STOPPED
|
|
291
336
|
|
|
292
337
|
|
|
338
|
+
def _get_infra(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
    """Get the infrastructure information for a cluster.

    Returns:
        A string in one of the following formats:
        - AWS/region (e.g., "AWS/us-east-1")
        - K8S/context (e.g., "K8S/my-ctx")
        - SSH/hostname (e.g., "SSH/my-tobi-box")
        - "-" if no infrastructure information is available
    """
    handle = cluster_record['handle']
    if not isinstance(handle, backends.CloudVmRayResourceHandle):
        return '-'

    launched = handle.launched_resources
    if launched is None:
        # If launched_resources is None, try to get infra from the record
        return cluster_record.get('infra', '-')
    return launched.infra.formatted_str(truncate)
|
|
355
|
+
|
|
356
|
+
|
|
293
357
|
# ---- 'sky cost-report' helper functions below ----
|
|
294
358
|
|
|
295
359
|
|
|
296
360
|
def _get_status_value_for_cost_report(
|
|
297
|
-
cluster_cost_report_record: _ClusterCostReportRecord
|
|
361
|
+
cluster_cost_report_record: _ClusterCostReportRecord,
|
|
362
|
+
truncate: bool = True) -> int:
|
|
363
|
+
del truncate
|
|
298
364
|
status = cluster_cost_report_record['status']
|
|
299
365
|
if status is None:
|
|
300
366
|
return -1
|
|
@@ -302,7 +368,9 @@ def _get_status_value_for_cost_report(
|
|
|
302
368
|
|
|
303
369
|
|
|
304
370
|
def _get_status_for_cost_report(
|
|
305
|
-
cluster_cost_report_record: _ClusterCostReportRecord
|
|
371
|
+
cluster_cost_report_record: _ClusterCostReportRecord,
|
|
372
|
+
truncate: bool = True) -> str:
|
|
373
|
+
del truncate
|
|
306
374
|
status = cluster_cost_report_record['status']
|
|
307
375
|
if status is None:
|
|
308
376
|
return f'{colorama.Style.DIM}TERMINATED{colorama.Style.RESET_ALL}'
|
|
@@ -310,7 +378,9 @@ def _get_status_for_cost_report(
|
|
|
310
378
|
|
|
311
379
|
|
|
312
380
|
def _get_resources_for_cost_report(
|
|
313
|
-
cluster_cost_report_record: _ClusterCostReportRecord
|
|
381
|
+
cluster_cost_report_record: _ClusterCostReportRecord,
|
|
382
|
+
truncate: bool = True) -> str:
|
|
383
|
+
del truncate
|
|
314
384
|
launched_nodes = cluster_cost_report_record['num_nodes']
|
|
315
385
|
launched_resources = cluster_cost_report_record['resources']
|
|
316
386
|
|
|
@@ -322,7 +392,9 @@ def _get_resources_for_cost_report(
|
|
|
322
392
|
|
|
323
393
|
|
|
324
394
|
def _get_price_for_cost_report(
|
|
325
|
-
cluster_cost_report_record: _ClusterCostReportRecord
|
|
395
|
+
cluster_cost_report_record: _ClusterCostReportRecord,
|
|
396
|
+
truncate: bool = True) -> str:
|
|
397
|
+
del truncate
|
|
326
398
|
launched_nodes = cluster_cost_report_record['num_nodes']
|
|
327
399
|
launched_resources = cluster_cost_report_record['resources']
|
|
328
400
|
|
|
@@ -332,7 +404,9 @@ def _get_price_for_cost_report(
|
|
|
332
404
|
|
|
333
405
|
|
|
334
406
|
def _get_estimated_cost_for_cost_report(
|
|
335
|
-
cluster_cost_report_record: _ClusterCostReportRecord
|
|
407
|
+
cluster_cost_report_record: _ClusterCostReportRecord,
|
|
408
|
+
truncate: bool = True) -> str:
|
|
409
|
+
del truncate
|
|
336
410
|
cost = cluster_cost_report_record['total_cost']
|
|
337
411
|
|
|
338
412
|
if not cost:
|
|
@@ -342,18 +416,17 @@ def _get_estimated_cost_for_cost_report(
|
|
|
342
416
|
|
|
343
417
|
|
|
344
418
|
def show_kubernetes_cluster_status_table(
|
|
345
|
-
clusters: List['kubernetes_utils.
|
|
419
|
+
clusters: List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
|
|
346
420
|
show_all: bool) -> None:
|
|
347
421
|
"""Compute cluster table values and display for Kubernetes clusters."""
|
|
348
422
|
status_columns = [
|
|
349
|
-
StatusColumn('USER', lambda c: c.user),
|
|
350
|
-
StatusColumn('NAME', lambda c: c.cluster_name),
|
|
351
|
-
StatusColumn('
|
|
352
|
-
|
|
353
|
-
StatusColumn(
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
StatusColumn('STATUS', lambda c: c.status.colored_str()),
|
|
423
|
+
StatusColumn('USER', lambda c, _: c.user),
|
|
424
|
+
StatusColumn('NAME', lambda c, _: c.cluster_name),
|
|
425
|
+
StatusColumn('RESOURCES', lambda c, _: c.resources_str, truncate=False),
|
|
426
|
+
StatusColumn('STATUS', lambda c, _: c.status.colored_str()),
|
|
427
|
+
StatusColumn(
|
|
428
|
+
'LAUNCHED',
|
|
429
|
+
lambda c, _: log_utils.readable_time_duration(c.launched_at)),
|
|
357
430
|
# TODO(romilb): We should consider adding POD_NAME field here when --all
|
|
358
431
|
# is passed to help users fetch pod name programmatically.
|
|
359
432
|
]
|