skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/server/requests/payloads.py
CHANGED
|
@@ -1,11 +1,28 @@
|
|
|
1
1
|
"""Payloads for the Sky API requests.
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
3
|
+
All the payloads that will be used between the client and server communication
|
|
4
|
+
must be defined here to make sure they get covered by our API compatibility tests.
|
|
5
|
+
|
|
6
|
+
Compatibility note:
|
|
7
|
+
- Adding a new body for new API is compatible as long as the SDK method using
|
|
8
|
+
the new API is properly decorated with `versions.minimal_api_version`.
|
|
9
|
+
- Adding a new field with default value to an existing body is compatible at
|
|
10
|
+
API level, but the business logic must handle the case where the field is
|
|
11
|
+
not processed by an old version of remote client/server. This can usually
|
|
12
|
+
be done by checking `versions.get_remote_api_version()`.
|
|
13
|
+
- Other changes are not compatible at API level, so must be handled specially.
|
|
14
|
+
A common pattern is to keep both the old and new version of the body and
|
|
15
|
+
checking `versions.get_remote_api_version()` to decide which body to use. For
|
|
16
|
+
example, say we refactor the `LaunchBody`, the original `LaunchBody` must be
|
|
17
|
+
kept in the codebase and the new body should be added via `LaunchBodyV2`.
|
|
18
|
+
Then if the remote runs in an old version, the local code should still send
|
|
19
|
+
`LaunchBody` to keep the backward compatibility. `LaunchBody` can be removed
|
|
20
|
+
later when constants.MIN_COMPATIBLE_API_VERSION is updated to a version that
|
|
21
|
+
supports `LaunchBodyV2`.
|
|
22
|
+
|
|
23
|
+
Also refer to sky.server.constants.MIN_COMPATIBLE_API_VERSION and the
|
|
24
|
+
sky.server.versions module for more details.
|
|
7
25
|
"""
|
|
8
|
-
import getpass
|
|
9
26
|
import os
|
|
10
27
|
import typing
|
|
11
28
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
@@ -16,6 +33,7 @@ from sky import sky_logging
|
|
|
16
33
|
from sky import skypilot_config
|
|
17
34
|
from sky.adaptors import common as adaptors_common
|
|
18
35
|
from sky.server import common
|
|
36
|
+
from sky.skylet import autostop_lib
|
|
19
37
|
from sky.skylet import constants
|
|
20
38
|
from sky.usage import constants as usage_constants
|
|
21
39
|
from sky.usage import usage_lib
|
|
@@ -38,51 +56,96 @@ logger = sky_logging.init_logger(__name__)
|
|
|
38
56
|
EXTERNAL_LOCAL_ENV_VARS = [
|
|
39
57
|
# Allow overriding the AWS authentication.
|
|
40
58
|
'AWS_PROFILE',
|
|
59
|
+
'AWS_DEFAULT_PROFILE',
|
|
41
60
|
'AWS_ACCESS_KEY_ID',
|
|
42
61
|
'AWS_SECRET_ACCESS_KEY',
|
|
62
|
+
'AWS_SESSION_TOKEN',
|
|
63
|
+
# Allow overriding the Azure authentication.
|
|
64
|
+
'AZURE_CLIENT_ID',
|
|
65
|
+
'AZURE_CLIENT_SECRET',
|
|
66
|
+
'AZURE_TENANT_ID',
|
|
67
|
+
'AZURE_SUBSCRIPTION_ID',
|
|
43
68
|
# Allow overriding the GCP authentication.
|
|
44
69
|
'GOOGLE_APPLICATION_CREDENTIALS',
|
|
70
|
+
# Allow overriding the kubeconfig.
|
|
71
|
+
'KUBECONFIG',
|
|
45
72
|
]
|
|
46
73
|
|
|
47
74
|
|
|
48
|
-
@annotations.lru_cache(scope='global')
|
|
49
75
|
def request_body_env_vars() -> dict:
|
|
50
76
|
env_vars = {}
|
|
51
77
|
for env_var in os.environ:
|
|
52
|
-
if env_var.startswith(constants.SKYPILOT_ENV_VAR_PREFIX)
|
|
78
|
+
if (env_var.startswith(constants.SKYPILOT_ENV_VAR_PREFIX) and
|
|
79
|
+
not env_var.startswith(
|
|
80
|
+
constants.SKYPILOT_SERVER_ENV_VAR_PREFIX)):
|
|
53
81
|
env_vars[env_var] = os.environ[env_var]
|
|
54
82
|
if common.is_api_server_local() and env_var in EXTERNAL_LOCAL_ENV_VARS:
|
|
55
83
|
env_vars[env_var] = os.environ[env_var]
|
|
56
84
|
env_vars[constants.USER_ID_ENV_VAR] = common_utils.get_user_hash()
|
|
57
|
-
env_vars[constants.USER_ENV_VAR] =
|
|
58
|
-
getpass.getuser())
|
|
85
|
+
env_vars[constants.USER_ENV_VAR] = common_utils.get_current_user_name()
|
|
59
86
|
env_vars[
|
|
60
87
|
usage_constants.USAGE_RUN_ID_ENV_VAR] = usage_lib.messages.usage.run_id
|
|
88
|
+
if not common.is_api_server_local():
|
|
89
|
+
# Used in job controller, for local API server, keep the
|
|
90
|
+
# SKYPILOT_CONFIG env var to use the config for the managed job.
|
|
91
|
+
env_vars.pop(skypilot_config.ENV_VAR_SKYPILOT_CONFIG, None)
|
|
61
92
|
# Remove the path to config file, as the config content is included in the
|
|
62
93
|
# request body and will be merged with the config on the server side.
|
|
63
|
-
env_vars.pop(skypilot_config.ENV_VAR_SKYPILOT_CONFIG, None)
|
|
64
94
|
env_vars.pop(skypilot_config.ENV_VAR_GLOBAL_CONFIG, None)
|
|
65
95
|
env_vars.pop(skypilot_config.ENV_VAR_PROJECT_CONFIG, None)
|
|
96
|
+
# Remove the config related env vars, as the client config override
|
|
97
|
+
# should be passed in the request body.
|
|
98
|
+
# Any new environment variables that are server-specific should
|
|
99
|
+
# use SKYPILOT_SERVER_ENV_VAR_PREFIX.
|
|
100
|
+
env_vars.pop(constants.ENV_VAR_DB_CONNECTION_URI, None)
|
|
66
101
|
return env_vars
|
|
67
102
|
|
|
68
103
|
|
|
69
104
|
def get_override_skypilot_config_from_client() -> Dict[str, Any]:
|
|
70
105
|
"""Returns the override configs from the client."""
|
|
106
|
+
if annotations.is_on_api_server:
|
|
107
|
+
return {}
|
|
71
108
|
config = skypilot_config.to_dict()
|
|
72
109
|
# Remove the API server config, as we should not specify the SkyPilot
|
|
73
110
|
# server endpoint on the server side. This avoids the warning at
|
|
74
111
|
# server-side.
|
|
75
112
|
config.pop_nested(('api_server',), default_value=None)
|
|
113
|
+
# Remove the admin policy, as the policy has been applied on the client
|
|
114
|
+
# side.
|
|
115
|
+
config.pop_nested(('admin_policy',), default_value=None)
|
|
76
116
|
return config
|
|
77
117
|
|
|
78
118
|
|
|
79
|
-
|
|
119
|
+
def get_override_skypilot_config_path_from_client() -> Optional[str]:
|
|
120
|
+
"""Returns the override config path from the client."""
|
|
121
|
+
if annotations.is_on_api_server:
|
|
122
|
+
return None
|
|
123
|
+
# Currently, we don't need to check if the client-side config
|
|
124
|
+
# has been overridden because we only deal with cases where
|
|
125
|
+
# client has a project-level config/changed config and the
|
|
126
|
+
# api server has a different config.
|
|
127
|
+
return skypilot_config.loaded_config_path_serialized()
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class BasePayload(pydantic.BaseModel):
|
|
131
|
+
"""The base payload for the SkyPilot API."""
|
|
132
|
+
# Ignore extra fields in the request body, which is useful for backward
|
|
133
|
+
# compatibility. The difference with `allow` is that `ignore` will not
|
|
134
|
+
# include the unknown fields when dump the model, i.e., we can add new
|
|
135
|
+
# fields to the request body without breaking the existing old API server
|
|
136
|
+
# where the handler function does not accept the new field in function
|
|
137
|
+
# signature.
|
|
138
|
+
model_config = pydantic.ConfigDict(extra='ignore')
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class RequestBody(BasePayload):
|
|
80
142
|
"""The request body for the SkyPilot API."""
|
|
81
143
|
env_vars: Dict[str, str] = {}
|
|
82
144
|
entrypoint: str = ''
|
|
83
145
|
entrypoint_command: str = ''
|
|
84
146
|
using_remote_api_server: bool = False
|
|
85
147
|
override_skypilot_config: Optional[Dict[str, Any]] = {}
|
|
148
|
+
override_skypilot_config_path: Optional[str] = None
|
|
86
149
|
|
|
87
150
|
def __init__(self, **data):
|
|
88
151
|
data['env_vars'] = data.get('env_vars', request_body_env_vars())
|
|
@@ -97,6 +160,9 @@ class RequestBody(pydantic.BaseModel):
|
|
|
97
160
|
data['override_skypilot_config'] = data.get(
|
|
98
161
|
'override_skypilot_config',
|
|
99
162
|
get_override_skypilot_config_from_client())
|
|
163
|
+
data['override_skypilot_config_path'] = data.get(
|
|
164
|
+
'override_skypilot_config_path',
|
|
165
|
+
get_override_skypilot_config_path_from_client())
|
|
100
166
|
super().__init__(**data)
|
|
101
167
|
|
|
102
168
|
def to_kwargs(self) -> Dict[str, Any]:
|
|
@@ -111,6 +177,7 @@ class RequestBody(pydantic.BaseModel):
|
|
|
111
177
|
kwargs.pop('entrypoint_command')
|
|
112
178
|
kwargs.pop('using_remote_api_server')
|
|
113
179
|
kwargs.pop('override_skypilot_config')
|
|
180
|
+
kwargs.pop('override_skypilot_config_path')
|
|
114
181
|
return kwargs
|
|
115
182
|
|
|
116
183
|
@property
|
|
@@ -122,6 +189,13 @@ class CheckBody(RequestBody):
|
|
|
122
189
|
"""The request body for the check endpoint."""
|
|
123
190
|
clouds: Optional[Tuple[str, ...]] = None
|
|
124
191
|
verbose: bool = False
|
|
192
|
+
workspace: Optional[str] = None
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class EnabledCloudsBody(RequestBody):
|
|
196
|
+
"""The request body for the enabled clouds endpoint."""
|
|
197
|
+
workspace: Optional[str] = None
|
|
198
|
+
expand: bool = False
|
|
125
199
|
|
|
126
200
|
|
|
127
201
|
class DagRequestBody(RequestBody):
|
|
@@ -144,17 +218,33 @@ class DagRequestBody(RequestBody):
|
|
|
144
218
|
return kwargs
|
|
145
219
|
|
|
146
220
|
|
|
147
|
-
class
|
|
221
|
+
class DagRequestBodyWithRequestOptions(DagRequestBody):
|
|
222
|
+
"""Request body base class for endpoints with a dag and request options."""
|
|
223
|
+
request_options: Optional[admin_policy.RequestOptions]
|
|
224
|
+
|
|
225
|
+
def get_request_options(self) -> Optional[admin_policy.RequestOptions]:
|
|
226
|
+
"""Get the request options."""
|
|
227
|
+
if self.request_options is None:
|
|
228
|
+
return None
|
|
229
|
+
if isinstance(self.request_options, dict):
|
|
230
|
+
return admin_policy.RequestOptions(**self.request_options)
|
|
231
|
+
return self.request_options
|
|
232
|
+
|
|
233
|
+
def to_kwargs(self) -> Dict[str, Any]:
|
|
234
|
+
kwargs = super().to_kwargs()
|
|
235
|
+
kwargs['request_options'] = self.get_request_options()
|
|
236
|
+
return kwargs
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
class ValidateBody(DagRequestBodyWithRequestOptions):
|
|
148
240
|
"""The request body for the validate endpoint."""
|
|
149
241
|
dag: str
|
|
150
|
-
request_options: Optional[admin_policy.RequestOptions]
|
|
151
242
|
|
|
152
243
|
|
|
153
|
-
class OptimizeBody(
|
|
244
|
+
class OptimizeBody(DagRequestBodyWithRequestOptions):
|
|
154
245
|
"""The request body for the optimize endpoint."""
|
|
155
246
|
dag: str
|
|
156
247
|
minimize: common_lib.OptimizeTarget = common_lib.OptimizeTarget.COST
|
|
157
|
-
request_options: Optional[admin_policy.RequestOptions]
|
|
158
248
|
|
|
159
249
|
|
|
160
250
|
class LaunchBody(RequestBody):
|
|
@@ -162,8 +252,10 @@ class LaunchBody(RequestBody):
|
|
|
162
252
|
task: str
|
|
163
253
|
cluster_name: str
|
|
164
254
|
retry_until_up: bool = False
|
|
255
|
+
# TODO(aylei): remove this field in v0.12.0
|
|
165
256
|
idle_minutes_to_autostop: Optional[int] = None
|
|
166
257
|
dryrun: bool = False
|
|
258
|
+
# TODO(aylei): remove this field in v0.12.0
|
|
167
259
|
down: bool = False
|
|
168
260
|
backend: Optional[str] = None
|
|
169
261
|
optimize_target: common_lib.OptimizeTarget = common_lib.OptimizeTarget.COST
|
|
@@ -229,12 +321,20 @@ class StatusBody(RequestBody):
|
|
|
229
321
|
cluster_names: Optional[List[str]] = None
|
|
230
322
|
refresh: common_lib.StatusRefreshMode = common_lib.StatusRefreshMode.NONE
|
|
231
323
|
all_users: bool = True
|
|
324
|
+
# TODO (kyuds): default to False post 0.12.0
|
|
325
|
+
include_credentials: bool = True
|
|
326
|
+
# Only return fields that are needed for the
|
|
327
|
+
# dashboard / CLI summary response
|
|
328
|
+
summary_response: bool = False
|
|
329
|
+
# Include the cluster handle in the response
|
|
330
|
+
include_handle: bool = True
|
|
232
331
|
|
|
233
332
|
|
|
234
333
|
class StartBody(RequestBody):
|
|
235
334
|
"""The request body for the start endpoint."""
|
|
236
335
|
cluster_name: str
|
|
237
336
|
idle_minutes_to_autostop: Optional[int] = None
|
|
337
|
+
wait_for: Optional[autostop_lib.AutostopWaitFor] = None
|
|
238
338
|
retry_until_up: bool = False
|
|
239
339
|
down: bool = False
|
|
240
340
|
force: bool = False
|
|
@@ -244,6 +344,7 @@ class AutostopBody(RequestBody):
|
|
|
244
344
|
"""The request body for the autostop endpoint."""
|
|
245
345
|
cluster_name: str
|
|
246
346
|
idle_minutes: int
|
|
347
|
+
wait_for: Optional[autostop_lib.AutostopWaitFor] = None
|
|
247
348
|
down: bool = False
|
|
248
349
|
|
|
249
350
|
|
|
@@ -271,9 +372,10 @@ class CancelBody(RequestBody):
|
|
|
271
372
|
return kwargs
|
|
272
373
|
|
|
273
374
|
|
|
274
|
-
class
|
|
375
|
+
class ProvisionLogsBody(RequestBody):
|
|
275
376
|
"""Cluster node."""
|
|
276
377
|
cluster_name: str
|
|
378
|
+
worker: Optional[int] = None
|
|
277
379
|
|
|
278
380
|
|
|
279
381
|
class ClusterJobBody(RequestBody):
|
|
@@ -297,6 +399,63 @@ class ClusterJobsDownloadLogsBody(RequestBody):
|
|
|
297
399
|
local_dir: str = constants.SKY_LOGS_DIRECTORY
|
|
298
400
|
|
|
299
401
|
|
|
402
|
+
class UserCreateBody(RequestBody):
|
|
403
|
+
"""The request body for the user create endpoint."""
|
|
404
|
+
username: str
|
|
405
|
+
password: str
|
|
406
|
+
role: Optional[str] = None
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
class UserDeleteBody(RequestBody):
|
|
410
|
+
"""The request body for the user delete endpoint."""
|
|
411
|
+
user_id: str
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
class UserUpdateBody(RequestBody):
|
|
415
|
+
"""The request body for the user update endpoint."""
|
|
416
|
+
user_id: str
|
|
417
|
+
role: Optional[str] = None
|
|
418
|
+
password: Optional[str] = None
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
class UserImportBody(RequestBody):
|
|
422
|
+
"""The request body for the user import endpoint."""
|
|
423
|
+
csv_content: str
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
class ServiceAccountTokenCreateBody(RequestBody):
|
|
427
|
+
"""The request body for creating a service account token."""
|
|
428
|
+
token_name: str
|
|
429
|
+
expires_in_days: Optional[int] = None
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
class ServiceAccountTokenDeleteBody(RequestBody):
|
|
433
|
+
"""The request body for deleting a service account token."""
|
|
434
|
+
token_id: str
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
class UpdateRoleBody(RequestBody):
|
|
438
|
+
"""The request body for updating a user role."""
|
|
439
|
+
role: str
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
class ServiceAccountTokenRoleBody(RequestBody):
|
|
443
|
+
"""The request body for getting a service account token role."""
|
|
444
|
+
token_id: str
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
class ServiceAccountTokenUpdateRoleBody(RequestBody):
|
|
448
|
+
"""The request body for updating a service account token role."""
|
|
449
|
+
token_id: str
|
|
450
|
+
role: str
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
class ServiceAccountTokenRotateBody(RequestBody):
|
|
454
|
+
"""The request body for rotating a service account token."""
|
|
455
|
+
token_id: str
|
|
456
|
+
expires_in_days: Optional[int] = None
|
|
457
|
+
|
|
458
|
+
|
|
300
459
|
class DownloadBody(RequestBody):
|
|
301
460
|
"""The request body for the download endpoint."""
|
|
302
461
|
folder_paths: List[str]
|
|
@@ -307,6 +466,40 @@ class StorageBody(RequestBody):
|
|
|
307
466
|
name: str
|
|
308
467
|
|
|
309
468
|
|
|
469
|
+
class VolumeApplyBody(RequestBody):
|
|
470
|
+
"""The request body for the volume apply endpoint."""
|
|
471
|
+
name: str
|
|
472
|
+
volume_type: str
|
|
473
|
+
cloud: str
|
|
474
|
+
region: Optional[str] = None
|
|
475
|
+
zone: Optional[str] = None
|
|
476
|
+
size: Optional[str] = None
|
|
477
|
+
config: Optional[Dict[str, Any]] = None
|
|
478
|
+
labels: Optional[Dict[str, str]] = None
|
|
479
|
+
use_existing: Optional[bool] = None
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
class VolumeDeleteBody(RequestBody):
|
|
483
|
+
"""The request body for the volume delete endpoint."""
|
|
484
|
+
names: List[str]
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
class VolumeListBody(RequestBody):
|
|
488
|
+
"""The request body for the volume list endpoint."""
|
|
489
|
+
pass
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
class VolumeValidateBody(RequestBody):
|
|
493
|
+
"""The request body for the volume validate endpoint."""
|
|
494
|
+
name: Optional[str] = None
|
|
495
|
+
volume_type: Optional[str] = None
|
|
496
|
+
infra: Optional[str] = None
|
|
497
|
+
size: Optional[str] = None
|
|
498
|
+
labels: Optional[Dict[str, str]] = None
|
|
499
|
+
config: Optional[Dict[str, Any]] = None
|
|
500
|
+
use_existing: Optional[bool] = None
|
|
501
|
+
|
|
502
|
+
|
|
310
503
|
class EndpointsBody(RequestBody):
|
|
311
504
|
"""The request body for the endpoint."""
|
|
312
505
|
cluster: str
|
|
@@ -328,6 +521,8 @@ class JobsLaunchBody(RequestBody):
|
|
|
328
521
|
"""The request body for the jobs launch endpoint."""
|
|
329
522
|
task: str
|
|
330
523
|
name: Optional[str]
|
|
524
|
+
pool: Optional[str] = None
|
|
525
|
+
num_jobs: Optional[int] = None
|
|
331
526
|
|
|
332
527
|
def to_kwargs(self) -> Dict[str, Any]:
|
|
333
528
|
kwargs = super().to_kwargs()
|
|
@@ -341,6 +536,25 @@ class JobsQueueBody(RequestBody):
|
|
|
341
536
|
refresh: bool = False
|
|
342
537
|
skip_finished: bool = False
|
|
343
538
|
all_users: bool = False
|
|
539
|
+
job_ids: Optional[List[int]] = None
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
class JobsQueueV2Body(RequestBody):
|
|
543
|
+
"""The request body for the jobs queue endpoint."""
|
|
544
|
+
refresh: bool = False
|
|
545
|
+
skip_finished: bool = False
|
|
546
|
+
all_users: bool = False
|
|
547
|
+
job_ids: Optional[List[int]] = None
|
|
548
|
+
user_match: Optional[str] = None
|
|
549
|
+
workspace_match: Optional[str] = None
|
|
550
|
+
name_match: Optional[str] = None
|
|
551
|
+
pool_match: Optional[str] = None
|
|
552
|
+
page: Optional[int] = None
|
|
553
|
+
limit: Optional[int] = None
|
|
554
|
+
statuses: Optional[List[str]] = None
|
|
555
|
+
# The fields to return in the response.
|
|
556
|
+
# Refer to the fields in the `class ManagedJobRecord` in `response.py`
|
|
557
|
+
fields: Optional[List[str]] = None
|
|
344
558
|
|
|
345
559
|
|
|
346
560
|
class JobsCancelBody(RequestBody):
|
|
@@ -349,6 +563,7 @@ class JobsCancelBody(RequestBody):
|
|
|
349
563
|
job_ids: Optional[List[int]] = None
|
|
350
564
|
all: bool = False
|
|
351
565
|
all_users: bool = False
|
|
566
|
+
pool: Optional[str] = None
|
|
352
567
|
|
|
353
568
|
|
|
354
569
|
class JobsLogsBody(RequestBody):
|
|
@@ -358,6 +573,7 @@ class JobsLogsBody(RequestBody):
|
|
|
358
573
|
follow: bool = True
|
|
359
574
|
controller: bool = False
|
|
360
575
|
refresh: bool = False
|
|
576
|
+
tail: Optional[int] = None
|
|
361
577
|
|
|
362
578
|
|
|
363
579
|
class RequestCancelBody(RequestBody):
|
|
@@ -371,6 +587,8 @@ class RequestStatusBody(pydantic.BaseModel):
|
|
|
371
587
|
"""The request body for the API request status endpoint."""
|
|
372
588
|
request_ids: Optional[List[str]] = None
|
|
373
589
|
all_status: bool = False
|
|
590
|
+
limit: Optional[int] = None
|
|
591
|
+
fields: Optional[List[str]] = None
|
|
374
592
|
|
|
375
593
|
|
|
376
594
|
class ServeUpBody(RequestBody):
|
|
@@ -421,6 +639,7 @@ class ServeLogsBody(RequestBody):
|
|
|
421
639
|
target: Union[str, serve.ServiceComponent]
|
|
422
640
|
replica_id: Optional[int] = None
|
|
423
641
|
follow: bool = True
|
|
642
|
+
tail: Optional[int] = None
|
|
424
643
|
|
|
425
644
|
|
|
426
645
|
class ServeDownloadLogsBody(RequestBody):
|
|
@@ -430,6 +649,7 @@ class ServeDownloadLogsBody(RequestBody):
|
|
|
430
649
|
targets: Optional[Union[str, serve.ServiceComponent,
|
|
431
650
|
List[Union[str, serve.ServiceComponent]]]]
|
|
432
651
|
replica_ids: Optional[List[int]] = None
|
|
652
|
+
tail: Optional[int] = None
|
|
433
653
|
|
|
434
654
|
|
|
435
655
|
class ServeStatusBody(RequestBody):
|
|
@@ -439,9 +659,10 @@ class ServeStatusBody(RequestBody):
|
|
|
439
659
|
|
|
440
660
|
class RealtimeGpuAvailabilityRequestBody(RequestBody):
|
|
441
661
|
"""The request body for the realtime GPU availability endpoint."""
|
|
442
|
-
context: Optional[str]
|
|
443
|
-
name_filter: Optional[str]
|
|
444
|
-
quantity_filter: Optional[int]
|
|
662
|
+
context: Optional[str] = None
|
|
663
|
+
name_filter: Optional[str] = None
|
|
664
|
+
quantity_filter: Optional[int] = None
|
|
665
|
+
is_ssh: Optional[bool] = None
|
|
445
666
|
|
|
446
667
|
|
|
447
668
|
class KubernetesNodeInfoRequestBody(RequestBody):
|
|
@@ -473,12 +694,19 @@ class ListAcceleratorCountsBody(RequestBody):
|
|
|
473
694
|
class LocalUpBody(RequestBody):
|
|
474
695
|
"""The request body for the local up endpoint."""
|
|
475
696
|
gpus: bool = True
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
697
|
+
name: Optional[str] = None
|
|
698
|
+
port_start: Optional[int] = None
|
|
699
|
+
|
|
700
|
+
|
|
701
|
+
class LocalDownBody(RequestBody):
|
|
702
|
+
"""The request body for the local down endpoint."""
|
|
703
|
+
name: Optional[str] = None
|
|
704
|
+
|
|
705
|
+
|
|
706
|
+
class SSHUpBody(RequestBody):
|
|
707
|
+
"""The request body for the SSH up/down endpoints."""
|
|
708
|
+
infra: Optional[str] = None
|
|
479
709
|
cleanup: bool = False
|
|
480
|
-
context_name: Optional[str] = None
|
|
481
|
-
password: Optional[str] = None
|
|
482
710
|
|
|
483
711
|
|
|
484
712
|
class ServeTerminateReplicaBody(RequestBody):
|
|
@@ -510,7 +738,119 @@ class JobsDownloadLogsBody(RequestBody):
|
|
|
510
738
|
local_dir: str = constants.SKY_LOGS_DIRECTORY
|
|
511
739
|
|
|
512
740
|
|
|
741
|
+
class JobsPoolApplyBody(RequestBody):
|
|
742
|
+
"""The request body for the jobs pool apply endpoint."""
|
|
743
|
+
task: Optional[str] = None
|
|
744
|
+
workers: Optional[int] = None
|
|
745
|
+
pool_name: str
|
|
746
|
+
mode: serve.UpdateMode
|
|
747
|
+
|
|
748
|
+
def to_kwargs(self) -> Dict[str, Any]:
|
|
749
|
+
kwargs = super().to_kwargs()
|
|
750
|
+
if self.task is not None:
|
|
751
|
+
dag = common.process_mounts_in_task_on_api_server(
|
|
752
|
+
self.task, self.env_vars, workdir_only=False)
|
|
753
|
+
assert len(
|
|
754
|
+
dag.tasks) == 1, ('Must only specify one task in the DAG for '
|
|
755
|
+
'a pool.', dag)
|
|
756
|
+
kwargs['task'] = dag.tasks[0]
|
|
757
|
+
else:
|
|
758
|
+
kwargs['task'] = None
|
|
759
|
+
return kwargs
|
|
760
|
+
|
|
761
|
+
|
|
762
|
+
class JobsPoolDownBody(RequestBody):
|
|
763
|
+
"""The request body for the jobs pool down endpoint."""
|
|
764
|
+
pool_names: Optional[Union[str, List[str]]]
|
|
765
|
+
all: bool = False
|
|
766
|
+
purge: bool = False
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
class JobsPoolStatusBody(RequestBody):
|
|
770
|
+
"""The request body for the jobs pool status endpoint."""
|
|
771
|
+
pool_names: Optional[Union[str, List[str]]]
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
class JobsPoolLogsBody(RequestBody):
|
|
775
|
+
"""The request body for the jobs pool logs endpoint."""
|
|
776
|
+
pool_name: str
|
|
777
|
+
target: Union[str, serve.ServiceComponent]
|
|
778
|
+
worker_id: Optional[int] = None
|
|
779
|
+
follow: bool = True
|
|
780
|
+
tail: Optional[int] = None
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
class JobsPoolDownloadLogsBody(RequestBody):
|
|
784
|
+
"""The request body for the jobs pool download logs endpoint."""
|
|
785
|
+
pool_name: str
|
|
786
|
+
local_dir: str
|
|
787
|
+
targets: Optional[Union[str, serve.ServiceComponent,
|
|
788
|
+
List[Union[str, serve.ServiceComponent]]]]
|
|
789
|
+
worker_ids: Optional[List[int]] = None
|
|
790
|
+
tail: Optional[int] = None
|
|
791
|
+
|
|
792
|
+
|
|
513
793
|
class UploadZipFileResponse(pydantic.BaseModel):
|
|
514
794
|
"""The response body for the upload zip file endpoint."""
|
|
515
795
|
status: str
|
|
516
796
|
missing_chunks: Optional[List[str]] = None
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
class UpdateWorkspaceBody(RequestBody):
|
|
800
|
+
"""The request body for updating a specific workspace configuration."""
|
|
801
|
+
workspace_name: str = '' # Will be set from path parameter
|
|
802
|
+
config: Dict[str, Any]
|
|
803
|
+
|
|
804
|
+
|
|
805
|
+
class CreateWorkspaceBody(RequestBody):
|
|
806
|
+
"""The request body for creating a new workspace."""
|
|
807
|
+
workspace_name: str = '' # Will be set from path parameter
|
|
808
|
+
config: Dict[str, Any]
|
|
809
|
+
|
|
810
|
+
|
|
811
|
+
class DeleteWorkspaceBody(RequestBody):
|
|
812
|
+
"""The request body for deleting a workspace."""
|
|
813
|
+
workspace_name: str
|
|
814
|
+
|
|
815
|
+
|
|
816
|
+
class UpdateConfigBody(RequestBody):
|
|
817
|
+
"""The request body for updating the entire SkyPilot configuration."""
|
|
818
|
+
config: Dict[str, Any]
|
|
819
|
+
|
|
820
|
+
|
|
821
|
+
class GetConfigBody(RequestBody):
|
|
822
|
+
"""The request body for getting the entire SkyPilot configuration."""
|
|
823
|
+
pass
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
class CostReportBody(RequestBody):
|
|
827
|
+
"""The request body for the cost report endpoint."""
|
|
828
|
+
days: Optional[int] = 30
|
|
829
|
+
# we use hashes instead of names to avoid the case where
|
|
830
|
+
# the name is not unique
|
|
831
|
+
cluster_hashes: Optional[List[str]] = None
|
|
832
|
+
# Only return fields that are needed for the dashboard
|
|
833
|
+
# summary page
|
|
834
|
+
dashboard_summary_response: bool = False
|
|
835
|
+
|
|
836
|
+
|
|
837
|
+
class RequestPayload(BasePayload):
|
|
838
|
+
"""The payload for the requests."""
|
|
839
|
+
|
|
840
|
+
request_id: str
|
|
841
|
+
name: str
|
|
842
|
+
entrypoint: str
|
|
843
|
+
request_body: str
|
|
844
|
+
status: str
|
|
845
|
+
created_at: float
|
|
846
|
+
user_id: str
|
|
847
|
+
return_value: str
|
|
848
|
+
error: str
|
|
849
|
+
pid: Optional[int]
|
|
850
|
+
schedule_type: str
|
|
851
|
+
user_name: Optional[str] = None
|
|
852
|
+
# Resources the request operates on.
|
|
853
|
+
cluster_name: Optional[str] = None
|
|
854
|
+
status_msg: Optional[str] = None
|
|
855
|
+
should_retry: bool = False
|
|
856
|
+
finished_at: Optional[float] = None
|
|
@@ -90,7 +90,7 @@ class Precondition(abc.ABC):
|
|
|
90
90
|
while True:
|
|
91
91
|
if self.timeout > 0 and time.time() - start_time > self.timeout:
|
|
92
92
|
# Cancel the request on timeout.
|
|
93
|
-
api_requests.
|
|
93
|
+
await api_requests.set_request_failed_async(
|
|
94
94
|
self.request_id,
|
|
95
95
|
exceptions.RequestCancelled(
|
|
96
96
|
f'Request {self.request_id} precondition wait timed '
|
|
@@ -98,13 +98,15 @@ class Precondition(abc.ABC):
|
|
|
98
98
|
return False
|
|
99
99
|
|
|
100
100
|
# Check if the request has been cancelled
|
|
101
|
-
request = api_requests.
|
|
101
|
+
request = await api_requests.get_request_async(self.request_id,
|
|
102
|
+
fields=['status'])
|
|
102
103
|
if request is None:
|
|
103
104
|
logger.error(f'Request {self.request_id} not found')
|
|
104
105
|
return False
|
|
105
106
|
if request.status == api_requests.RequestStatus.CANCELLED:
|
|
106
107
|
logger.debug(f'Request {self.request_id} cancelled')
|
|
107
108
|
return False
|
|
109
|
+
del request
|
|
108
110
|
|
|
109
111
|
try:
|
|
110
112
|
met, status_msg = await self.check()
|
|
@@ -112,12 +114,11 @@ class Precondition(abc.ABC):
|
|
|
112
114
|
return True
|
|
113
115
|
if status_msg is not None and status_msg != last_status_msg:
|
|
114
116
|
# Update the status message if it has changed.
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
req.status_msg = status_msg
|
|
117
|
+
await api_requests.update_status_msg_async(
|
|
118
|
+
self.request_id, status_msg)
|
|
118
119
|
last_status_msg = status_msg
|
|
119
120
|
except (Exception, SystemExit, KeyboardInterrupt) as e: # pylint: disable=broad-except
|
|
120
|
-
api_requests.
|
|
121
|
+
await api_requests.set_request_failed_async(self.request_id, e)
|
|
121
122
|
logger.info(f'Request {self.request_id} failed due to '
|
|
122
123
|
f'{common_utils.format_exception(e)}')
|
|
123
124
|
return False
|
|
@@ -145,10 +146,9 @@ class ClusterStartCompletePrecondition(Precondition):
|
|
|
145
146
|
self.cluster_name = cluster_name
|
|
146
147
|
|
|
147
148
|
async def check(self) -> Tuple[bool, Optional[str]]:
|
|
148
|
-
|
|
149
|
+
cluster_status = global_user_state.get_status_from_cluster_name(
|
|
149
150
|
self.cluster_name)
|
|
150
|
-
if
|
|
151
|
-
cluster_record['status'] is status_lib.ClusterStatus.UP):
|
|
151
|
+
if cluster_status is status_lib.ClusterStatus.UP:
|
|
152
152
|
# Shortcut for started clusters, ignore cluster not found
|
|
153
153
|
# since the cluster record might not yet be created by the
|
|
154
154
|
# launch task.
|
|
@@ -161,14 +161,18 @@ class ClusterStartCompletePrecondition(Precondition):
|
|
|
161
161
|
# We unify these situations into a single state: the process of starting
|
|
162
162
|
# the cluster is done (either normally or abnormally) but cluster is not
|
|
163
163
|
# in UP status.
|
|
164
|
-
requests = api_requests.
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
164
|
+
requests = await api_requests.get_request_tasks_async(
|
|
165
|
+
req_filter=api_requests.RequestTaskFilter(
|
|
166
|
+
status=[
|
|
167
|
+
api_requests.RequestStatus.PENDING,
|
|
168
|
+
api_requests.RequestStatus.RUNNING
|
|
169
|
+
],
|
|
170
|
+
include_request_names=['sky.launch', 'sky.start'],
|
|
171
|
+
cluster_names=[self.cluster_name],
|
|
172
|
+
# Only get the request ID to avoid fetching the whole request.
|
|
173
|
+
# We're only interested in the count, not the whole request.
|
|
174
|
+
fields=['request_id']))
|
|
171
175
|
if len(requests) == 0:
|
|
172
|
-
# No
|
|
176
|
+
# No running or pending tasks, the start process is done.
|
|
173
177
|
return True, None
|
|
174
178
|
return False, f'Waiting for cluster {self.cluster_name} to be UP.'
|