skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +25 -7
- sky/adaptors/common.py +24 -1
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +170 -17
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +167 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1299 -380
- sky/backends/cloud_vm_ray_backend.py +1715 -518
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/wheel_utils.py +37 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +89 -48
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +335 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +491 -203
- sky/cli.py +5 -6005
- sky/client/{cli.py → cli/command.py} +2477 -1885
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +320 -0
- sky/client/common.py +70 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1203 -297
- sky/client/sdk_async.py +833 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +73 -0
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +358 -93
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +127 -36
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +563 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +206 -80
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -83
- sky/clouds/seeweb.py +466 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +177 -124
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +349 -139
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
- sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1451 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +132 -2
- sky/execution.py +206 -63
- sky/global_user_state.py +2374 -586
- sky/jobs/__init__.py +5 -0
- sky/jobs/client/sdk.py +242 -65
- sky/jobs/client/sdk_async.py +143 -0
- sky/jobs/constants.py +9 -8
- sky/jobs/controller.py +839 -277
- sky/jobs/file_content_utils.py +80 -0
- sky/jobs/log_gc.py +201 -0
- sky/jobs/recovery_strategy.py +398 -152
- sky/jobs/scheduler.py +315 -189
- sky/jobs/server/core.py +829 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2092 -701
- sky/jobs/utils.py +1242 -160
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +443 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +135 -50
- sky/provision/azure/instance.py +10 -5
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +114 -23
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +93 -14
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +789 -247
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +40 -43
- sky/provision/kubernetes/utils.py +1192 -531
- sky/provision/kubernetes/volume.py +282 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +196 -91
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +110 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +180 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +531 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +807 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/vsphere/common/vim_utils.py +1 -2
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +9 -19
- sky/py.typed +0 -0
- sky/resources.py +844 -118
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +225 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +10 -8
- sky/serve/controller.py +64 -19
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +115 -1
- sky/serve/replica_managers.py +273 -162
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +554 -251
- sky/serve/serve_utils.py +733 -220
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +133 -48
- sky/serve/service_spec.py +135 -16
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +200 -0
- sky/server/common.py +475 -181
- sky/server/config.py +81 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +229 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/requests/executor.py +528 -138
- sky/server/requests/payloads.py +351 -17
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +120 -0
- sky/server/requests/requests.py +817 -224
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +106 -0
- sky/server/rest.py +417 -0
- sky/server/server.py +1290 -284
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +345 -57
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +5 -0
- sky/setup_files/alembic.ini +156 -0
- sky/setup_files/dependencies.py +136 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +102 -5
- sky/skylet/attempt_skylet.py +1 -0
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +27 -20
- sky/skylet/constants.py +171 -19
- sky/skylet/events.py +105 -21
- sky/skylet/job_lib.py +335 -104
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/services.py +564 -0
- sky/skylet/skylet.py +63 -4
- sky/skylet/subprocess_daemon.py +103 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +621 -137
- sky/templates/aws-ray.yml.j2 +10 -3
- sky/templates/azure-ray.yml.j2 +1 -1
- sky/templates/do-ray.yml.j2 +1 -1
- sky/templates/gcp-ray.yml.j2 +57 -0
- sky/templates/hyperbolic-ray.yml.j2 +67 -0
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +607 -51
- sky/templates/lambda-ray.yml.j2 +1 -1
- sky/templates/nebius-ray.yml.j2 +33 -12
- sky/templates/paperspace-ray.yml.j2 +1 -1
- sky/templates/primeintellect-ray.yml.j2 +71 -0
- sky/templates/runpod-ray.yml.j2 +9 -1
- sky/templates/scp-ray.yml.j2 +3 -50
- sky/templates/seeweb-ray.yml.j2 +108 -0
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/websocket_proxy.py +178 -18
- sky/usage/usage_lib.py +18 -11
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +387 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +34 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +16 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +310 -87
- sky/utils/config_utils.py +87 -5
- sky/utils/context.py +402 -0
- sky/utils/context_utils.py +222 -0
- sky/utils/controller_utils.py +264 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +470 -0
- sky/utils/db/migration_utils.py +133 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +13 -27
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +5 -5
- sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +368 -0
- sky/utils/log_utils.py +300 -6
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +213 -37
- sky/utils/schemas.py +905 -147
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +38 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/timeline.py +24 -52
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +86 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +149 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +258 -0
- sky/volumes/server/server.py +122 -0
- sky/volumes/volume.py +212 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/server/requests/payloads.py
CHANGED
|
@@ -1,11 +1,28 @@
|
|
|
1
1
|
"""Payloads for the Sky API requests.
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
3
|
+
All the payloads that will be used between the client and server communication
|
|
4
|
+
must be defined here to make sure they get covered by our API compatibility tests.
|
|
5
|
+
|
|
6
|
+
Compatibility note:
|
|
7
|
+
- Adding a new body for new API is compatible as long as the SDK method using
|
|
8
|
+
the new API is properly decorated with `versions.minimal_api_version`.
|
|
9
|
+
- Adding a new field with default value to an existing body is compatible at
|
|
10
|
+
API level, but the business logic must handle the case where the field is
|
|
11
|
+
not processed by an old version of remote client/server. This can usually
|
|
12
|
+
be done by checking `versions.get_remote_api_version()`.
|
|
13
|
+
- Other changes are not compatible at API level, so must be handled specially.
|
|
14
|
+
A common pattern is to keep both the old and new version of the body and
|
|
15
|
+
check `versions.get_remote_api_version()` to decide which body to use. For
|
|
16
|
+
example, say we refactor the `LaunchBody`, the original `LaunchBody` must be
|
|
17
|
+
kept in the codebase and the new body should be added via `LaunchBodyV2`.
|
|
18
|
+
Then if the remote runs in an old version, the local code should still send
|
|
19
|
+
`LaunchBody` to keep the backward compatibility. `LaunchBody` can be removed
|
|
20
|
+
later when constants.MIN_COMPATIBLE_API_VERSION is updated to a version that
|
|
21
|
+
supports `LaunchBodyV2`.
|
|
22
|
+
|
|
23
|
+
Also refer to sky.server.constants.MIN_COMPATIBLE_API_VERSION and the
|
|
24
|
+
sky.server.versions module for more details.
|
|
7
25
|
"""
|
|
8
|
-
import getpass
|
|
9
26
|
import os
|
|
10
27
|
import typing
|
|
11
28
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
@@ -16,6 +33,7 @@ from sky import sky_logging
|
|
|
16
33
|
from sky import skypilot_config
|
|
17
34
|
from sky.adaptors import common as adaptors_common
|
|
18
35
|
from sky.server import common
|
|
36
|
+
from sky.skylet import autostop_lib
|
|
19
37
|
from sky.skylet import constants
|
|
20
38
|
from sky.usage import constants as usage_constants
|
|
21
39
|
from sky.usage import usage_lib
|
|
@@ -53,13 +71,14 @@ EXTERNAL_LOCAL_ENV_VARS = [
|
|
|
53
71
|
def request_body_env_vars() -> dict:
|
|
54
72
|
env_vars = {}
|
|
55
73
|
for env_var in os.environ:
|
|
56
|
-
if env_var.startswith(constants.SKYPILOT_ENV_VAR_PREFIX)
|
|
74
|
+
if (env_var.startswith(constants.SKYPILOT_ENV_VAR_PREFIX) and
|
|
75
|
+
not env_var.startswith(
|
|
76
|
+
constants.SKYPILOT_SERVER_ENV_VAR_PREFIX)):
|
|
57
77
|
env_vars[env_var] = os.environ[env_var]
|
|
58
78
|
if common.is_api_server_local() and env_var in EXTERNAL_LOCAL_ENV_VARS:
|
|
59
79
|
env_vars[env_var] = os.environ[env_var]
|
|
60
80
|
env_vars[constants.USER_ID_ENV_VAR] = common_utils.get_user_hash()
|
|
61
|
-
env_vars[constants.USER_ENV_VAR] =
|
|
62
|
-
getpass.getuser())
|
|
81
|
+
env_vars[constants.USER_ENV_VAR] = common_utils.get_current_user_name()
|
|
63
82
|
env_vars[
|
|
64
83
|
usage_constants.USAGE_RUN_ID_ENV_VAR] = usage_lib.messages.usage.run_id
|
|
65
84
|
# Remove the path to config file, as the config content is included in the
|
|
@@ -67,26 +86,59 @@ def request_body_env_vars() -> dict:
|
|
|
67
86
|
env_vars.pop(skypilot_config.ENV_VAR_SKYPILOT_CONFIG, None)
|
|
68
87
|
env_vars.pop(skypilot_config.ENV_VAR_GLOBAL_CONFIG, None)
|
|
69
88
|
env_vars.pop(skypilot_config.ENV_VAR_PROJECT_CONFIG, None)
|
|
89
|
+
# Remove the config related env vars, as the client config override
|
|
90
|
+
# should be passed in the request body.
|
|
91
|
+
# Any new environment variables that are server-specific should
|
|
92
|
+
# use SKYPILOT_SERVER_ENV_VAR_PREFIX.
|
|
93
|
+
env_vars.pop(constants.ENV_VAR_DB_CONNECTION_URI, None)
|
|
70
94
|
return env_vars
|
|
71
95
|
|
|
72
96
|
|
|
73
97
|
def get_override_skypilot_config_from_client() -> Dict[str, Any]:
|
|
74
98
|
"""Returns the override configs from the client."""
|
|
99
|
+
if annotations.is_on_api_server:
|
|
100
|
+
return {}
|
|
75
101
|
config = skypilot_config.to_dict()
|
|
76
102
|
# Remove the API server config, as we should not specify the SkyPilot
|
|
77
103
|
# server endpoint on the server side. This avoids the warning at
|
|
78
104
|
# server-side.
|
|
79
105
|
config.pop_nested(('api_server',), default_value=None)
|
|
106
|
+
# Remove the admin policy, as the policy has been applied on the client
|
|
107
|
+
# side.
|
|
108
|
+
config.pop_nested(('admin_policy',), default_value=None)
|
|
80
109
|
return config
|
|
81
110
|
|
|
82
111
|
|
|
83
|
-
|
|
112
|
+
def get_override_skypilot_config_path_from_client() -> Optional[str]:
|
|
113
|
+
"""Returns the override config path from the client."""
|
|
114
|
+
if annotations.is_on_api_server:
|
|
115
|
+
return None
|
|
116
|
+
# Currently, we don't need to check if the client-side config
|
|
117
|
+
# has been overridden because we only deal with cases where
|
|
118
|
+
# client has a project-level config/changed config and the
|
|
119
|
+
# api server has a different config.
|
|
120
|
+
return skypilot_config.loaded_config_path_serialized()
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class BasePayload(pydantic.BaseModel):
|
|
124
|
+
"""The base payload for the SkyPilot API."""
|
|
125
|
+
# Ignore extra fields in the request body, which is useful for backward
|
|
126
|
+
# compatibility. The difference with `allow` is that `ignore` will not
|
|
127
|
+
# include the unknown fields when dumping the model, i.e., we can add new
|
|
128
|
+
# fields to the request body without breaking the existing old API server
|
|
129
|
+
# where the handler function does not accept the new field in function
|
|
130
|
+
# signature.
|
|
131
|
+
model_config = pydantic.ConfigDict(extra='ignore')
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class RequestBody(BasePayload):
|
|
84
135
|
"""The request body for the SkyPilot API."""
|
|
85
136
|
env_vars: Dict[str, str] = {}
|
|
86
137
|
entrypoint: str = ''
|
|
87
138
|
entrypoint_command: str = ''
|
|
88
139
|
using_remote_api_server: bool = False
|
|
89
140
|
override_skypilot_config: Optional[Dict[str, Any]] = {}
|
|
141
|
+
override_skypilot_config_path: Optional[str] = None
|
|
90
142
|
|
|
91
143
|
def __init__(self, **data):
|
|
92
144
|
data['env_vars'] = data.get('env_vars', request_body_env_vars())
|
|
@@ -101,6 +153,9 @@ class RequestBody(pydantic.BaseModel):
|
|
|
101
153
|
data['override_skypilot_config'] = data.get(
|
|
102
154
|
'override_skypilot_config',
|
|
103
155
|
get_override_skypilot_config_from_client())
|
|
156
|
+
data['override_skypilot_config_path'] = data.get(
|
|
157
|
+
'override_skypilot_config_path',
|
|
158
|
+
get_override_skypilot_config_path_from_client())
|
|
104
159
|
super().__init__(**data)
|
|
105
160
|
|
|
106
161
|
def to_kwargs(self) -> Dict[str, Any]:
|
|
@@ -115,6 +170,7 @@ class RequestBody(pydantic.BaseModel):
|
|
|
115
170
|
kwargs.pop('entrypoint_command')
|
|
116
171
|
kwargs.pop('using_remote_api_server')
|
|
117
172
|
kwargs.pop('override_skypilot_config')
|
|
173
|
+
kwargs.pop('override_skypilot_config_path')
|
|
118
174
|
return kwargs
|
|
119
175
|
|
|
120
176
|
@property
|
|
@@ -126,6 +182,13 @@ class CheckBody(RequestBody):
|
|
|
126
182
|
"""The request body for the check endpoint."""
|
|
127
183
|
clouds: Optional[Tuple[str, ...]] = None
|
|
128
184
|
verbose: bool = False
|
|
185
|
+
workspace: Optional[str] = None
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class EnabledCloudsBody(RequestBody):
|
|
189
|
+
"""The request body for the enabled clouds endpoint."""
|
|
190
|
+
workspace: Optional[str] = None
|
|
191
|
+
expand: bool = False
|
|
129
192
|
|
|
130
193
|
|
|
131
194
|
class DagRequestBody(RequestBody):
|
|
@@ -148,17 +211,33 @@ class DagRequestBody(RequestBody):
|
|
|
148
211
|
return kwargs
|
|
149
212
|
|
|
150
213
|
|
|
151
|
-
class
|
|
214
|
+
class DagRequestBodyWithRequestOptions(DagRequestBody):
|
|
215
|
+
"""Request body base class for endpoints with a dag and request options."""
|
|
216
|
+
request_options: Optional[admin_policy.RequestOptions]
|
|
217
|
+
|
|
218
|
+
def get_request_options(self) -> Optional[admin_policy.RequestOptions]:
|
|
219
|
+
"""Get the request options."""
|
|
220
|
+
if self.request_options is None:
|
|
221
|
+
return None
|
|
222
|
+
if isinstance(self.request_options, dict):
|
|
223
|
+
return admin_policy.RequestOptions(**self.request_options)
|
|
224
|
+
return self.request_options
|
|
225
|
+
|
|
226
|
+
def to_kwargs(self) -> Dict[str, Any]:
|
|
227
|
+
kwargs = super().to_kwargs()
|
|
228
|
+
kwargs['request_options'] = self.get_request_options()
|
|
229
|
+
return kwargs
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
class ValidateBody(DagRequestBodyWithRequestOptions):
|
|
152
233
|
"""The request body for the validate endpoint."""
|
|
153
234
|
dag: str
|
|
154
|
-
request_options: Optional[admin_policy.RequestOptions]
|
|
155
235
|
|
|
156
236
|
|
|
157
|
-
class OptimizeBody(
|
|
237
|
+
class OptimizeBody(DagRequestBodyWithRequestOptions):
|
|
158
238
|
"""The request body for the optimize endpoint."""
|
|
159
239
|
dag: str
|
|
160
240
|
minimize: common_lib.OptimizeTarget = common_lib.OptimizeTarget.COST
|
|
161
|
-
request_options: Optional[admin_policy.RequestOptions]
|
|
162
241
|
|
|
163
242
|
|
|
164
243
|
class LaunchBody(RequestBody):
|
|
@@ -166,8 +245,10 @@ class LaunchBody(RequestBody):
|
|
|
166
245
|
task: str
|
|
167
246
|
cluster_name: str
|
|
168
247
|
retry_until_up: bool = False
|
|
248
|
+
# TODO(aylei): remove this field in v0.12.0
|
|
169
249
|
idle_minutes_to_autostop: Optional[int] = None
|
|
170
250
|
dryrun: bool = False
|
|
251
|
+
# TODO(aylei): remove this field in v0.12.0
|
|
171
252
|
down: bool = False
|
|
172
253
|
backend: Optional[str] = None
|
|
173
254
|
optimize_target: common_lib.OptimizeTarget = common_lib.OptimizeTarget.COST
|
|
@@ -233,12 +314,20 @@ class StatusBody(RequestBody):
|
|
|
233
314
|
cluster_names: Optional[List[str]] = None
|
|
234
315
|
refresh: common_lib.StatusRefreshMode = common_lib.StatusRefreshMode.NONE
|
|
235
316
|
all_users: bool = True
|
|
317
|
+
# TODO (kyuds): default to False post 0.10.5
|
|
318
|
+
include_credentials: bool = True
|
|
319
|
+
# Only return fields that are needed for the
|
|
320
|
+
# dashboard / CLI summary response
|
|
321
|
+
summary_response: bool = False
|
|
322
|
+
# Include the cluster handle in the response
|
|
323
|
+
include_handle: bool = True
|
|
236
324
|
|
|
237
325
|
|
|
238
326
|
class StartBody(RequestBody):
|
|
239
327
|
"""The request body for the start endpoint."""
|
|
240
328
|
cluster_name: str
|
|
241
329
|
idle_minutes_to_autostop: Optional[int] = None
|
|
330
|
+
wait_for: Optional[autostop_lib.AutostopWaitFor] = None
|
|
242
331
|
retry_until_up: bool = False
|
|
243
332
|
down: bool = False
|
|
244
333
|
force: bool = False
|
|
@@ -248,6 +337,7 @@ class AutostopBody(RequestBody):
|
|
|
248
337
|
"""The request body for the autostop endpoint."""
|
|
249
338
|
cluster_name: str
|
|
250
339
|
idle_minutes: int
|
|
340
|
+
wait_for: Optional[autostop_lib.AutostopWaitFor] = None
|
|
251
341
|
down: bool = False
|
|
252
342
|
|
|
253
343
|
|
|
@@ -275,9 +365,10 @@ class CancelBody(RequestBody):
|
|
|
275
365
|
return kwargs
|
|
276
366
|
|
|
277
367
|
|
|
278
|
-
class
|
|
368
|
+
class ProvisionLogsBody(RequestBody):
|
|
279
369
|
"""Cluster node."""
|
|
280
370
|
cluster_name: str
|
|
371
|
+
worker: Optional[int] = None
|
|
281
372
|
|
|
282
373
|
|
|
283
374
|
class ClusterJobBody(RequestBody):
|
|
@@ -301,6 +392,63 @@ class ClusterJobsDownloadLogsBody(RequestBody):
|
|
|
301
392
|
local_dir: str = constants.SKY_LOGS_DIRECTORY
|
|
302
393
|
|
|
303
394
|
|
|
395
|
+
class UserCreateBody(RequestBody):
|
|
396
|
+
"""The request body for the user create endpoint."""
|
|
397
|
+
username: str
|
|
398
|
+
password: str
|
|
399
|
+
role: Optional[str] = None
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
class UserDeleteBody(RequestBody):
|
|
403
|
+
"""The request body for the user delete endpoint."""
|
|
404
|
+
user_id: str
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
class UserUpdateBody(RequestBody):
|
|
408
|
+
"""The request body for the user update endpoint."""
|
|
409
|
+
user_id: str
|
|
410
|
+
role: Optional[str] = None
|
|
411
|
+
password: Optional[str] = None
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
class UserImportBody(RequestBody):
|
|
415
|
+
"""The request body for the user import endpoint."""
|
|
416
|
+
csv_content: str
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
class ServiceAccountTokenCreateBody(RequestBody):
|
|
420
|
+
"""The request body for creating a service account token."""
|
|
421
|
+
token_name: str
|
|
422
|
+
expires_in_days: Optional[int] = None
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
class ServiceAccountTokenDeleteBody(RequestBody):
|
|
426
|
+
"""The request body for deleting a service account token."""
|
|
427
|
+
token_id: str
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
class UpdateRoleBody(RequestBody):
|
|
431
|
+
"""The request body for updating a user role."""
|
|
432
|
+
role: str
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
class ServiceAccountTokenRoleBody(RequestBody):
|
|
436
|
+
"""The request body for getting a service account token role."""
|
|
437
|
+
token_id: str
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
class ServiceAccountTokenUpdateRoleBody(RequestBody):
|
|
441
|
+
"""The request body for updating a service account token role."""
|
|
442
|
+
token_id: str
|
|
443
|
+
role: str
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
class ServiceAccountTokenRotateBody(RequestBody):
|
|
447
|
+
"""The request body for rotating a service account token."""
|
|
448
|
+
token_id: str
|
|
449
|
+
expires_in_days: Optional[int] = None
|
|
450
|
+
|
|
451
|
+
|
|
304
452
|
class DownloadBody(RequestBody):
|
|
305
453
|
"""The request body for the download endpoint."""
|
|
306
454
|
folder_paths: List[str]
|
|
@@ -311,6 +459,39 @@ class StorageBody(RequestBody):
|
|
|
311
459
|
name: str
|
|
312
460
|
|
|
313
461
|
|
|
462
|
+
class VolumeApplyBody(RequestBody):
|
|
463
|
+
"""The request body for the volume apply endpoint."""
|
|
464
|
+
name: str
|
|
465
|
+
volume_type: str
|
|
466
|
+
cloud: str
|
|
467
|
+
region: Optional[str] = None
|
|
468
|
+
zone: Optional[str] = None
|
|
469
|
+
size: Optional[str] = None
|
|
470
|
+
config: Optional[Dict[str, Any]] = None
|
|
471
|
+
labels: Optional[Dict[str, str]] = None
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
class VolumeDeleteBody(RequestBody):
|
|
475
|
+
"""The request body for the volume delete endpoint."""
|
|
476
|
+
names: List[str]
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
class VolumeListBody(RequestBody):
|
|
480
|
+
"""The request body for the volume list endpoint."""
|
|
481
|
+
pass
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
class VolumeValidateBody(RequestBody):
|
|
485
|
+
"""The request body for the volume validate endpoint."""
|
|
486
|
+
name: Optional[str] = None
|
|
487
|
+
volume_type: Optional[str] = None
|
|
488
|
+
infra: Optional[str] = None
|
|
489
|
+
size: Optional[str] = None
|
|
490
|
+
labels: Optional[Dict[str, str]] = None
|
|
491
|
+
resource_name: Optional[str] = None
|
|
492
|
+
config: Optional[Dict[str, Any]] = None
|
|
493
|
+
|
|
494
|
+
|
|
314
495
|
class EndpointsBody(RequestBody):
|
|
315
496
|
"""The request body for the endpoint."""
|
|
316
497
|
cluster: str
|
|
@@ -332,6 +513,8 @@ class JobsLaunchBody(RequestBody):
|
|
|
332
513
|
"""The request body for the jobs launch endpoint."""
|
|
333
514
|
task: str
|
|
334
515
|
name: Optional[str]
|
|
516
|
+
pool: Optional[str] = None
|
|
517
|
+
num_jobs: Optional[int] = None
|
|
335
518
|
|
|
336
519
|
def to_kwargs(self) -> Dict[str, Any]:
|
|
337
520
|
kwargs = super().to_kwargs()
|
|
@@ -345,6 +528,25 @@ class JobsQueueBody(RequestBody):
|
|
|
345
528
|
refresh: bool = False
|
|
346
529
|
skip_finished: bool = False
|
|
347
530
|
all_users: bool = False
|
|
531
|
+
job_ids: Optional[List[int]] = None
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
class JobsQueueV2Body(RequestBody):
|
|
535
|
+
"""The request body for the jobs queue endpoint."""
|
|
536
|
+
refresh: bool = False
|
|
537
|
+
skip_finished: bool = False
|
|
538
|
+
all_users: bool = False
|
|
539
|
+
job_ids: Optional[List[int]] = None
|
|
540
|
+
user_match: Optional[str] = None
|
|
541
|
+
workspace_match: Optional[str] = None
|
|
542
|
+
name_match: Optional[str] = None
|
|
543
|
+
pool_match: Optional[str] = None
|
|
544
|
+
page: Optional[int] = None
|
|
545
|
+
limit: Optional[int] = None
|
|
546
|
+
statuses: Optional[List[str]] = None
|
|
547
|
+
# The fields to return in the response.
|
|
548
|
+
# Refer to the fields in the `class ManagedJobRecord` in `response.py`
|
|
549
|
+
fields: Optional[List[str]] = None
|
|
348
550
|
|
|
349
551
|
|
|
350
552
|
class JobsCancelBody(RequestBody):
|
|
@@ -353,6 +555,7 @@ class JobsCancelBody(RequestBody):
|
|
|
353
555
|
job_ids: Optional[List[int]] = None
|
|
354
556
|
all: bool = False
|
|
355
557
|
all_users: bool = False
|
|
558
|
+
pool: Optional[str] = None
|
|
356
559
|
|
|
357
560
|
|
|
358
561
|
class JobsLogsBody(RequestBody):
|
|
@@ -362,6 +565,7 @@ class JobsLogsBody(RequestBody):
|
|
|
362
565
|
follow: bool = True
|
|
363
566
|
controller: bool = False
|
|
364
567
|
refresh: bool = False
|
|
568
|
+
tail: Optional[int] = None
|
|
365
569
|
|
|
366
570
|
|
|
367
571
|
class RequestCancelBody(RequestBody):
|
|
@@ -375,6 +579,8 @@ class RequestStatusBody(pydantic.BaseModel):
|
|
|
375
579
|
"""The request body for the API request status endpoint."""
|
|
376
580
|
request_ids: Optional[List[str]] = None
|
|
377
581
|
all_status: bool = False
|
|
582
|
+
limit: Optional[int] = None
|
|
583
|
+
fields: Optional[List[str]] = None
|
|
378
584
|
|
|
379
585
|
|
|
380
586
|
class ServeUpBody(RequestBody):
|
|
@@ -425,6 +631,7 @@ class ServeLogsBody(RequestBody):
|
|
|
425
631
|
target: Union[str, serve.ServiceComponent]
|
|
426
632
|
replica_id: Optional[int] = None
|
|
427
633
|
follow: bool = True
|
|
634
|
+
tail: Optional[int] = None
|
|
428
635
|
|
|
429
636
|
|
|
430
637
|
class ServeDownloadLogsBody(RequestBody):
|
|
@@ -434,6 +641,7 @@ class ServeDownloadLogsBody(RequestBody):
|
|
|
434
641
|
targets: Optional[Union[str, serve.ServiceComponent,
|
|
435
642
|
List[Union[str, serve.ServiceComponent]]]]
|
|
436
643
|
replica_ids: Optional[List[int]] = None
|
|
644
|
+
tail: Optional[int] = None
|
|
437
645
|
|
|
438
646
|
|
|
439
647
|
class ServeStatusBody(RequestBody):
|
|
@@ -443,9 +651,10 @@ class ServeStatusBody(RequestBody):
|
|
|
443
651
|
|
|
444
652
|
class RealtimeGpuAvailabilityRequestBody(RequestBody):
|
|
445
653
|
"""The request body for the realtime GPU availability endpoint."""
|
|
446
|
-
context: Optional[str]
|
|
447
|
-
name_filter: Optional[str]
|
|
448
|
-
quantity_filter: Optional[int]
|
|
654
|
+
context: Optional[str] = None
|
|
655
|
+
name_filter: Optional[str] = None
|
|
656
|
+
quantity_filter: Optional[int] = None
|
|
657
|
+
is_ssh: Optional[bool] = None
|
|
449
658
|
|
|
450
659
|
|
|
451
660
|
class KubernetesNodeInfoRequestBody(RequestBody):
|
|
@@ -483,6 +692,19 @@ class LocalUpBody(RequestBody):
|
|
|
483
692
|
cleanup: bool = False
|
|
484
693
|
context_name: Optional[str] = None
|
|
485
694
|
password: Optional[str] = None
|
|
695
|
+
name: Optional[str] = None
|
|
696
|
+
port_start: Optional[int] = None
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
class LocalDownBody(RequestBody):
|
|
700
|
+
"""The request body for the local down endpoint."""
|
|
701
|
+
name: Optional[str] = None
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
class SSHUpBody(RequestBody):
|
|
705
|
+
"""The request body for the SSH up/down endpoints."""
|
|
706
|
+
infra: Optional[str] = None
|
|
707
|
+
cleanup: bool = False
|
|
486
708
|
|
|
487
709
|
|
|
488
710
|
class ServeTerminateReplicaBody(RequestBody):
|
|
@@ -514,7 +736,119 @@ class JobsDownloadLogsBody(RequestBody):
|
|
|
514
736
|
local_dir: str = constants.SKY_LOGS_DIRECTORY
|
|
515
737
|
|
|
516
738
|
|
|
739
|
+
class JobsPoolApplyBody(RequestBody):
|
|
740
|
+
"""The request body for the jobs pool apply endpoint."""
|
|
741
|
+
task: Optional[str] = None
|
|
742
|
+
workers: Optional[int] = None
|
|
743
|
+
pool_name: str
|
|
744
|
+
mode: serve.UpdateMode
|
|
745
|
+
|
|
746
|
+
def to_kwargs(self) -> Dict[str, Any]:
|
|
747
|
+
kwargs = super().to_kwargs()
|
|
748
|
+
if self.task is not None:
|
|
749
|
+
dag = common.process_mounts_in_task_on_api_server(
|
|
750
|
+
self.task, self.env_vars, workdir_only=False)
|
|
751
|
+
assert len(
|
|
752
|
+
dag.tasks) == 1, ('Must only specify one task in the DAG for '
|
|
753
|
+
'a pool.', dag)
|
|
754
|
+
kwargs['task'] = dag.tasks[0]
|
|
755
|
+
else:
|
|
756
|
+
kwargs['task'] = None
|
|
757
|
+
return kwargs
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
class JobsPoolDownBody(RequestBody):
|
|
761
|
+
"""The request body for the jobs pool down endpoint."""
|
|
762
|
+
pool_names: Optional[Union[str, List[str]]]
|
|
763
|
+
all: bool = False
|
|
764
|
+
purge: bool = False
|
|
765
|
+
|
|
766
|
+
|
|
767
|
+
class JobsPoolStatusBody(RequestBody):
|
|
768
|
+
"""The request body for the jobs pool status endpoint."""
|
|
769
|
+
pool_names: Optional[Union[str, List[str]]]
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
class JobsPoolLogsBody(RequestBody):
|
|
773
|
+
"""The request body for the jobs pool logs endpoint."""
|
|
774
|
+
pool_name: str
|
|
775
|
+
target: Union[str, serve.ServiceComponent]
|
|
776
|
+
worker_id: Optional[int] = None
|
|
777
|
+
follow: bool = True
|
|
778
|
+
tail: Optional[int] = None
|
|
779
|
+
|
|
780
|
+
|
|
781
|
+
class JobsPoolDownloadLogsBody(RequestBody):
|
|
782
|
+
"""The request body for the jobs pool download logs endpoint."""
|
|
783
|
+
pool_name: str
|
|
784
|
+
local_dir: str
|
|
785
|
+
targets: Optional[Union[str, serve.ServiceComponent,
|
|
786
|
+
List[Union[str, serve.ServiceComponent]]]]
|
|
787
|
+
worker_ids: Optional[List[int]] = None
|
|
788
|
+
tail: Optional[int] = None
|
|
789
|
+
|
|
790
|
+
|
|
517
791
|
class UploadZipFileResponse(pydantic.BaseModel):
|
|
518
792
|
"""The response body for the upload zip file endpoint."""
|
|
519
793
|
status: str
|
|
520
794
|
missing_chunks: Optional[List[str]] = None
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
class UpdateWorkspaceBody(RequestBody):
|
|
798
|
+
"""The request body for updating a specific workspace configuration."""
|
|
799
|
+
workspace_name: str = '' # Will be set from path parameter
|
|
800
|
+
config: Dict[str, Any]
|
|
801
|
+
|
|
802
|
+
|
|
803
|
+
class CreateWorkspaceBody(RequestBody):
|
|
804
|
+
"""The request body for creating a new workspace."""
|
|
805
|
+
workspace_name: str = '' # Will be set from path parameter
|
|
806
|
+
config: Dict[str, Any]
|
|
807
|
+
|
|
808
|
+
|
|
809
|
+
class DeleteWorkspaceBody(RequestBody):
|
|
810
|
+
"""The request body for deleting a workspace."""
|
|
811
|
+
workspace_name: str
|
|
812
|
+
|
|
813
|
+
|
|
814
|
+
class UpdateConfigBody(RequestBody):
|
|
815
|
+
"""The request body for updating the entire SkyPilot configuration."""
|
|
816
|
+
config: Dict[str, Any]
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
class GetConfigBody(RequestBody):
|
|
820
|
+
"""The request body for getting the entire SkyPilot configuration."""
|
|
821
|
+
pass
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
class CostReportBody(RequestBody):
|
|
825
|
+
"""The request body for the cost report endpoint."""
|
|
826
|
+
days: Optional[int] = 30
|
|
827
|
+
# we use hashes instead of names to avoid the case where
|
|
828
|
+
# the name is not unique
|
|
829
|
+
cluster_hashes: Optional[List[str]] = None
|
|
830
|
+
# Only return fields that are needed for the dashboard
|
|
831
|
+
# summary page
|
|
832
|
+
dashboard_summary_response: bool = False
|
|
833
|
+
|
|
834
|
+
|
|
835
|
+
class RequestPayload(BasePayload):
|
|
836
|
+
"""The payload for the requests."""
|
|
837
|
+
|
|
838
|
+
request_id: str
|
|
839
|
+
name: str
|
|
840
|
+
entrypoint: str
|
|
841
|
+
request_body: str
|
|
842
|
+
status: str
|
|
843
|
+
created_at: float
|
|
844
|
+
user_id: str
|
|
845
|
+
return_value: str
|
|
846
|
+
error: str
|
|
847
|
+
pid: Optional[int]
|
|
848
|
+
schedule_type: str
|
|
849
|
+
user_name: Optional[str] = None
|
|
850
|
+
# Resources the request operates on.
|
|
851
|
+
cluster_name: Optional[str] = None
|
|
852
|
+
status_msg: Optional[str] = None
|
|
853
|
+
should_retry: bool = False
|
|
854
|
+
finished_at: Optional[float] = None
|
|
@@ -90,7 +90,7 @@ class Precondition(abc.ABC):
|
|
|
90
90
|
while True:
|
|
91
91
|
if self.timeout > 0 and time.time() - start_time > self.timeout:
|
|
92
92
|
# Cancel the request on timeout.
|
|
93
|
-
api_requests.
|
|
93
|
+
await api_requests.set_request_failed_async(
|
|
94
94
|
self.request_id,
|
|
95
95
|
exceptions.RequestCancelled(
|
|
96
96
|
f'Request {self.request_id} precondition wait timed '
|
|
@@ -98,13 +98,15 @@ class Precondition(abc.ABC):
|
|
|
98
98
|
return False
|
|
99
99
|
|
|
100
100
|
# Check if the request has been cancelled
|
|
101
|
-
request = api_requests.
|
|
101
|
+
request = await api_requests.get_request_async(self.request_id,
|
|
102
|
+
fields=['status'])
|
|
102
103
|
if request is None:
|
|
103
104
|
logger.error(f'Request {self.request_id} not found')
|
|
104
105
|
return False
|
|
105
106
|
if request.status == api_requests.RequestStatus.CANCELLED:
|
|
106
107
|
logger.debug(f'Request {self.request_id} cancelled')
|
|
107
108
|
return False
|
|
109
|
+
del request
|
|
108
110
|
|
|
109
111
|
try:
|
|
110
112
|
met, status_msg = await self.check()
|
|
@@ -112,12 +114,11 @@ class Precondition(abc.ABC):
|
|
|
112
114
|
return True
|
|
113
115
|
if status_msg is not None and status_msg != last_status_msg:
|
|
114
116
|
# Update the status message if it has changed.
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
req.status_msg = status_msg
|
|
117
|
+
await api_requests.update_status_msg_async(
|
|
118
|
+
self.request_id, status_msg)
|
|
118
119
|
last_status_msg = status_msg
|
|
119
120
|
except (Exception, SystemExit, KeyboardInterrupt) as e: # pylint: disable=broad-except
|
|
120
|
-
api_requests.
|
|
121
|
+
await api_requests.set_request_failed_async(self.request_id, e)
|
|
121
122
|
logger.info(f'Request {self.request_id} failed due to '
|
|
122
123
|
f'{common_utils.format_exception(e)}')
|
|
123
124
|
return False
|
|
@@ -145,10 +146,9 @@ class ClusterStartCompletePrecondition(Precondition):
|
|
|
145
146
|
self.cluster_name = cluster_name
|
|
146
147
|
|
|
147
148
|
async def check(self) -> Tuple[bool, Optional[str]]:
|
|
148
|
-
|
|
149
|
+
cluster_status = global_user_state.get_status_from_cluster_name(
|
|
149
150
|
self.cluster_name)
|
|
150
|
-
if
|
|
151
|
-
cluster_record['status'] is status_lib.ClusterStatus.UP):
|
|
151
|
+
if cluster_status is status_lib.ClusterStatus.UP:
|
|
152
152
|
# Shortcut for started clusters, ignore cluster not found
|
|
153
153
|
# since the cluster record might not yet be created by the
|
|
154
154
|
# launch task.
|
|
@@ -161,14 +161,18 @@ class ClusterStartCompletePrecondition(Precondition):
|
|
|
161
161
|
# We unify these situations into a single state: the process of starting
|
|
162
162
|
# the cluster is done (either normally or abnormally) but cluster is not
|
|
163
163
|
# in UP status.
|
|
164
|
-
requests = api_requests.
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
164
|
+
requests = await api_requests.get_request_tasks_async(
|
|
165
|
+
req_filter=api_requests.RequestTaskFilter(
|
|
166
|
+
status=[
|
|
167
|
+
api_requests.RequestStatus.PENDING,
|
|
168
|
+
api_requests.RequestStatus.RUNNING
|
|
169
|
+
],
|
|
170
|
+
include_request_names=['sky.launch', 'sky.start'],
|
|
171
|
+
cluster_names=[self.cluster_name],
|
|
172
|
+
# Only get the request ID to avoid fetching the whole request.
|
|
173
|
+
# We're only interested in the count, not the whole request.
|
|
174
|
+
fields=['request_id']))
|
|
171
175
|
if len(requests) == 0:
|
|
172
|
-
# No
|
|
176
|
+
# No running or pending tasks, the start process is done.
|
|
173
177
|
return True, None
|
|
174
178
|
return False, f'Waiting for cluster {self.cluster_name} to be UP.'
|