skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +25 -7
- sky/adaptors/common.py +24 -1
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +170 -17
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +167 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1299 -380
- sky/backends/cloud_vm_ray_backend.py +1715 -518
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/wheel_utils.py +37 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +89 -48
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +335 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +491 -203
- sky/cli.py +5 -6005
- sky/client/{cli.py → cli/command.py} +2477 -1885
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +320 -0
- sky/client/common.py +70 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1203 -297
- sky/client/sdk_async.py +833 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +73 -0
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +358 -93
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +127 -36
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +563 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +206 -80
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -83
- sky/clouds/seeweb.py +466 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +177 -124
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +349 -139
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
- sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1451 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +132 -2
- sky/execution.py +206 -63
- sky/global_user_state.py +2374 -586
- sky/jobs/__init__.py +5 -0
- sky/jobs/client/sdk.py +242 -65
- sky/jobs/client/sdk_async.py +143 -0
- sky/jobs/constants.py +9 -8
- sky/jobs/controller.py +839 -277
- sky/jobs/file_content_utils.py +80 -0
- sky/jobs/log_gc.py +201 -0
- sky/jobs/recovery_strategy.py +398 -152
- sky/jobs/scheduler.py +315 -189
- sky/jobs/server/core.py +829 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2092 -701
- sky/jobs/utils.py +1242 -160
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +443 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +135 -50
- sky/provision/azure/instance.py +10 -5
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +114 -23
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +93 -14
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +789 -247
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +40 -43
- sky/provision/kubernetes/utils.py +1192 -531
- sky/provision/kubernetes/volume.py +282 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +196 -91
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +110 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +180 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +531 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +807 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/vsphere/common/vim_utils.py +1 -2
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +9 -19
- sky/py.typed +0 -0
- sky/resources.py +844 -118
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +225 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +10 -8
- sky/serve/controller.py +64 -19
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +115 -1
- sky/serve/replica_managers.py +273 -162
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +554 -251
- sky/serve/serve_utils.py +733 -220
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +133 -48
- sky/serve/service_spec.py +135 -16
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +200 -0
- sky/server/common.py +475 -181
- sky/server/config.py +81 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +229 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/requests/executor.py +528 -138
- sky/server/requests/payloads.py +351 -17
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +120 -0
- sky/server/requests/requests.py +817 -224
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +106 -0
- sky/server/rest.py +417 -0
- sky/server/server.py +1290 -284
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +345 -57
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +5 -0
- sky/setup_files/alembic.ini +156 -0
- sky/setup_files/dependencies.py +136 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +102 -5
- sky/skylet/attempt_skylet.py +1 -0
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +27 -20
- sky/skylet/constants.py +171 -19
- sky/skylet/events.py +105 -21
- sky/skylet/job_lib.py +335 -104
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/services.py +564 -0
- sky/skylet/skylet.py +63 -4
- sky/skylet/subprocess_daemon.py +103 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +621 -137
- sky/templates/aws-ray.yml.j2 +10 -3
- sky/templates/azure-ray.yml.j2 +1 -1
- sky/templates/do-ray.yml.j2 +1 -1
- sky/templates/gcp-ray.yml.j2 +57 -0
- sky/templates/hyperbolic-ray.yml.j2 +67 -0
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +607 -51
- sky/templates/lambda-ray.yml.j2 +1 -1
- sky/templates/nebius-ray.yml.j2 +33 -12
- sky/templates/paperspace-ray.yml.j2 +1 -1
- sky/templates/primeintellect-ray.yml.j2 +71 -0
- sky/templates/runpod-ray.yml.j2 +9 -1
- sky/templates/scp-ray.yml.j2 +3 -50
- sky/templates/seeweb-ray.yml.j2 +108 -0
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/websocket_proxy.py +178 -18
- sky/usage/usage_lib.py +18 -11
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +387 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +34 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +16 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +310 -87
- sky/utils/config_utils.py +87 -5
- sky/utils/context.py +402 -0
- sky/utils/context_utils.py +222 -0
- sky/utils/controller_utils.py +264 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +470 -0
- sky/utils/db/migration_utils.py +133 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +13 -27
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +5 -5
- sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +368 -0
- sky/utils/log_utils.py +300 -6
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +213 -37
- sky/utils/schemas.py +905 -147
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +38 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/timeline.py +24 -52
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +86 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +149 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +258 -0
- sky/volumes/server/server.py +122 -0
- sky/volumes/volume.py +212 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/server/uvicorn.py
CHANGED
|
@@ -3,24 +3,238 @@
|
|
|
3
3
|
This module is a wrapper around uvicorn to customize the behavior of the
|
|
4
4
|
server.
|
|
5
5
|
"""
|
|
6
|
+
import asyncio
|
|
7
|
+
import logging
|
|
6
8
|
import os
|
|
9
|
+
import signal
|
|
10
|
+
import sys
|
|
7
11
|
import threading
|
|
8
|
-
|
|
12
|
+
import time
|
|
13
|
+
from types import FrameType
|
|
14
|
+
from typing import Optional, Union
|
|
9
15
|
|
|
16
|
+
import filelock
|
|
10
17
|
import uvicorn
|
|
11
18
|
from uvicorn.supervisors import multiprocess
|
|
12
19
|
|
|
20
|
+
from sky import sky_logging
|
|
21
|
+
from sky.server import daemons
|
|
22
|
+
from sky.server import metrics as metrics_lib
|
|
23
|
+
from sky.server import state
|
|
24
|
+
from sky.server.requests import requests as requests_lib
|
|
25
|
+
from sky.skylet import constants
|
|
26
|
+
from sky.utils import context_utils
|
|
27
|
+
from sky.utils import env_options
|
|
28
|
+
from sky.utils import perf_utils
|
|
13
29
|
from sky.utils import subprocess_utils
|
|
30
|
+
from sky.utils.db import db_utils
|
|
14
31
|
|
|
32
|
+
logger = sky_logging.init_logger(__name__)
|
|
15
33
|
|
|
16
|
-
|
|
34
|
+
# File lock path for coordinating graceful shutdown across processes
|
|
35
|
+
_GRACEFUL_SHUTDOWN_LOCK_PATH = '/tmp/skypilot_graceful_shutdown.lock'
|
|
36
|
+
|
|
37
|
+
# Interval to check for on-going requests.
|
|
38
|
+
_WAIT_REQUESTS_INTERVAL_SECONDS = 5
|
|
39
|
+
|
|
40
|
+
# Timeout for waiting for on-going requests to finish.
|
|
41
|
+
try:
|
|
42
|
+
_WAIT_REQUESTS_TIMEOUT_SECONDS = int(
|
|
43
|
+
os.environ.get(constants.GRACE_PERIOD_SECONDS_ENV_VAR, '60'))
|
|
44
|
+
except ValueError:
|
|
45
|
+
_WAIT_REQUESTS_TIMEOUT_SECONDS = 60
|
|
46
|
+
|
|
47
|
+
# TODO(aylei): use decorator to register requests that need to be proactively
|
|
48
|
+
# cancelled instead of hardcoding here.
|
|
49
|
+
_RETRIABLE_REQUEST_NAMES = {
|
|
50
|
+
'sky.logs',
|
|
51
|
+
'sky.jobs.logs',
|
|
52
|
+
'sky.serve.logs',
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def add_timestamp_prefix_for_server_logs() -> None:
|
|
57
|
+
"""Configure logging for API server.
|
|
58
|
+
|
|
59
|
+
Note: we only do this in the main API server process and uvicorn processes,
|
|
60
|
+
to avoid affecting executor logs (including in modules like
|
|
61
|
+
sky.server.requests) that may get sent to the client.
|
|
62
|
+
"""
|
|
63
|
+
server_logger = sky_logging.init_logger('sky.server')
|
|
64
|
+
# Clear existing handlers first to prevent duplicates
|
|
65
|
+
server_logger.handlers.clear()
|
|
66
|
+
# Disable propagation to avoid the root logger of SkyPilot being affected
|
|
67
|
+
server_logger.propagate = False
|
|
68
|
+
# Add date prefix to the log message printed by loggers under
|
|
69
|
+
# server.
|
|
70
|
+
stream_handler = logging.StreamHandler(sys.stdout)
|
|
71
|
+
if env_options.Options.SHOW_DEBUG_INFO.get():
|
|
72
|
+
stream_handler.setLevel(logging.DEBUG)
|
|
73
|
+
else:
|
|
74
|
+
stream_handler.setLevel(logging.INFO)
|
|
75
|
+
stream_handler.flush = sys.stdout.flush # type: ignore
|
|
76
|
+
stream_handler.setFormatter(sky_logging.FORMATTER)
|
|
77
|
+
server_logger.addHandler(stream_handler)
|
|
78
|
+
# Add date prefix to the log message printed by uvicorn.
|
|
79
|
+
for name in ['uvicorn', 'uvicorn.access']:
|
|
80
|
+
uvicorn_logger = logging.getLogger(name)
|
|
81
|
+
uvicorn_logger.handlers.clear()
|
|
82
|
+
uvicorn_logger.addHandler(stream_handler)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class Server(uvicorn.Server):
|
|
86
|
+
"""Server wrapper for uvicorn.
|
|
87
|
+
|
|
88
|
+
Extended functionalities:
|
|
89
|
+
- Handle exit signal and perform custom graceful shutdown.
|
|
90
|
+
- Run the server process in a context-aware manner.
|
|
91
|
+
"""
|
|
92
|
+
|
|
93
|
+
def __init__(self,
|
|
94
|
+
config: uvicorn.Config,
|
|
95
|
+
max_db_connections: Optional[int] = None):
|
|
96
|
+
super().__init__(config=config)
|
|
97
|
+
self.exiting: bool = False
|
|
98
|
+
self.max_db_connections = max_db_connections
|
|
99
|
+
|
|
100
|
+
def handle_exit(self, sig: int, frame: Union[FrameType, None]) -> None:
|
|
101
|
+
"""Handle exit signal.
|
|
102
|
+
|
|
103
|
+
When a server process receives a SIGTERM or SIGINT signal, a graceful
|
|
104
|
+
shutdown will be initiated. If a SIGINT signal is received again, the
|
|
105
|
+
server will be forcefully shut down.
|
|
106
|
+
"""
|
|
107
|
+
if self.exiting and sig == signal.SIGINT:
|
|
108
|
+
# The server has been signaled to exit and received a SIGINT again,
|
|
109
|
+
# do force shutdown.
|
|
110
|
+
logger.info('Force shutdown.')
|
|
111
|
+
self.should_exit = True
|
|
112
|
+
super().handle_exit(sig, frame)
|
|
113
|
+
return
|
|
114
|
+
if not self.exiting:
|
|
115
|
+
self.exiting = True
|
|
116
|
+
# Perform graceful shutdown in a separate thread to avoid blocking
|
|
117
|
+
# the main thread.
|
|
118
|
+
threading.Thread(target=self._graceful_shutdown,
|
|
119
|
+
args=(sig, frame),
|
|
120
|
+
daemon=True).start()
|
|
121
|
+
|
|
122
|
+
def _graceful_shutdown(self, sig: int, frame: Union[FrameType,
|
|
123
|
+
None]) -> None:
|
|
124
|
+
"""Perform graceful shutdown."""
|
|
125
|
+
# Block new requests so that we can wait until all on-going requests
|
|
126
|
+
# are finished. Note that /api/$verb operations are still allowed in
|
|
127
|
+
# this stage to ensure the client can still operate the on-going
|
|
128
|
+
# requests, e.g. /api/logs, /api/cancel, etc.
|
|
129
|
+
logger.info('Block new requests being submitted in worker '
|
|
130
|
+
f'{os.getpid()}.')
|
|
131
|
+
state.set_block_requests(True)
|
|
132
|
+
# Ensure shutting_down is set on all workers before the next step.
|
|
133
|
+
# TODO(aylei): hacky, need a reliable solution.
|
|
134
|
+
time.sleep(1)
|
|
135
|
+
|
|
136
|
+
lock = filelock.FileLock(_GRACEFUL_SHUTDOWN_LOCK_PATH)
|
|
137
|
+
# Elect a coordinator process to handle on-going requests check
|
|
138
|
+
with lock.acquire():
|
|
139
|
+
logger.info(f'Worker {os.getpid()} elected as shutdown coordinator')
|
|
140
|
+
self._wait_requests()
|
|
141
|
+
|
|
142
|
+
logger.info('Shutting down server...')
|
|
143
|
+
self.should_exit = True
|
|
144
|
+
super().handle_exit(sig, frame)
|
|
145
|
+
|
|
146
|
+
def _wait_requests(self) -> None:
    """Wait until all on-going requests are finished or cancelled.

    Polls for PENDING and RUNNING requests. On every poll, requests that are
    retriable by name or that belong to an internal daemon are interrupted
    for retry. Once `_WAIT_REQUESTS_TIMEOUT_SECONDS` has elapsed, every
    remaining request is interrupted and the wait ends.
    """
    start_time = time.time()
    # Both values are loop-invariant, so build them once instead of on
    # every poll.
    statuses = [
        requests_lib.RequestStatus.PENDING,
        requests_lib.RequestStatus.RUNNING,
    ]
    # Internal requests and logs requests are proactively cancelled below
    # since they can run for infinite time.
    internal_request_ids = {d.id for d in daemons.INTERNAL_REQUEST_DAEMONS}
    while True:
        requests = [(request_task.request_id, request_task.name)
                    for request_task in requests_lib.get_request_tasks(
                        req_filter=requests_lib.RequestTaskFilter(
                            status=statuses, fields=['request_id', 'name']))
                   ]
        if not requests:
            break
        logger.info(f'{len(requests)} on-going requests '
                    'found, waiting for them to finish...')
        if time.time() - start_time > _WAIT_REQUESTS_TIMEOUT_SECONDS:
            logger.warning('Timeout waiting for on-going requests to '
                           'finish, cancelling all on-going requests.')
            for request_id, _ in requests:
                self.interrupt_request_for_retry(request_id)
            break
        interrupted = 0
        for request_id, name in requests:
            if (name in _RETRIABLE_REQUEST_NAMES or
                    request_id in internal_request_ids):
                self.interrupt_request_for_retry(request_id)
                interrupted += 1
            # TODO(aylei): interrupt pending requests to accelerate the
            # shutdown.
        # If some requests are not interrupted, wait for them to finish,
        # otherwise we just check again immediately to accelerate the
        # shutdown process.
        if interrupted < len(requests):
            time.sleep(_WAIT_REQUESTS_INTERVAL_SECONDS)
|
|
187
|
+
|
|
188
|
+
def interrupt_request_for_retry(self, request_id: str) -> None:
    """Interrupt a request for retry.

    Sends SIGTERM to the request's process when a pid is recorded, then
    marks the request as CANCELLED with `should_retry` set. Does nothing if
    the request cannot be found.

    Args:
        request_id: The ID of the request to interrupt.
    """
    with requests_lib.update_request(request_id) as req:
        if req is None:
            # Unknown request: nothing to interrupt.
            return
        if req.pid is not None:
            try:
                os.kill(req.pid, signal.SIGTERM)
            except ProcessLookupError:
                # The process exited before we could signal it.
                logger.debug(f'Process {req.pid} already finished.')
        req.status = requests_lib.RequestStatus.CANCELLED
        req.should_retry = True
    logger.info(
        f'Request {request_id} interrupted and will be retried by client.')
|
|
202
|
+
|
|
203
|
+
def run(self, *args, **kwargs):
    """Run the server process.

    Applies the optional DB connection limit, sets up the event loop and a
    background process monitor thread, then serves with signal capturing
    enabled until the server exits.

    Args:
        *args: Positional arguments forwarded to `self.serve`.
        **kwargs: Keyword arguments forwarded to `self.serve`.
    """
    if self.max_db_connections is not None:
        db_utils.set_max_connections(self.max_db_connections)
    add_timestamp_prefix_for_server_logs()
    context_utils.hijack_sys_attrs()
    # Use default loop policy of uvicorn (use uvloop if available).
    self.config.setup_event_loop()
    lag_threshold = perf_utils.get_loop_lag_threshold()
    if lag_threshold is not None:
        event_loop = asyncio.get_event_loop()
        # Same as set PYTHONASYNCIODEBUG=1, but with custom threshold.
        event_loop.set_debug(True)
        event_loop.slow_callback_duration = lag_threshold
    # The monitor thread receives this event; it is set in the `finally`
    # below, presumably to tell the monitor to stop before it is joined.
    stop_monitor = threading.Event()
    monitor = threading.Thread(target=metrics_lib.process_monitor,
                               args=('server', stop_monitor),
                               daemon=True)
    monitor.start()
    try:
        with self.capture_signals():
            asyncio.run(self.serve(*args, **kwargs))
    finally:
        stop_monitor.set()
        monitor.join()
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def run(config: uvicorn.Config, max_db_connections: Optional[int] = None):
|
|
17
231
|
"""Run uvicorn server."""
|
|
18
232
|
if config.reload:
|
|
19
233
|
# Reload and multi-workers are mutually exclusive
|
|
20
234
|
# in uvicorn. Since we do not use reload now, simply
|
|
21
235
|
# guard by an exception.
|
|
22
236
|
raise ValueError('Reload is not supported yet.')
|
|
23
|
-
server =
|
|
237
|
+
server = Server(config=config, max_db_connections=max_db_connections)
|
|
24
238
|
try:
|
|
25
239
|
if config.workers is not None and config.workers > 1:
|
|
26
240
|
sock = config.bind_socket()
|
sky/server/versions.py
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
"""API versioning module."""
|
|
2
|
+
|
|
3
|
+
import contextvars
|
|
4
|
+
import functools
|
|
5
|
+
import re
|
|
6
|
+
from typing import Callable, Literal, Mapping, NamedTuple, Optional, Tuple
|
|
7
|
+
|
|
8
|
+
import colorama
|
|
9
|
+
from packaging import version as version_lib
|
|
10
|
+
|
|
11
|
+
import sky
|
|
12
|
+
from sky import exceptions
|
|
13
|
+
from sky import sky_logging
|
|
14
|
+
from sky.server import constants
|
|
15
|
+
from sky.utils import ux_utils
|
|
16
|
+
|
|
17
|
+
logger = sky_logging.init_logger(__name__)
|
|
18
|
+
|
|
19
|
+
# Error template shown when the remote client is older than the minimum
# compatible API version. The `{remote_version}`, `{local_version}`,
# `{min_version}` and `{command}` placeholders are filled in via str.format().
CLIENT_TOO_OLD_ERROR = (
    f'{colorama.Fore.YELLOW}Your SkyPilot client version is too old: '
    '{remote_version}\n'
    f'{colorama.Style.RESET_ALL}'
    'The server is running on {local_version} and the minimum compatible '
    'version is {min_version}.\n'
    f'Upgrade your client with:\n{colorama.Fore.YELLOW}'
    '{command}'
    f'{colorama.Style.RESET_ALL}')
|
|
28
|
+
SERVER_TOO_OLD_ERROR = (
|
|
29
|
+
f'{colorama.Fore.YELLOW}Your SkyPilot API server version is too old: '
|
|
30
|
+
'{remote_version}\n'
|
|
31
|
+
f'{colorama.Style.RESET_ALL}'
|
|
32
|
+
'The client is running on {local_version} and the minimum compatible '
|
|
33
|
+
'version is {min_version}.\n'
|
|
34
|
+
'Contact your administrator to upgrade the remote API server or downgrade '
|
|
35
|
+
f'your client with:\n{colorama.Fore.YELLOW}'
|
|
36
|
+
'{command}'
|
|
37
|
+
f'{colorama.Style.RESET_ALL}')
|
|
38
|
+
|
|
39
|
+
# SkyPilot dev version.
|
|
40
|
+
DEV_VERSION = '1.0.0-dev0'
|
|
41
|
+
|
|
42
|
+
_REMOTE_TO_ERROR = {
|
|
43
|
+
'client': CLIENT_TOO_OLD_ERROR,
|
|
44
|
+
'server': SERVER_TOO_OLD_ERROR,
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
# Context-local (thread or coroutine) remote API version, captured during
|
|
48
|
+
# communication with the remote peer.
|
|
49
|
+
_remote_api_version: contextvars.ContextVar[Optional[int]] = \
|
|
50
|
+
contextvars.ContextVar('remote_api_version', default=None)
|
|
51
|
+
_remote_version: contextvars.ContextVar[str] = \
|
|
52
|
+
contextvars.ContextVar('remote_version', default='unknown')
|
|
53
|
+
_reminded_for_minor_version_upgrade = False
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_remote_api_version() -> Optional[int]:
    """Return the remote API version stored in the current context.

    Returns None (the context variable's default) if it has not been set.
    """
    return _remote_api_version.get()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def set_remote_api_version(api_version: int) -> None:
    """Store the remote API version in the current context."""
    _remote_api_version.set(api_version)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_remote_version() -> str:
    """Return the remote version string stored in the current context.

    Returns 'unknown' (the context variable's default) if it has not been
    set.
    """
    return _remote_version.get()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def set_remote_version(version: str) -> None:
    """Store the remote version string in the current context."""
    _remote_version.set(version)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class VersionInfo(NamedTuple):
    """Version info of the remote peer, as parsed from its headers."""
    # Integer API version reported by the remote.
    api_version: int
    # Version string reported by the remote.
    version: str
    # Readable error message when the remote API version is below the minimum
    # compatible version; None otherwise.
    error: Optional[str] = None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def check_compatibility_at_server(
        client_headers: Mapping[str, str]) -> Optional[VersionInfo]:
    """Check API compatibility of a client, called at the server side.

    Args:
        client_headers: The headers of the request sent from the client.

    Returns:
        The version info of the client, or None if the headers carry no
        version info.
    """
    return _check_version_compatibility(client_headers, 'client')
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def check_compatibility_at_client(
        server_headers: Mapping[str, str]) -> Optional[VersionInfo]:
    """Check API compatibility of a server, called at the client side.

    Args:
        server_headers: The headers of the response sent from the server.

    Returns:
        The version info of the server, or None if the headers carry no
        version info.
    """
    return _check_version_compatibility(server_headers, 'server')
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _check_version_compatibility(
        remote_headers: Mapping[str, str],
        remote_type: Literal['client', 'server']) -> Optional[VersionInfo]:
    """Check API compatibility between client and server.

    Usable on either side of the connection: the headers passed in must carry
    the version info of the remote peer.

    Args:
        remote_headers: The headers of the request/response sent from the
          remote.
        remote_type: The type of the remote, used to determine the error
          message. Valid options are 'client' and 'server'.

    Returns:
        The version info of the remote, with `error` populated when the
        remote API version is below the minimum compatible one. None if the
        version info is not found in the headers (backward compatibility).

    Raises:
        ValueError: If the API version header is not a valid integer.
    """
    version = remote_headers.get(constants.VERSION_HEADER)
    api_version_str = remote_headers.get(constants.API_VERSION_HEADER)
    if api_version_str is None or version is None:
        return None
    try:
        api_version = int(api_version_str)
    except ValueError:
        # Future changes are expected to keep this header compatible, so an
        # unparsable value means a bug or a malicious request: fail loudly.
        raise ValueError(
            f'Header {constants.API_VERSION_HEADER}: '
            f'{api_version_str} is not a valid API version.') from None

    if api_version >= constants.MIN_COMPATIBLE_API_VERSION:
        if remote_type == 'server':
            # Only print the reminder at client-side.
            _remind_minor_version_upgrade(version)
        return VersionInfo(api_version=api_version, version=version)

    # The remote is too old: build an actionable install command.
    if remote_type == 'server':
        # Hint the user to downgrade the client to the remote server's
        # version.
        server_version, server_commit = parse_readable_version(version)
        command = install_version_command(server_version, server_commit)
    else:
        # Hint the client to upgrade to the version this server runs.
        command = install_version_command(sky.__version__, sky.__commit__)
    error = _REMOTE_TO_ERROR[remote_type].format(
        remote_version=version,
        local_version=get_local_readable_version(),
        min_version=constants.MIN_COMPATIBLE_VERSION,
        command=command,
    )
    return VersionInfo(api_version=api_version, version=version, error=error)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def get_local_readable_version() -> str:
    """Return a readable version of the SkyPilot code loaded in this process.

    A dev version is rendered with its commit, e.g.
    `1.0.0-dev0 (commit: 1234567)`, so that it is meaningful to users; any
    other version is returned unchanged.
    """
    local_version = sky.__version__
    if local_version != DEV_VERSION:
        return local_version
    return f'{local_version} (commit: {sky.__commit__})'
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def parse_readable_version(version: str) -> Tuple[str, Optional[str]]:
    """Parse a readable version produced by get_local_readable_version.

    Args:
        version: The version string to parse.

    Returns:
        A tuple of (version, optional_commit) where:
        - version: The base version string (e.g., "1.0.0-dev0")
        - optional_commit: The commit hash if present, None otherwise
    """
    # A dev version carries commit info: "1.0.0-dev0 (commit: 1234567)".
    parsed = re.match(r'^(.+) \(commit: ([a-f0-9]+)\)$', version)
    if parsed is None:
        # Regular version without commit info.
        return version, None
    base_version, commit = parsed.groups()
    return base_version, commit
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def install_version_command(version: str, commit: Optional[str] = None) -> str:
    """Build the pip command that installs the given SkyPilot version.

    Args:
        version: The version to install.
        commit: The commit to install from; only used for the dev version.

    Returns:
        A git-based install command for the dev version with a known commit,
        a `skypilot-nightly` command for other versions containing 'dev', and
        a `skypilot` command otherwise.
    """
    is_dev_version = version == DEV_VERSION
    if is_dev_version and commit is not None:
        return ('pip install git+https://github.com/skypilot-org/skypilot@'
                f'{commit}')
    if not is_dev_version and 'dev' in version:
        return f'pip install -U "skypilot-nightly=={version}"'
    return f'pip install -U "skypilot=={version}"'
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _remind_minor_version_upgrade(remote_version: str) -> None:
    """Remind the user to upgrade the CLI/SDK.

    Logs a warning, at most once per process, when the local (major, minor)
    version is behind that of the remote. Dev versions and versions that
    cannot be parsed are skipped.

    Args:
        remote_version: The readable version reported by the remote, which
          may carry a trailing commit suffix.
    """
    # Only print the reminder once per process.
    global _reminded_for_minor_version_upgrade
    if _reminded_for_minor_version_upgrade:
        return
    # Skip for dev versions.
    if 'dev' in sky.__version__ or 'dev' in remote_version:
        return

    # Remove the commit info if any.
    remote_base_version, _ = parse_readable_version(remote_version)

    # Parse semver for both local and remote versions. Only the parsing can
    # raise InvalidVersion, so keep the try body to just that.
    try:
        local = version_lib.parse(sky.__version__)
        remote = version_lib.parse(remote_base_version)
    except version_lib.InvalidVersion:
        # Skip for non-valid semver (probably a dev version)
        return

    # Check if local version is behind remote version, ignore patch version.
    if (local.major, local.minor) >= (remote.major, remote.minor):
        return
    # The install command must use the base version: a commit suffix is not
    # a valid part of a pip requirement.
    logger.warning(
        f'{colorama.Fore.YELLOW}The SkyPilot API server is running in '
        f'version {remote_version}, which is newer than your client '
        f'version {sky.__version__}. The compatibility for your '
        f'current version might be dropped in the next server upgrade.'
        f'\nConsider upgrading your client with:\n'
        f'{install_version_command(remote_base_version)}'
        f'{colorama.Style.RESET_ALL}')
    _reminded_for_minor_version_upgrade = True
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# TODO(aylei): maybe we can use a similar approach to mark that a new argument can
|
|
228
|
+
# only be used in the new server version.
|
|
229
|
+
def minimal_api_version(min_version: int) -> Callable:
    """Decorator to enforce a minimum remote API version for an SDK function.

    A new SDK method must be decorated with this decorator so that it raises
    a readable error when the remote server has not been upgraded.

    Args:
        min_version: The minimum remote API version required to call the
            function.

    Returns:
        A decorator function that checks API version before execution.

    Raises:
        APINotSupportedError: If the remote API version is below the minimum
            required.
    """

    def decorator(func: Callable) -> Callable:

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            remote_api_version = get_remote_api_version()
            # An unknown remote API version is not treated as unsupported.
            supported = (remote_api_version is None or
                         remote_api_version >= min_version)
            if supported:
                return func(*args, **kwargs)

            if 'dev' in sky.__version__:
                hint = 'Please upgrade the remote server.'
            else:
                # The client runs in a released version, do better hint.
                hint = (f'Upgrade the remote server to {sky.__version__} '
                        'and re-run the command.')
            with ux_utils.print_exception_no_traceback():
                raise exceptions.APINotSupportedError(
                    f'Function {func.__name__} is introduced after the '
                    f'remote server version {get_remote_version()!r} is '
                    f'released. {hint}')

        return wrapper

    return decorator
|
sky/setup_files/MANIFEST.in
CHANGED
|
@@ -9,6 +9,7 @@ include sky/skylet/providers/ibm/*
|
|
|
9
9
|
include sky/skylet/providers/scp/*
|
|
10
10
|
include sky/skylet/providers/*.py
|
|
11
11
|
include sky/skylet/ray_patches/*.patch
|
|
12
|
+
include sky/skylet/ray_patches/*.diff
|
|
12
13
|
include sky/jobs/dashboard/*
|
|
13
14
|
include sky/jobs/dashboard/templates/*
|
|
14
15
|
include sky/jobs/dashboard/static/*
|
|
@@ -16,3 +17,7 @@ include sky/templates/*
|
|
|
16
17
|
include sky/utils/kubernetes/*
|
|
17
18
|
include sky/server/html/*
|
|
18
19
|
recursive-include sky/dashboard/out *
|
|
20
|
+
include sky/users/*.conf
|
|
21
|
+
include sky/utils/*.sh
|
|
22
|
+
include sky/setup_files/alembic.ini
|
|
23
|
+
recursive-include sky/schemas/db *
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# alembic configuration for global user state, jobs state, and sky config db migrations.
|
|
2
|
+
|
|
3
|
+
[DEFAULT]
|
|
4
|
+
# path to migration scripts.
|
|
5
|
+
# this is typically a path given in POSIX (e.g. forward slashes)
|
|
6
|
+
# format, relative to the token %(here)s which refers to the location of this
|
|
7
|
+
# ini file
|
|
8
|
+
script_location = %(here)s/../schemas/db
|
|
9
|
+
|
|
10
|
+
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
|
|
11
|
+
# Uncomment the line below if you want the files to be prepended with date and time
|
|
12
|
+
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
|
|
13
|
+
# for all available tokens
|
|
14
|
+
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
|
|
15
|
+
|
|
16
|
+
# sys.path path, will be prepended to sys.path if present.
|
|
17
|
+
# defaults to the current working directory. for multiple paths, the path separator
|
|
18
|
+
# is defined by "path_separator" below.
|
|
19
|
+
prepend_sys_path = .
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# timezone to use when rendering the date within the migration file
|
|
23
|
+
# as well as the filename.
|
|
24
|
+
# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
|
|
25
|
+
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
|
|
26
|
+
# string value is passed to ZoneInfo()
|
|
27
|
+
# leave blank for localtime
|
|
28
|
+
# timezone =
|
|
29
|
+
|
|
30
|
+
# max length of characters to apply to the "slug" field
|
|
31
|
+
# truncate_slug_length = 40
|
|
32
|
+
|
|
33
|
+
# set to 'true' to run the environment during
|
|
34
|
+
# the 'revision' command, regardless of autogenerate
|
|
35
|
+
# revision_environment = false
|
|
36
|
+
|
|
37
|
+
# set to 'true' to allow .pyc and .pyo files without
|
|
38
|
+
# a source .py file to be detected as revisions in the
|
|
39
|
+
# versions/ directory
|
|
40
|
+
# sourceless = false
|
|
41
|
+
|
|
42
|
+
# version location specification; This defaults
|
|
43
|
+
# to <script_location>/versions. When using multiple version
|
|
44
|
+
# directories, initial revisions must be specified with --version-path.
|
|
45
|
+
# The path separator used here should be the separator specified by "path_separator"
|
|
46
|
+
# below.
|
|
47
|
+
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
|
|
48
|
+
|
|
49
|
+
# path_separator; This indicates what character is used to split lists of file
|
|
50
|
+
# paths, including version_locations and prepend_sys_path within configparser
|
|
51
|
+
# files such as alembic.ini.
|
|
52
|
+
# The default rendered in new alembic.ini files is "os", which uses os.pathsep
|
|
53
|
+
# to provide os-dependent path splitting.
|
|
54
|
+
#
|
|
55
|
+
# Note that in order to support legacy alembic.ini files, this default does NOT
|
|
56
|
+
# take place if path_separator is not present in alembic.ini. If this
|
|
57
|
+
# option is omitted entirely, fallback logic is as follows:
|
|
58
|
+
#
|
|
59
|
+
# 1. Parsing of the version_locations option falls back to using the legacy
|
|
60
|
+
# "version_path_separator" key, which if absent then falls back to the legacy
|
|
61
|
+
# behavior of splitting on spaces and/or commas.
|
|
62
|
+
# 2. Parsing of the prepend_sys_path option falls back to the legacy
|
|
63
|
+
# behavior of splitting on spaces, commas, or colons.
|
|
64
|
+
#
|
|
65
|
+
# Valid values for path_separator are:
|
|
66
|
+
#
|
|
67
|
+
# path_separator = :
|
|
68
|
+
# path_separator = ;
|
|
69
|
+
# path_separator = space
|
|
70
|
+
# path_separator = newline
|
|
71
|
+
#
|
|
72
|
+
# Use os.pathsep. Default configuration used for new projects.
|
|
73
|
+
path_separator = os
|
|
74
|
+
|
|
75
|
+
# set to 'true' to search source files recursively
|
|
76
|
+
# in each "version_locations" directory
|
|
77
|
+
# new in Alembic version 1.10
|
|
78
|
+
# recursive_version_locations = false
|
|
79
|
+
|
|
80
|
+
# the output encoding used when revision files
|
|
81
|
+
# are written from script.py.mako
|
|
82
|
+
# output_encoding = utf-8
|
|
83
|
+
|
|
84
|
+
# database URL. This is consumed by the user-maintained env.py script only.
|
|
85
|
+
# other means of configuring database URLs may be customized within the env.py
|
|
86
|
+
# file.
|
|
87
|
+
# sqlalchemy.url = driver://user:pass@localhost/dbname
|
|
88
|
+
|
|
89
|
+
[state_db]
|
|
90
|
+
version_locations = %(here)s/../schemas/db/global_user_state
|
|
91
|
+
version_table = alembic_version_state_db
|
|
92
|
+
|
|
93
|
+
[spot_jobs_db]
|
|
94
|
+
version_locations = %(here)s/../schemas/db/spot_jobs
|
|
95
|
+
version_table = alembic_version_spot_jobs_db
|
|
96
|
+
|
|
97
|
+
[serve_db]
|
|
98
|
+
version_locations = %(here)s/../schemas/db/serve_state
|
|
99
|
+
version_table = alembic_version_serve_state_db
|
|
100
|
+
|
|
101
|
+
[sky_config_db]
|
|
102
|
+
version_locations = %(here)s/../schemas/db/skypilot_config
|
|
103
|
+
version_table = alembic_version_sky_config_db
|
|
104
|
+
|
|
105
|
+
[post_write_hooks]
|
|
106
|
+
# post_write_hooks defines scripts or Python functions that are run
|
|
107
|
+
# on newly generated revision scripts. See the documentation for further
|
|
108
|
+
# detail and examples
|
|
109
|
+
|
|
110
|
+
# format using "black" - use the console_scripts runner, against the "black" entrypoint
|
|
111
|
+
# hooks = black
|
|
112
|
+
# black.type = console_scripts
|
|
113
|
+
# black.entrypoint = black
|
|
114
|
+
# black.options = -l 79 REVISION_SCRIPT_FILENAME
|
|
115
|
+
|
|
116
|
+
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
|
|
117
|
+
# hooks = ruff
|
|
118
|
+
# ruff.type = exec
|
|
119
|
+
# ruff.executable = %(here)s/.venv/bin/ruff
|
|
120
|
+
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
|
|
121
|
+
|
|
122
|
+
# Logging configuration. This is also consumed by the user-maintained
|
|
123
|
+
# env.py script only.
|
|
124
|
+
[loggers]
|
|
125
|
+
keys = root,sqlalchemy,alembic
|
|
126
|
+
|
|
127
|
+
[handlers]
|
|
128
|
+
keys = console
|
|
129
|
+
|
|
130
|
+
[formatters]
|
|
131
|
+
keys = generic
|
|
132
|
+
|
|
133
|
+
[logger_root]
|
|
134
|
+
level = WARNING
|
|
135
|
+
handlers = console
|
|
136
|
+
qualname =
|
|
137
|
+
|
|
138
|
+
[logger_sqlalchemy]
|
|
139
|
+
level = WARNING
|
|
140
|
+
handlers =
|
|
141
|
+
qualname = sqlalchemy.engine
|
|
142
|
+
|
|
143
|
+
[logger_alembic]
|
|
144
|
+
level = WARNING
|
|
145
|
+
handlers =
|
|
146
|
+
qualname = alembic
|
|
147
|
+
|
|
148
|
+
[handler_console]
|
|
149
|
+
class = StreamHandler
|
|
150
|
+
args = (sys.stderr,)
|
|
151
|
+
level = NOTSET
|
|
152
|
+
formatter = generic
|
|
153
|
+
|
|
154
|
+
[formatter_generic]
|
|
155
|
+
format = %(levelname)-5.5s [%(name)s] %(message)s
|
|
156
|
+
datefmt = %H:%M:%S
|