skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +25 -7
- sky/adaptors/common.py +24 -1
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +170 -17
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +167 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1299 -380
- sky/backends/cloud_vm_ray_backend.py +1715 -518
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/wheel_utils.py +37 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +89 -48
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +335 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +491 -203
- sky/cli.py +5 -6005
- sky/client/{cli.py → cli/command.py} +2477 -1885
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +320 -0
- sky/client/common.py +70 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1203 -297
- sky/client/sdk_async.py +833 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +73 -0
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +358 -93
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +127 -36
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +563 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +206 -80
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -83
- sky/clouds/seeweb.py +466 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +177 -124
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +349 -139
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
- sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1451 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +132 -2
- sky/execution.py +206 -63
- sky/global_user_state.py +2374 -586
- sky/jobs/__init__.py +5 -0
- sky/jobs/client/sdk.py +242 -65
- sky/jobs/client/sdk_async.py +143 -0
- sky/jobs/constants.py +9 -8
- sky/jobs/controller.py +839 -277
- sky/jobs/file_content_utils.py +80 -0
- sky/jobs/log_gc.py +201 -0
- sky/jobs/recovery_strategy.py +398 -152
- sky/jobs/scheduler.py +315 -189
- sky/jobs/server/core.py +829 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2092 -701
- sky/jobs/utils.py +1242 -160
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +443 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +135 -50
- sky/provision/azure/instance.py +10 -5
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +114 -23
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +93 -14
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +789 -247
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +40 -43
- sky/provision/kubernetes/utils.py +1192 -531
- sky/provision/kubernetes/volume.py +282 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +196 -91
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +110 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +180 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +531 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +807 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/vsphere/common/vim_utils.py +1 -2
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +9 -19
- sky/py.typed +0 -0
- sky/resources.py +844 -118
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +225 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +10 -8
- sky/serve/controller.py +64 -19
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +115 -1
- sky/serve/replica_managers.py +273 -162
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +554 -251
- sky/serve/serve_utils.py +733 -220
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +133 -48
- sky/serve/service_spec.py +135 -16
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +200 -0
- sky/server/common.py +475 -181
- sky/server/config.py +81 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +229 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/requests/executor.py +528 -138
- sky/server/requests/payloads.py +351 -17
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +120 -0
- sky/server/requests/requests.py +817 -224
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +106 -0
- sky/server/rest.py +417 -0
- sky/server/server.py +1290 -284
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +345 -57
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +5 -0
- sky/setup_files/alembic.ini +156 -0
- sky/setup_files/dependencies.py +136 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +102 -5
- sky/skylet/attempt_skylet.py +1 -0
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +27 -20
- sky/skylet/constants.py +171 -19
- sky/skylet/events.py +105 -21
- sky/skylet/job_lib.py +335 -104
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/services.py +564 -0
- sky/skylet/skylet.py +63 -4
- sky/skylet/subprocess_daemon.py +103 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +621 -137
- sky/templates/aws-ray.yml.j2 +10 -3
- sky/templates/azure-ray.yml.j2 +1 -1
- sky/templates/do-ray.yml.j2 +1 -1
- sky/templates/gcp-ray.yml.j2 +57 -0
- sky/templates/hyperbolic-ray.yml.j2 +67 -0
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +607 -51
- sky/templates/lambda-ray.yml.j2 +1 -1
- sky/templates/nebius-ray.yml.j2 +33 -12
- sky/templates/paperspace-ray.yml.j2 +1 -1
- sky/templates/primeintellect-ray.yml.j2 +71 -0
- sky/templates/runpod-ray.yml.j2 +9 -1
- sky/templates/scp-ray.yml.j2 +3 -50
- sky/templates/seeweb-ray.yml.j2 +108 -0
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/websocket_proxy.py +178 -18
- sky/usage/usage_lib.py +18 -11
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +387 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +34 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +16 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +310 -87
- sky/utils/config_utils.py +87 -5
- sky/utils/context.py +402 -0
- sky/utils/context_utils.py +222 -0
- sky/utils/controller_utils.py +264 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +470 -0
- sky/utils/db/migration_utils.py +133 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +13 -27
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +5 -5
- sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +368 -0
- sky/utils/log_utils.py +300 -6
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +213 -37
- sky/utils/schemas.py +905 -147
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +38 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/timeline.py +24 -52
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +86 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +149 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +258 -0
- sky/volumes/server/server.py +122 -0
- sky/volumes/volume.py +212 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/utils/resources_utils.py
CHANGED
|
@@ -4,11 +4,12 @@ import enum
|
|
|
4
4
|
import itertools
|
|
5
5
|
import json
|
|
6
6
|
import math
|
|
7
|
-
import re
|
|
8
7
|
import typing
|
|
9
|
-
from typing import Dict, List, Optional, Set, Union
|
|
8
|
+
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
|
10
9
|
|
|
11
10
|
from sky import skypilot_config
|
|
11
|
+
from sky.skylet import constants
|
|
12
|
+
from sky.utils import common_utils
|
|
12
13
|
from sky.utils import registry
|
|
13
14
|
from sky.utils import ux_utils
|
|
14
15
|
|
|
@@ -50,6 +51,48 @@ class DiskTier(enum.Enum):
|
|
|
50
51
|
return types.index(self) <= types.index(other)
|
|
51
52
|
|
|
52
53
|
|
|
54
|
+
class NetworkTier(enum.Enum):
|
|
55
|
+
"""All network tiers supported by SkyPilot."""
|
|
56
|
+
STANDARD = 'standard'
|
|
57
|
+
BEST = 'best'
|
|
58
|
+
|
|
59
|
+
@classmethod
|
|
60
|
+
def supported_tiers(cls) -> List[str]:
|
|
61
|
+
return [tier.value for tier in cls]
|
|
62
|
+
|
|
63
|
+
@classmethod
|
|
64
|
+
def cli_help_message(cls) -> str:
|
|
65
|
+
return (
|
|
66
|
+
f'Network tier. Could be one of {", ".join(cls.supported_tiers())}'
|
|
67
|
+
f'. If {cls.BEST.value} is specified, use the best network tier '
|
|
68
|
+
'available on the specified instance. '
|
|
69
|
+
f'Default: {cls.STANDARD.value}')
|
|
70
|
+
|
|
71
|
+
@classmethod
|
|
72
|
+
def from_str(cls, tier: str) -> 'NetworkTier':
|
|
73
|
+
if tier not in cls.supported_tiers():
|
|
74
|
+
raise ValueError(f'Invalid network tier: {tier}')
|
|
75
|
+
return cls(tier)
|
|
76
|
+
|
|
77
|
+
def __le__(self, other: 'NetworkTier') -> bool:
|
|
78
|
+
types = list(NetworkTier)
|
|
79
|
+
return types.index(self) <= types.index(other)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class StorageType(enum.Enum):
|
|
83
|
+
"""Storage type."""
|
|
84
|
+
# Durable network storage, e.g. GCP persistent disks
|
|
85
|
+
NETWORK = 'network'
|
|
86
|
+
# Local instance storage, e.g. GCP local SSDs
|
|
87
|
+
INSTANCE = 'instance'
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class DiskAttachMode(enum.Enum):
|
|
91
|
+
"""Disk attach mode."""
|
|
92
|
+
READ_ONLY = 'read_only'
|
|
93
|
+
READ_WRITE = 'read_write'
|
|
94
|
+
|
|
95
|
+
|
|
53
96
|
@dataclasses.dataclass
|
|
54
97
|
class ClusterName:
|
|
55
98
|
display_name: str
|
|
@@ -138,35 +181,81 @@ def simplify_ports(ports: List[str]) -> List[str]:
|
|
|
138
181
|
|
|
139
182
|
|
|
140
183
|
def format_resource(resource: 'resources_lib.Resources',
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
184
|
+
simplified_only: bool = False) -> Tuple[str, Optional[str]]:
|
|
185
|
+
resource = resource.assert_launchable()
|
|
186
|
+
is_k8s = str(resource.cloud).lower() == 'kubernetes'
|
|
187
|
+
if resource.accelerators is None or is_k8s or not simplified_only:
|
|
188
|
+
vcpu, mem = resource.cloud.get_vcpus_mem_from_instance_type(
|
|
189
|
+
resource.instance_type)
|
|
190
|
+
|
|
191
|
+
elements_simple = []
|
|
192
|
+
elements_full = []
|
|
193
|
+
|
|
194
|
+
if resource.accelerators is not None:
|
|
195
|
+
acc, count = list(resource.accelerators.items())[0]
|
|
196
|
+
elements_simple.append(f'gpus={acc}:{count}')
|
|
197
|
+
elements_full.append(f'gpus={acc}:{count}')
|
|
198
|
+
|
|
199
|
+
if (resource.accelerators is None or is_k8s):
|
|
200
|
+
if vcpu is not None:
|
|
201
|
+
elements_simple.append(f'cpus={int(vcpu)}')
|
|
202
|
+
elements_full.append(f'cpus={int(vcpu)}')
|
|
203
|
+
if mem is not None:
|
|
204
|
+
elements_simple.append(f'mem={int(mem)}')
|
|
205
|
+
elements_full.append(f'mem={int(mem)}')
|
|
206
|
+
elif not simplified_only:
|
|
207
|
+
if vcpu is not None:
|
|
208
|
+
elements_full.append(f'cpus={int(vcpu)}')
|
|
209
|
+
if mem is not None:
|
|
210
|
+
elements_full.append(f'mem={int(mem)}')
|
|
211
|
+
|
|
212
|
+
if not is_k8s:
|
|
213
|
+
instance_type_full = resource.instance_type
|
|
214
|
+
instance_type_simple = common_utils.truncate_long_string(
|
|
215
|
+
instance_type_full, 15)
|
|
216
|
+
elements_simple.append(instance_type_simple)
|
|
217
|
+
elements_full.append(instance_type_full)
|
|
218
|
+
elements_simple.append('...')
|
|
219
|
+
if not simplified_only:
|
|
220
|
+
image_id = resource.image_id
|
|
221
|
+
if image_id is not None:
|
|
222
|
+
if None in image_id:
|
|
223
|
+
elements_full.append(f'image_id={image_id[None]}')
|
|
224
|
+
else:
|
|
225
|
+
elements_full.append(f'image_id={image_id}')
|
|
226
|
+
elements_full.append(f'disk={resource.disk_size}')
|
|
227
|
+
disk_tier = resource.disk_tier
|
|
228
|
+
if disk_tier is not None:
|
|
229
|
+
elements_full.append(f'disk_tier={disk_tier.value}')
|
|
230
|
+
ports = resource.ports
|
|
231
|
+
if ports is not None:
|
|
232
|
+
elements_full.append(f'ports={ports}')
|
|
233
|
+
|
|
234
|
+
spot = '[spot]' if resource.use_spot else ''
|
|
235
|
+
resources_str_simple = (
|
|
236
|
+
f'{spot}({"" if not elements_simple else ", ".join(elements_simple)})')
|
|
237
|
+
if simplified_only:
|
|
238
|
+
return resources_str_simple, None
|
|
152
239
|
else:
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
240
|
+
resources_str_full = (
|
|
241
|
+
f'{spot}({"" if not elements_full else ", ".join(elements_full)})')
|
|
242
|
+
return resources_str_simple, resources_str_full
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def get_readable_resources_repr(
|
|
246
|
+
handle: 'backends.CloudVmRayResourceHandle',
|
|
247
|
+
simplified_only: bool = False) -> Tuple[str, Optional[str]]:
|
|
248
|
+
resource_str_simple, resource_str_full = format_resource(
|
|
249
|
+
handle.launched_resources, simplified_only)
|
|
250
|
+
if not simplified_only:
|
|
251
|
+
assert resource_str_full is not None
|
|
165
252
|
if (handle.launched_nodes is not None and
|
|
166
253
|
handle.launched_resources is not None):
|
|
167
|
-
return (f'{handle.launched_nodes}x
|
|
168
|
-
|
|
169
|
-
|
|
254
|
+
return (f'{handle.launched_nodes}x{resource_str_simple}',
|
|
255
|
+
None if simplified_only else
|
|
256
|
+
f'{handle.launched_nodes}x{resource_str_full}')
|
|
257
|
+
return (_DEFAULT_MESSAGE_HANDLE_INITIALIZING,
|
|
258
|
+
_DEFAULT_MESSAGE_HANDLE_INITIALIZING)
|
|
170
259
|
|
|
171
260
|
|
|
172
261
|
def make_ray_custom_resources_str(
|
|
@@ -208,10 +297,18 @@ def need_to_query_reservations() -> bool:
|
|
|
208
297
|
clouds that do not use reservations.
|
|
209
298
|
"""
|
|
210
299
|
for cloud_str in registry.CLOUD_REGISTRY.keys():
|
|
211
|
-
cloud_specific_reservations =
|
|
212
|
-
(
|
|
213
|
-
|
|
214
|
-
|
|
300
|
+
cloud_specific_reservations = (
|
|
301
|
+
skypilot_config.get_effective_region_config(
|
|
302
|
+
cloud=cloud_str,
|
|
303
|
+
region=None,
|
|
304
|
+
keys=('specific_reservations',),
|
|
305
|
+
default_value=None))
|
|
306
|
+
cloud_prioritize_reservations = (
|
|
307
|
+
skypilot_config.get_effective_region_config(
|
|
308
|
+
cloud=cloud_str,
|
|
309
|
+
region=None,
|
|
310
|
+
keys=('prioritize_reservations',),
|
|
311
|
+
default_value=False))
|
|
215
312
|
if (cloud_specific_reservations is not None or
|
|
216
313
|
cloud_prioritize_reservations):
|
|
217
314
|
return True
|
|
@@ -248,6 +345,7 @@ def make_launchables_for_valid_region_zones(
|
|
|
248
345
|
launchables = []
|
|
249
346
|
regions = launchable_resources.get_valid_regions_for_launchable()
|
|
250
347
|
for region in regions:
|
|
348
|
+
assert launchable_resources.cloud is not None, 'Cloud must be specified'
|
|
251
349
|
optimize_by_zone = (override_optimize_by_zone or
|
|
252
350
|
launchable_resources.cloud.optimize_by_zone())
|
|
253
351
|
# It is possible that we force the optimize_by_zone but some clouds
|
|
@@ -266,3 +364,122 @@ def make_launchables_for_valid_region_zones(
|
|
|
266
364
|
# Batch the requests at the granularity of a single region.
|
|
267
365
|
launchables.append(launchable_resources.copy(region=region.name))
|
|
268
366
|
return launchables
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def parse_memory_resource(resource_qty_str: Union[str, int, float],
                          field_name: str,
                          ret_type: type = int,
                          unit: str = 'gb',
                          allow_plus: bool = False,
                          allow_x: bool = False,
                          allow_rounding: bool = False) -> str:
    """Returns memory size in chosen units given a resource quantity string.

    Args:
        resource_qty_str: Resource quantity, e.g. a bare number or a number
            followed by one of the keys of `constants.MEMORY_SIZE_UNITS`.
        field_name: Name of the field being parsed; only used in the error
            message.
        ret_type: Numeric type used to parse and render the quantity.
        unit: Unit to convert to; must be a key of
            `constants.MEMORY_SIZE_UNITS`.
        allow_plus: Whether to allow a trailing '+'.
        allow_x: Whether to allow a trailing 'x'.
        allow_rounding: Whether a converted value that is not exactly
            representable as `ret_type` may be coerced to `ret_type`.

    Returns:
        The quantity expressed in `unit`, followed by the '+' and/or 'x'
        suffix that was stripped from the input (in that order).

    Raises:
        ValueError: If the input has a disallowed suffix, cannot be parsed,
            is too large to convert, or would need rounding while
            `allow_rounding` is False.
    """
    assert unit in constants.MEMORY_SIZE_UNITS, f'Invalid unit: {unit}'

    error_msg = (f'"{field_name}" field should be a '
                 f'{constants.MEMORY_SIZE_PATTERN}+?,'
                 f' got {resource_qty_str}')

    resource_str = str(resource_qty_str)

    # Handle plus and x suffixes, x is only used internally for jobs controller
    plus = ''
    if resource_str.endswith('+'):
        if not allow_plus:
            raise ValueError(error_msg)
        resource_str = resource_str[:-1]
        plus = '+'

    x = ''
    if resource_str.endswith('x'):
        if not allow_x:
            raise ValueError(error_msg)
        resource_str = resource_str[:-1]
        x = 'x'

    try:
        # We assume it is already in the wanted units to maintain backwards
        # compatibility
        ret_type(resource_str)
        return f'{resource_str}{plus}{x}'
    except ValueError:
        pass

    resource_str = resource_str.lower()
    target_multiplier = constants.MEMORY_SIZE_UNITS[unit]
    for mem_unit, multiplier in constants.MEMORY_SIZE_UNITS.items():
        if not resource_str.endswith(mem_unit):
            continue
        try:
            value = ret_type(resource_str[:-len(mem_unit)])
            converted = value * multiplier / target_multiplier
            if not allow_rounding and ret_type(converted) != converted:
                raise ValueError(error_msg)
            converted = ret_type(converted)
            return f'{converted}{plus}{x}'
        except (ValueError, OverflowError):
            # ValueError: the numeric part is malformed or needs rounding.
            # OverflowError: the quantity is too large for the conversion
            # (e.g. a huge int that does not fit in a float). Both end up as
            # the ValueError raised below.
            continue

    raise ValueError(error_msg)
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def parse_time_minutes(time: str) -> int:
    """Convert a time string to minutes.

    Args:
        time: Time string with optional unit suffix (e.g., '30m', '2h', '1d').
            A string of decimal digits only is taken to be minutes already.

    Returns:
        Time in minutes as an integer. Fractional results are rounded up.

    Raises:
        ValueError: If the string has no known unit suffix, its numeric part
            is not a finite number, or the result is negative.
    """
    time_str = str(time)

    if time_str.isdecimal():
        # We assume it is already in minutes to maintain backwards
        # compatibility
        return int(time_str)

    time_str = time_str.lower()
    for unit, multiplier in constants.TIME_UNITS.items():
        if not time_str.endswith(unit):
            continue
        try:
            value = float(time_str[:-len(unit)])
            final_value = math.ceil(value * multiplier)
        except (ValueError, OverflowError):
            # float() rejects non-numeric text; math.ceil() raises ValueError
            # for NaN and OverflowError for infinity (e.g. 'infm', '1e999h').
            # All of these are invalid time strings.
            continue
        if final_value >= 0:
            return final_value

    raise ValueError(f'Invalid time format: {time}')
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def normalize_any_of_resources_config(
        any_of: List[Dict[str, Any]]) -> Tuple[str, ...]:
    """Build an order-insensitive canonical form of an any_of config list.

    Args:
        any_of: A list of any_of resources config.

    Returns:
        A tuple of compact JSON strings, one per config, serialized with
        sorted keys and then sorted as strings. Lists holding the same
        configs in a different order therefore compare equal. An empty (or
        otherwise falsy) input gives an empty tuple.
    """
    if not any_of:
        return ()

    # Sorting keys makes each config's serialization independent of its key
    # order; sorting the strings removes the dependence on list order.
    return tuple(
        sorted(
            json.dumps(entry, sort_keys=True, separators=(',', ':'))
            for entry in any_of))
|
sky/utils/rich_utils.py
CHANGED
|
@@ -1,28 +1,53 @@
|
|
|
1
1
|
"""Rich status spinner utils."""
|
|
2
2
|
import contextlib
|
|
3
|
+
import contextvars
|
|
3
4
|
import enum
|
|
4
5
|
import logging
|
|
5
6
|
import threading
|
|
6
7
|
import typing
|
|
7
|
-
from typing import
|
|
8
|
+
from typing import Callable, Iterator, Optional, Tuple, Union
|
|
8
9
|
|
|
10
|
+
from sky import exceptions
|
|
9
11
|
from sky.adaptors import common as adaptors_common
|
|
10
12
|
from sky.utils import annotations
|
|
13
|
+
from sky.utils import context
|
|
11
14
|
from sky.utils import message_utils
|
|
12
15
|
from sky.utils import rich_console_utils
|
|
13
16
|
|
|
14
17
|
if typing.TYPE_CHECKING:
|
|
18
|
+
import aiohttp
|
|
15
19
|
import requests
|
|
16
20
|
import rich.console as rich_console
|
|
17
21
|
else:
|
|
18
22
|
requests = adaptors_common.LazyImport('requests')
|
|
19
23
|
rich_console = adaptors_common.LazyImport('rich.console')
|
|
24
|
+
aiohttp = adaptors_common.LazyImport('aiohttp')
|
|
25
|
+
|
|
26
|
+
GeneralStatus = Union['rich_console.Status', 'EncodedStatus']
|
|
27
|
+
|
|
28
|
+
_client_status: Optional[GeneralStatus] = None
|
|
29
|
+
_server_status: contextvars.ContextVar[
|
|
30
|
+
Optional[GeneralStatus]] = contextvars.ContextVar('server_status',
|
|
31
|
+
default=None)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _get_client_status() -> Optional[GeneralStatus]:
    """Returns the module-level client status, or None if none is set."""
    return _client_status
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _get_server_status() -> Optional[GeneralStatus]:
    """Returns the server status held in the `_server_status` context var."""
    current = _server_status.get()
    return current
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _set_client_status(status: Optional[GeneralStatus]):
    """Replaces the module-level client status; None clears it."""
    global _client_status
    _client_status = status
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _set_server_status(status: Optional[GeneralStatus]):
    """Sets the `_server_status` context var to `status`; None clears it."""
    _server_status.set(status)
|
|
49
|
+
|
|
20
50
|
|
|
21
|
-
_statuses: Dict[str, Optional[Union['EncodedStatus',
|
|
22
|
-
'rich_console.Status']]] = {
|
|
23
|
-
'server': None,
|
|
24
|
-
'client': None,
|
|
25
|
-
}
|
|
26
51
|
_status_nesting_level = 0
|
|
27
52
|
|
|
28
53
|
_logging_lock = threading.RLock()
|
|
@@ -35,6 +60,8 @@ class Control(enum.Enum):
|
|
|
35
60
|
STOP = 'rich_stop'
|
|
36
61
|
EXIT = 'rich_exit'
|
|
37
62
|
UPDATE = 'rich_update'
|
|
63
|
+
HEARTBEAT = 'heartbeat'
|
|
64
|
+
RETRY = 'retry'
|
|
38
65
|
|
|
39
66
|
def encode(self, msg: str) -> str:
|
|
40
67
|
return f'<{self.value}>{msg}</{self.value}>'
|
|
@@ -128,20 +155,22 @@ class _NoOpConsoleStatus:
|
|
|
128
155
|
class _RevertibleStatus:
|
|
129
156
|
"""A wrapper for status that can revert to previous message after exit."""
|
|
130
157
|
|
|
131
|
-
def __init__(self, message: str,
|
|
158
|
+
def __init__(self, message: str, get_status_fn: Callable[[], GeneralStatus],
|
|
159
|
+
set_status_fn: Callable[[Optional[GeneralStatus]], None]):
|
|
132
160
|
self.previous_message = None
|
|
133
|
-
self.
|
|
134
|
-
|
|
161
|
+
self.get_status_fn = get_status_fn
|
|
162
|
+
self.set_status_fn = set_status_fn
|
|
163
|
+
status = self.get_status_fn()
|
|
135
164
|
if status is not None:
|
|
136
165
|
self.previous_message = status.status
|
|
137
166
|
self.message = message
|
|
138
167
|
|
|
139
168
|
def __enter__(self):
|
|
140
169
|
global _status_nesting_level
|
|
141
|
-
|
|
170
|
+
self.get_status_fn().update(self.message)
|
|
142
171
|
_status_nesting_level += 1
|
|
143
|
-
|
|
144
|
-
return
|
|
172
|
+
self.get_status_fn().__enter__()
|
|
173
|
+
return self.get_status_fn()
|
|
145
174
|
|
|
146
175
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
147
176
|
# We use the same lock with the `safe_logger` to avoid the following 2
|
|
@@ -160,32 +189,48 @@ class _RevertibleStatus:
|
|
|
160
189
|
_status_nesting_level -= 1
|
|
161
190
|
if _status_nesting_level <= 0:
|
|
162
191
|
_status_nesting_level = 0
|
|
163
|
-
if
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
_statuses[self.status_type] = None
|
|
192
|
+
if self.get_status_fn() is not None:
|
|
193
|
+
self.get_status_fn().__exit__(exc_type, exc_val, exc_tb)
|
|
194
|
+
self.set_status_fn(None)
|
|
167
195
|
else:
|
|
168
|
-
|
|
196
|
+
self.get_status_fn().update(self.previous_message)
|
|
169
197
|
|
|
170
198
|
def update(self, *args, **kwargs):
|
|
171
|
-
|
|
199
|
+
self.get_status_fn().update(*args, **kwargs)
|
|
172
200
|
|
|
173
201
|
def stop(self):
|
|
174
|
-
|
|
202
|
+
self.get_status_fn().stop()
|
|
175
203
|
|
|
176
204
|
def start(self):
|
|
177
|
-
|
|
205
|
+
self.get_status_fn().start()
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _is_thread_safe() -> bool:
    """Check if the current status context is thread-safe.

    Returns:
        True when running on the main thread, or when `context.get()`
        returns a non-None value (i.e. an async context has been
        initialized, which makes the server status context-local).
    """
    if threading.current_thread() is threading.main_thread():
        return True
    return context.get() is not None
|
|
178
216
|
|
|
179
217
|
|
|
180
218
|
def safe_status(msg: str) -> Union['rich_console.Status', _NoOpConsoleStatus]:
    """A wrapper for multi-threaded server-side console.status.

    The server status is an `EncodedStatus`: rich status updates are encoded
    with control codes and written to the output, and the client side decodes
    those control codes to drive its own rich status.

    A real status is only returned when running on the API server, in a
    thread-safe context (see `_is_thread_safe`) and with logging not
    silenced; otherwise a no-op status is returned.

    See also: :func:`client_status`, :class:`EncodedStatus`.
    """
    from sky import sky_logging  # pylint: disable=import-outside-toplevel
    spinner_enabled = (annotations.is_on_api_server and _is_thread_safe() and
                       not sky_logging.is_silent())
    if not spinner_enabled:
        return _NoOpConsoleStatus()

    # Create the shared server status lazily; nested calls reuse it.
    if _get_server_status() is None:
        _set_server_status(EncodedStatus(msg))
    return _RevertibleStatus(msg, _get_server_status, _set_server_status)
|
|
190
235
|
|
|
191
236
|
|
|
@@ -196,22 +241,26 @@ def stop_safe_status():
|
|
|
196
241
|
stream logs from user program and do not want it to interfere with the
|
|
197
242
|
spinner display.
|
|
198
243
|
"""
|
|
199
|
-
if (
|
|
200
|
-
|
|
201
|
-
|
|
244
|
+
if _is_thread_safe():
|
|
245
|
+
return
|
|
246
|
+
server_status = _get_server_status()
|
|
247
|
+
if server_status is not None:
|
|
248
|
+
server_status.stop()
|
|
202
249
|
|
|
203
250
|
|
|
204
251
|
def force_update_status(msg: str):
    """Update the status message even if sky_logging.is_silent() is true.

    Does nothing outside a thread-safe context or when no server status
    exists.
    """
    if _is_thread_safe():
        current_status = _get_server_status()
        if current_status is not None:
            current_status.update(msg)
|
|
209
258
|
|
|
210
259
|
|
|
211
260
|
@contextlib.contextmanager
|
|
212
261
|
def safe_logger():
|
|
213
262
|
with _logging_lock:
|
|
214
|
-
client_status_obj =
|
|
263
|
+
client_status_obj = _get_client_status()
|
|
215
264
|
|
|
216
265
|
client_status_live = (client_status_obj is not None and
|
|
217
266
|
client_status_obj._live.is_started) # pylint: disable=protected-access
|
|
@@ -230,13 +279,13 @@ class RichSafeStreamHandler(logging.StreamHandler):
|
|
|
230
279
|
|
|
231
280
|
|
|
232
281
|
def client_status(msg: str) -> Union['rich_console.Status', _NoOpConsoleStatus]:
    """A wrapper for multi-threaded client-side console.status.

    A real rich status is only returned on the main thread with logging not
    silenced; otherwise a no-op status is returned.
    """
    from sky import sky_logging  # pylint: disable=import-outside-toplevel
    on_main_thread = threading.current_thread() is threading.main_thread()
    if not on_main_thread or sky_logging.is_silent():
        return _NoOpConsoleStatus()

    # Create the shared client status lazily; nested calls reuse it.
    if _get_client_status() is None:
        _set_client_status(rich_console_utils.get_console().status(msg))
    return _RevertibleStatus(msg, _get_client_status, _set_client_status)
|
|
241
290
|
|
|
242
291
|
|
|
@@ -320,6 +369,9 @@ def decode_rich_status(
|
|
|
320
369
|
yield line
|
|
321
370
|
continue
|
|
322
371
|
|
|
372
|
+
if control == Control.RETRY:
|
|
373
|
+
raise exceptions.RequestInterruptedError(
|
|
374
|
+
'Streaming interrupted. Please retry.')
|
|
323
375
|
# control is not None, i.e. it is a rich status control message.
|
|
324
376
|
if threading.current_thread() is not threading.main_thread():
|
|
325
377
|
yield None
|
|
@@ -341,6 +393,130 @@ def decode_rich_status(
|
|
|
341
393
|
decoding_status.__exit__(None, None, None)
|
|
342
394
|
elif control == Control.START:
|
|
343
395
|
decoding_status.start()
|
|
396
|
+
elif control == Control.HEARTBEAT:
|
|
397
|
+
# Heartbeat is not displayed to the user, so we do not
|
|
398
|
+
# need to update the status.
|
|
399
|
+
pass
|
|
400
|
+
finally:
|
|
401
|
+
if decoding_status is not None:
|
|
402
|
+
decoding_status.__exit__(None, None, None)
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
async def decode_rich_status_async(
    response: 'aiohttp.ClientResponse'
) -> typing.AsyncIterator[Optional[str]]:
    """Async version of rich_utils.decode_rich_status that decodes rich status
    messages from an aiohttp response.

    Args:
        response: The aiohttp response.

    Yields:
        Decoded output lines. Rich status control messages are applied to the
        client status and are not yielded.

    Raises:
        exceptions.RequestInterruptedError: If a RETRY control message is
            received.
    """
    import codecs  # pylint: disable=import-outside-toplevel

    decoding_status = None
    try:
        last_line = ''
        # A chunk boundary may fall inside a multi-byte UTF-8 character. The
        # incremental decoder holds back an incomplete trailing sequence
        # until the next chunk arrives, and substitutes the replacement
        # character for bytes that are really invalid.
        utf8_decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')

        # Iterate over the response content in chunks
        async for chunk, _ in response.content.iter_chunks():
            if chunk is None:
                return

            encoded_msg = utf8_decoder.decode(chunk)
            lines = encoded_msg.splitlines(keepends=True)

            # Skip processing if lines is empty to avoid IndexError
            if not lines:
                continue

            lines[0] = last_line + lines[0]
            last_line = lines[-1]
            # If the last line is not ended with `\r` or `\n` (with ending
            # spaces stripped), it means the last line is not a complete line.
            # We keep the last line in the buffer and continue.
            if (not last_line.strip(' ').endswith('\r') and
                    not last_line.strip(' ').endswith('\n')):
                lines = lines[:-1]
            else:
                # Reset the buffer for the next line, as the last line is a
                # complete line.
                last_line = ''

            for line in lines:
                if line.endswith('\r\n'):
                    # Replace `\r\n` with `\n`, as printing a line ends with
                    # `\r\n` in linux will cause the line to be empty.
                    line = line[:-2] + '\n'
                is_payload, line = message_utils.decode_payload(
                    line, raise_for_mismatch=False)
                if line is None:
                    continue
                control = None
                if is_payload:
                    control, encoded_status = Control.decode(line)
                if control is None:
                    yield line
                    continue

                if control == Control.RETRY:
                    raise exceptions.RequestInterruptedError(
                        'Streaming interrupted. Please retry.')
                # control is not None, i.e. it is a rich status control message.
                # In async context, we'll handle rich status controls normally
                # since async typically runs in main thread
                if control == Control.INIT:
                    decoding_status = client_status(encoded_status)
                    continue
                if decoding_status is None:
                    # status may not be initialized if a user use --tail for
                    # sky api logs.
                    continue
                if control == Control.UPDATE:
                    decoding_status.update(encoded_status)
                elif control == Control.STOP:
                    decoding_status.stop()
                elif control == Control.EXIT:
                    decoding_status.__exit__(None, None, None)
                elif control == Control.START:
                    decoding_status.start()
                elif control == Control.HEARTBEAT:
                    # Heartbeat is not displayed to the user, so we do not
                    # need to update the status.
                    pass
    finally:
        if decoding_status is not None:
            decoding_status.__exit__(None, None, None)
|