skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/utils/resources_utils.py
CHANGED
|
@@ -4,11 +4,12 @@ import enum
|
|
|
4
4
|
import itertools
|
|
5
5
|
import json
|
|
6
6
|
import math
|
|
7
|
-
import re
|
|
8
7
|
import typing
|
|
9
|
-
from typing import Dict, List, Optional, Set, Union
|
|
8
|
+
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
|
10
9
|
|
|
11
10
|
from sky import skypilot_config
|
|
11
|
+
from sky.skylet import constants
|
|
12
|
+
from sky.utils import common_utils
|
|
12
13
|
from sky.utils import registry
|
|
13
14
|
from sky.utils import ux_utils
|
|
14
15
|
|
|
@@ -50,6 +51,48 @@ class DiskTier(enum.Enum):
|
|
|
50
51
|
return types.index(self) <= types.index(other)
|
|
51
52
|
|
|
52
53
|
|
|
54
|
+
class NetworkTier(enum.Enum):
|
|
55
|
+
"""All network tiers supported by SkyPilot."""
|
|
56
|
+
STANDARD = 'standard'
|
|
57
|
+
BEST = 'best'
|
|
58
|
+
|
|
59
|
+
@classmethod
|
|
60
|
+
def supported_tiers(cls) -> List[str]:
|
|
61
|
+
return [tier.value for tier in cls]
|
|
62
|
+
|
|
63
|
+
@classmethod
|
|
64
|
+
def cli_help_message(cls) -> str:
|
|
65
|
+
return (
|
|
66
|
+
f'Network tier. Could be one of {", ".join(cls.supported_tiers())}'
|
|
67
|
+
f'. If {cls.BEST.value} is specified, use the best network tier '
|
|
68
|
+
'available on the specified instance. '
|
|
69
|
+
f'Default: {cls.STANDARD.value}')
|
|
70
|
+
|
|
71
|
+
@classmethod
|
|
72
|
+
def from_str(cls, tier: str) -> 'NetworkTier':
|
|
73
|
+
if tier not in cls.supported_tiers():
|
|
74
|
+
raise ValueError(f'Invalid network tier: {tier}')
|
|
75
|
+
return cls(tier)
|
|
76
|
+
|
|
77
|
+
def __le__(self, other: 'NetworkTier') -> bool:
|
|
78
|
+
types = list(NetworkTier)
|
|
79
|
+
return types.index(self) <= types.index(other)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class StorageType(enum.Enum):
|
|
83
|
+
"""Storage type."""
|
|
84
|
+
# Durable network storage, e.g. GCP persistent disks
|
|
85
|
+
NETWORK = 'network'
|
|
86
|
+
# Local instance storage, e.g. GCP local SSDs
|
|
87
|
+
INSTANCE = 'instance'
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class DiskAttachMode(enum.Enum):
|
|
91
|
+
"""Disk attach mode."""
|
|
92
|
+
READ_ONLY = 'read_only'
|
|
93
|
+
READ_WRITE = 'read_write'
|
|
94
|
+
|
|
95
|
+
|
|
53
96
|
@dataclasses.dataclass
|
|
54
97
|
class ClusterName:
|
|
55
98
|
display_name: str
|
|
@@ -138,35 +181,81 @@ def simplify_ports(ports: List[str]) -> List[str]:
|
|
|
138
181
|
|
|
139
182
|
|
|
140
183
|
def format_resource(resource: 'resources_lib.Resources',
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
184
|
+
simplified_only: bool = False) -> Tuple[str, Optional[str]]:
|
|
185
|
+
resource = resource.assert_launchable()
|
|
186
|
+
is_k8s = str(resource.cloud).lower() == 'kubernetes'
|
|
187
|
+
if resource.accelerators is None or is_k8s or not simplified_only:
|
|
188
|
+
vcpu, mem = resource.cloud.get_vcpus_mem_from_instance_type(
|
|
189
|
+
resource.instance_type)
|
|
190
|
+
|
|
191
|
+
elements_simple = []
|
|
192
|
+
elements_full = []
|
|
193
|
+
|
|
194
|
+
if resource.accelerators is not None:
|
|
195
|
+
acc, count = list(resource.accelerators.items())[0]
|
|
196
|
+
elements_simple.append(f'gpus={acc}:{count}')
|
|
197
|
+
elements_full.append(f'gpus={acc}:{count}')
|
|
198
|
+
|
|
199
|
+
if (resource.accelerators is None or is_k8s):
|
|
200
|
+
if vcpu is not None:
|
|
201
|
+
elements_simple.append(f'cpus={int(vcpu)}')
|
|
202
|
+
elements_full.append(f'cpus={int(vcpu)}')
|
|
203
|
+
if mem is not None:
|
|
204
|
+
elements_simple.append(f'mem={int(mem)}')
|
|
205
|
+
elements_full.append(f'mem={int(mem)}')
|
|
206
|
+
elif not simplified_only:
|
|
207
|
+
if vcpu is not None:
|
|
208
|
+
elements_full.append(f'cpus={int(vcpu)}')
|
|
209
|
+
if mem is not None:
|
|
210
|
+
elements_full.append(f'mem={int(mem)}')
|
|
211
|
+
|
|
212
|
+
if not is_k8s:
|
|
213
|
+
instance_type_full = resource.instance_type
|
|
214
|
+
instance_type_simple = common_utils.truncate_long_string(
|
|
215
|
+
instance_type_full, 15)
|
|
216
|
+
elements_simple.append(instance_type_simple)
|
|
217
|
+
elements_full.append(instance_type_full)
|
|
218
|
+
elements_simple.append('...')
|
|
219
|
+
if not simplified_only:
|
|
220
|
+
image_id = resource.image_id
|
|
221
|
+
if image_id is not None:
|
|
222
|
+
if None in image_id:
|
|
223
|
+
elements_full.append(f'image_id={image_id[None]}')
|
|
224
|
+
else:
|
|
225
|
+
elements_full.append(f'image_id={image_id}')
|
|
226
|
+
elements_full.append(f'disk={resource.disk_size}')
|
|
227
|
+
disk_tier = resource.disk_tier
|
|
228
|
+
if disk_tier is not None:
|
|
229
|
+
elements_full.append(f'disk_tier={disk_tier.value}')
|
|
230
|
+
ports = resource.ports
|
|
231
|
+
if ports is not None:
|
|
232
|
+
elements_full.append(f'ports={ports}')
|
|
233
|
+
|
|
234
|
+
spot = '[spot]' if resource.use_spot else ''
|
|
235
|
+
resources_str_simple = (
|
|
236
|
+
f'{spot}({"" if not elements_simple else ", ".join(elements_simple)})')
|
|
237
|
+
if simplified_only:
|
|
238
|
+
return resources_str_simple, None
|
|
152
239
|
else:
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
240
|
+
resources_str_full = (
|
|
241
|
+
f'{spot}({"" if not elements_full else ", ".join(elements_full)})')
|
|
242
|
+
return resources_str_simple, resources_str_full
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def get_readable_resources_repr(
        handle: 'backends.CloudVmRayResourceHandle',
        simplified_only: bool = False) -> Tuple[str, Optional[str]]:
    """Build '<nodes>x<resources>' summaries for a cluster handle.

    Args:
        handle: Handle whose ``launched_nodes`` and ``launched_resources``
            are read.
        simplified_only: Forwarded to ``format_resource``. When True, only
            the simplified string is produced and the second element of a
            successful result is None.

    Returns:
        A ``(simple, full)`` tuple. If either ``launched_nodes`` or
        ``launched_resources`` is None, both elements are
        ``_DEFAULT_MESSAGE_HANDLE_INITIALIZING`` (regardless of
        ``simplified_only``).
    """
    # Check for a not-yet-populated handle *before* formatting:
    # format_resource reads attributes of the resources object, so calling
    # it with None would raise instead of reaching the fallback message.
    if handle.launched_nodes is None or handle.launched_resources is None:
        return (_DEFAULT_MESSAGE_HANDLE_INITIALIZING,
                _DEFAULT_MESSAGE_HANDLE_INITIALIZING)

    resource_str_simple, resource_str_full = format_resource(
        handle.launched_resources, simplified_only)
    node_prefix = f'{handle.launched_nodes}x'
    if simplified_only:
        return f'{node_prefix}{resource_str_simple}', None
    assert resource_str_full is not None
    return (f'{node_prefix}{resource_str_simple}',
            f'{node_prefix}{resource_str_full}')
|
|
170
259
|
|
|
171
260
|
|
|
172
261
|
def make_ray_custom_resources_str(
|
|
@@ -208,10 +297,18 @@ def need_to_query_reservations() -> bool:
|
|
|
208
297
|
clouds that do not use reservations.
|
|
209
298
|
"""
|
|
210
299
|
for cloud_str in registry.CLOUD_REGISTRY.keys():
|
|
211
|
-
cloud_specific_reservations =
|
|
212
|
-
(
|
|
213
|
-
|
|
214
|
-
|
|
300
|
+
cloud_specific_reservations = (
|
|
301
|
+
skypilot_config.get_effective_region_config(
|
|
302
|
+
cloud=cloud_str,
|
|
303
|
+
region=None,
|
|
304
|
+
keys=('specific_reservations',),
|
|
305
|
+
default_value=None))
|
|
306
|
+
cloud_prioritize_reservations = (
|
|
307
|
+
skypilot_config.get_effective_region_config(
|
|
308
|
+
cloud=cloud_str,
|
|
309
|
+
region=None,
|
|
310
|
+
keys=('prioritize_reservations',),
|
|
311
|
+
default_value=False))
|
|
215
312
|
if (cloud_specific_reservations is not None or
|
|
216
313
|
cloud_prioritize_reservations):
|
|
217
314
|
return True
|
|
@@ -248,6 +345,7 @@ def make_launchables_for_valid_region_zones(
|
|
|
248
345
|
launchables = []
|
|
249
346
|
regions = launchable_resources.get_valid_regions_for_launchable()
|
|
250
347
|
for region in regions:
|
|
348
|
+
assert launchable_resources.cloud is not None, 'Cloud must be specified'
|
|
251
349
|
optimize_by_zone = (override_optimize_by_zone or
|
|
252
350
|
launchable_resources.cloud.optimize_by_zone())
|
|
253
351
|
# It is possible that we force the optimize_by_zone but some clouds
|
|
@@ -266,3 +364,122 @@ def make_launchables_for_valid_region_zones(
|
|
|
266
364
|
# Batch the requests at the granularity of a single region.
|
|
267
365
|
launchables.append(launchable_resources.copy(region=region.name))
|
|
268
366
|
return launchables
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def parse_memory_resource(resource_qty_str: Union[str, int, float],
                          field_name: str,
                          ret_type: type = int,
                          unit: str = 'gb',
                          allow_plus: bool = False,
                          allow_x: bool = False,
                          allow_rounding: bool = False) -> str:
    """Normalize a memory quantity into a string expressed in ``unit``.

    Args:
        resource_qty_str: Quantity to parse; it is converted with ``str()``
            first, so numbers are accepted too.
        field_name: Name of the field, used only in the error message.
        ret_type: Numeric type used to parse and render the value.
        unit: Target unit; must be a key of ``constants.MEMORY_SIZE_UNITS``.
        allow_plus: Whether a trailing '+' is accepted (it is kept in the
            result).
        allow_x: Whether a trailing 'x' is accepted (it is kept in the
            result).
        allow_rounding: Whether a converted value that ``ret_type`` cannot
            represent exactly may be truncated to ``ret_type``.

    Returns:
        The value followed by the '+' and 'x' markers that were present. A
        quantity with no unit suffix is returned as written.

    Raises:
        ValueError: If a disallowed marker is present or the quantity cannot
            be parsed/converted.
    """
    assert unit in constants.MEMORY_SIZE_UNITS, f'Invalid unit: {unit}'

    error_msg = (f'"{field_name}" field should be a '
                 f'{constants.MEMORY_SIZE_PATTERN}+?,'
                 f' got {resource_qty_str}')

    quantity = str(resource_qty_str)

    # Peel off the optional markers: '+' first, then 'x' (the latter is only
    # used internally for jobs controller).
    plus = ''
    if quantity.endswith('+'):
        if not allow_plus:
            raise ValueError(error_msg)
        quantity, plus = quantity[:-1], '+'

    x = ''
    if quantity.endswith('x'):
        if not allow_x:
            raise ValueError(error_msg)
        quantity, x = quantity[:-1], 'x'

    markers = f'{plus}{x}'

    # A bare number is assumed to already be in the wanted unit, which keeps
    # older configs working.
    try:
        ret_type(quantity)
    except ValueError:
        pass
    else:
        return f'{quantity}{markers}'

    lowered = quantity.lower()
    for suffix, multiplier in constants.MEMORY_SIZE_UNITS.items():
        if not lowered.endswith(suffix):
            continue
        try:
            amount = ret_type(lowered[:-len(suffix)])
            in_target_unit = (amount * multiplier /
                              constants.MEMORY_SIZE_UNITS[unit])
            rendered = ret_type(in_target_unit)
        except ValueError:
            # Not a valid number for this suffix; try the next one.
            continue
        if not allow_rounding and rendered != in_target_unit:
            # Inexact conversion is rejected for this suffix; if no other
            # suffix fits, the error below is raised.
            continue
        return f'{rendered}{markers}'

    raise ValueError(error_msg)
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def parse_time_minutes(time: str) -> int:
    """Convert a time string to minutes.

    Args:
        time: Time string with optional unit suffix (e.g., '30m', '2h', '1d').
            A string made only of decimal digits is taken to be minutes
            already.

    Returns:
        Time in minutes as an integer. Fractional results are rounded up.

    Raises:
        ValueError: If no unit in ``constants.TIME_UNITS`` yields a finite,
            non-negative number of minutes.
    """
    time_str = str(time)

    if time_str.isdecimal():
        # We assume it is already in minutes to maintain backwards
        # compatibility
        return int(time_str)

    time_str = time_str.lower()
    for unit, multiplier in constants.TIME_UNITS.items():
        if not time_str.endswith(unit):
            continue
        try:
            value = float(time_str[:-len(unit)])
            final_value = math.ceil(value * multiplier)
        except (ValueError, OverflowError):
            # ValueError: the prefix is not a number (or is NaN).
            # OverflowError: the value is infinite (e.g. 'inf', '1e400'),
            # which math.ceil cannot turn into an int. Either way the input
            # is invalid for this unit, so fall through to the ValueError
            # below instead of leaking a different exception type.
            continue
        if final_value >= 0:
            return final_value

    raise ValueError(f'Invalid time format: {time}')
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def normalize_any_of_resources_config(
        any_of: List[Dict[str, Any]]) -> Tuple[str, ...]:
    """Reduce an any_of resources list to an order-independent canonical form.

    Args:
        any_of: A list of any_of resources config.

    Returns:
        A tuple of compact JSON strings (keys sorted), itself sorted, so that
        two lists holding the same configs in a different order compare
        equal. An empty or missing list gives an empty tuple.
    """
    if not any_of:
        return ()

    # Serialize every config deterministically, then order the serialized
    # forms so that list order no longer matters.
    canonical = sorted(
        json.dumps(entry, sort_keys=True, separators=(',', ':'))
        for entry in any_of)
    return tuple(canonical)
|
sky/utils/rich_utils.py
CHANGED
|
@@ -1,28 +1,53 @@
|
|
|
1
1
|
"""Rich status spinner utils."""
|
|
2
2
|
import contextlib
|
|
3
|
+
import contextvars
|
|
3
4
|
import enum
|
|
4
5
|
import logging
|
|
5
6
|
import threading
|
|
6
7
|
import typing
|
|
7
|
-
from typing import
|
|
8
|
+
from typing import Callable, Iterator, Optional, Tuple, Union
|
|
8
9
|
|
|
10
|
+
from sky import exceptions
|
|
9
11
|
from sky.adaptors import common as adaptors_common
|
|
10
12
|
from sky.utils import annotations
|
|
13
|
+
from sky.utils import context
|
|
11
14
|
from sky.utils import message_utils
|
|
12
15
|
from sky.utils import rich_console_utils
|
|
13
16
|
|
|
14
17
|
if typing.TYPE_CHECKING:
|
|
18
|
+
import aiohttp
|
|
15
19
|
import requests
|
|
16
20
|
import rich.console as rich_console
|
|
17
21
|
else:
|
|
18
22
|
requests = adaptors_common.LazyImport('requests')
|
|
19
23
|
rich_console = adaptors_common.LazyImport('rich.console')
|
|
24
|
+
aiohttp = adaptors_common.LazyImport('aiohttp')
|
|
25
|
+
|
|
26
|
+
# Type accepted wherever a status object is stored or returned.
GeneralStatus = Union['rich_console.Status', 'EncodedStatus']

# Client-side status lives in a single module-level slot.
_client_status: Optional[GeneralStatus] = None
# Server-side status lives in a ContextVar (default None), so it is read and
# written through the current context rather than a plain module global.
_server_status: contextvars.ContextVar[Optional[GeneralStatus]] = (
    contextvars.ContextVar('server_status', default=None))


def _get_client_status() -> Optional[GeneralStatus]:
    """Return the status stored in the module-level client slot, if any."""
    return _client_status


def _get_server_status() -> Optional[GeneralStatus]:
    """Return the status stored in the server ContextVar, if any."""
    return _server_status.get()


def _set_client_status(status: Optional[GeneralStatus]):
    """Store ``status`` (or None to clear) in the module-level client slot."""
    global _client_status
    _client_status = status


def _set_server_status(status: Optional[GeneralStatus]):
    """Store ``status`` (or None to clear) in the server ContextVar."""
    _server_status.set(status)
|
|
49
|
+
|
|
20
50
|
|
|
21
|
-
_statuses: Dict[str, Optional[Union['EncodedStatus',
|
|
22
|
-
'rich_console.Status']]] = {
|
|
23
|
-
'server': None,
|
|
24
|
-
'client': None,
|
|
25
|
-
}
|
|
26
51
|
_status_nesting_level = 0
|
|
27
52
|
|
|
28
53
|
_logging_lock = threading.RLock()
|
|
@@ -35,6 +60,8 @@ class Control(enum.Enum):
|
|
|
35
60
|
STOP = 'rich_stop'
|
|
36
61
|
EXIT = 'rich_exit'
|
|
37
62
|
UPDATE = 'rich_update'
|
|
63
|
+
HEARTBEAT = 'heartbeat'
|
|
64
|
+
RETRY = 'retry'
|
|
38
65
|
|
|
39
66
|
def encode(self, msg: str) -> str:
|
|
40
67
|
return f'<{self.value}>{msg}</{self.value}>'
|
|
@@ -128,20 +155,22 @@ class _NoOpConsoleStatus:
|
|
|
128
155
|
class _RevertibleStatus:
|
|
129
156
|
"""A wrapper for status that can revert to previous message after exit."""
|
|
130
157
|
|
|
131
|
-
def __init__(self, message: str,
|
|
158
|
+
def __init__(self, message: str, get_status_fn: Callable[[], GeneralStatus],
             set_status_fn: Callable[[Optional[GeneralStatus]], None]):
    """Remember the accessors and the message to restore on exit.

    Args:
        message: Message to show while this status is active.
        get_status_fn: Returns the underlying status object (may be None).
        set_status_fn: Stores (or clears, with None) the underlying status.
    """
    self.get_status_fn = get_status_fn
    self.set_status_fn = set_status_fn
    self.message = message
    # Capture what is currently displayed so that leaving a nested status
    # can put the outer message back.
    current = get_status_fn()
    self.previous_message = None if current is None else current.status
|
|
138
167
|
|
|
139
168
|
def __enter__(self):
    """Show this status's message, record one more nesting level, and
    enter the underlying status.

    Returns:
        The underlying status object as reported by ``get_status_fn``.
    """
    global _status_nesting_level
    shown = self.get_status_fn()
    shown.update(self.message)
    _status_nesting_level += 1
    entered = self.get_status_fn()
    entered.__enter__()
    return self.get_status_fn()
|
|
145
174
|
|
|
146
175
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
147
176
|
# We use the same lock with the `safe_logger` to avoid the following 2
|
|
@@ -160,32 +189,49 @@ class _RevertibleStatus:
|
|
|
160
189
|
_status_nesting_level -= 1
|
|
161
190
|
if _status_nesting_level <= 0:
|
|
162
191
|
_status_nesting_level = 0
|
|
163
|
-
if
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
_statuses[self.status_type] = None
|
|
192
|
+
if self.get_status_fn() is not None:
|
|
193
|
+
self.get_status_fn().__exit__(exc_type, exc_val, exc_tb)
|
|
194
|
+
self.set_status_fn(None)
|
|
167
195
|
else:
|
|
168
|
-
|
|
196
|
+
if self.previous_message is not None:
|
|
197
|
+
self.get_status_fn().update(self.previous_message)
|
|
169
198
|
|
|
170
199
|
def update(self, *args, **kwargs):
    """Forward an update, with all arguments, to the underlying status."""
    target = self.get_status_fn()
    target.update(*args, **kwargs)
|
|
172
201
|
|
|
173
202
|
def stop(self):
    """Stop the underlying status."""
    target = self.get_status_fn()
    target.stop()
|
|
175
204
|
|
|
176
205
|
def start(self):
    """Start the underlying status."""
    target = self.get_status_fn()
    target.start()
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _is_thread_safe() -> bool:
    """Tell whether the current status context is thread-safe.

    That is the case when running on the main thread, or when
    ``context.get()`` returns a context (i.e. the server status is
    context-local because an async context has been initialized).
    """
    if threading.current_thread() is threading.main_thread():
        return True
    return context.get() is not None
|
|
178
217
|
|
|
179
218
|
|
|
180
219
|
def safe_status(msg: str) -> Union['rich_console.Status', _NoOpConsoleStatus]:
    """A wrapper for multi-threaded server-side console.status.

    The server-side status is an :class:`EncodedStatus`, created on first use
    and wrapped in a revertible status. Per the original notes, the encoded
    status emits rich control codes on stdout that the client decodes to
    drive its own spinner, and this function is safe to call in
    async/multi-threaded context.

    A no-op status is returned unless all of the following hold: we are on
    the API server, the status context is thread-safe, and logging is not
    silent.

    See also: :func:`client_status`, :class:`EncodedStatus`.
    """
    from sky import sky_logging  # pylint: disable=import-outside-toplevel
    spinner_allowed = (annotations.is_on_api_server and _is_thread_safe() and
                       not sky_logging.is_silent())
    if not spinner_allowed:
        return _NoOpConsoleStatus()
    if _get_server_status() is None:
        # First (outermost) status in this context: create it.
        _set_server_status(EncodedStatus(msg))
    return _RevertibleStatus(msg, _get_server_status, _set_server_status)
|
|
190
236
|
|
|
191
237
|
|
|
@@ -196,29 +242,34 @@ def stop_safe_status():
|
|
|
196
242
|
stream logs from user program and do not want it to interfere with the
|
|
197
243
|
spinner display.
|
|
198
244
|
"""
|
|
199
|
-
if (
|
|
200
|
-
|
|
201
|
-
|
|
245
|
+
if _is_thread_safe():
|
|
246
|
+
return
|
|
247
|
+
server_status = _get_server_status()
|
|
248
|
+
if server_status is not None:
|
|
249
|
+
server_status.stop()
|
|
202
250
|
|
|
203
251
|
|
|
204
252
|
def force_update_status(msg: str):
    """Update the status message even if sky_logging.is_silent() is true.

    Nothing happens when the status context is not thread-safe or when no
    server status is currently set.
    """
    if _is_thread_safe():
        active = _get_server_status()
        if active is not None:
            active.update(msg)
|
|
209
259
|
|
|
210
260
|
|
|
211
261
|
@contextlib.contextmanager
def safe_logger():
    """Hold the logging lock and pause a live client spinner for the body.

    If the client status exists, has a ``_live`` attribute and that live
    display is started, it is stopped before the body runs and started again
    after the body returns.
    """
    with _logging_lock:
        spinner = _get_client_status()

        # Remember the spinner only if we are the ones pausing it.
        paused = None
        # pylint: disable=protected-access
        if (spinner is not None and hasattr(spinner, '_live') and
                spinner._live.is_started):
            paused = spinner
            paused.stop()
        yield
        if paused is not None:
            paused.start()
|
|
223
274
|
|
|
224
275
|
|
|
@@ -230,13 +281,13 @@ class RichSafeStreamHandler(logging.StreamHandler):
|
|
|
230
281
|
|
|
231
282
|
|
|
232
283
|
def client_status(msg: str) -> Union['rich_console.Status', _NoOpConsoleStatus]:
    """A wrapper for multi-threaded client-side console.status.

    A no-op status is returned unless called from the main thread with
    logging not silent. Otherwise the client status is created from the rich
    console on first use and wrapped in a revertible status.
    """
    from sky import sky_logging  # pylint: disable=import-outside-toplevel
    on_main_thread = threading.current_thread() is threading.main_thread()
    if not on_main_thread or sky_logging.is_silent():
        return _NoOpConsoleStatus()
    if _get_client_status() is None:
        # First (outermost) status: create the rich spinner.
        _set_client_status(rich_console_utils.get_console().status(msg))
    return _RevertibleStatus(msg, _get_client_status, _set_client_status)
|
|
241
292
|
|
|
242
293
|
|
|
@@ -320,6 +371,9 @@ def decode_rich_status(
|
|
|
320
371
|
yield line
|
|
321
372
|
continue
|
|
322
373
|
|
|
374
|
+
if control == Control.RETRY:
|
|
375
|
+
raise exceptions.RequestInterruptedError(
|
|
376
|
+
'Streaming interrupted. Please retry.')
|
|
323
377
|
# control is not None, i.e. it is a rich status control message.
|
|
324
378
|
if threading.current_thread() is not threading.main_thread():
|
|
325
379
|
yield None
|
|
@@ -341,6 +395,130 @@ def decode_rich_status(
|
|
|
341
395
|
decoding_status.__exit__(None, None, None)
|
|
342
396
|
elif control == Control.START:
|
|
343
397
|
decoding_status.start()
|
|
398
|
+
elif control == Control.HEARTBEAT:
|
|
399
|
+
# Heartbeat is not displayed to the user, so we do not
|
|
400
|
+
# need to update the status.
|
|
401
|
+
pass
|
|
402
|
+
finally:
|
|
403
|
+
if decoding_status is not None:
|
|
404
|
+
decoding_status.__exit__(None, None, None)
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
async def decode_rich_status_async(
    response: 'aiohttp.ClientResponse'
) -> typing.AsyncIterator[Optional[str]]:
    """Async version of rich_utils.decode_rich_status that decodes rich status
    messages from an aiohttp response.

    Args:
        response: The aiohttp response.

    Yields:
        Decoded output lines. Rich status control messages are consumed to
        drive the client-side status and are not yielded.

    Raises:
        exceptions.RequestInterruptedError: If a RETRY control message is
            received.

    Note:
        Text left over without a line terminator when the stream ends is not
        yielded.
    """
    import codecs  # pylint: disable=import-outside-toplevel

    decoding_status = None
    try:
        last_line = ''
        # Chunk boundaries can fall inside a multi-byte UTF-8 character. The
        # incremental decoder keeps the incomplete tail between calls and
        # substitutes U+FFFD only for bytes that are actually invalid, no
        # matter where in the chunk the split happens.
        utf8_decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')

        # Iterate over the response content in chunks
        async for chunk, _ in response.content.iter_chunks():
            if chunk is None:
                return

            encoded_msg = utf8_decoder.decode(chunk)
            lines = encoded_msg.splitlines(keepends=True)

            # Skip processing if lines is empty to avoid IndexError
            if not lines:
                continue

            lines[0] = last_line + lines[0]
            last_line = lines[-1]
            # If the last line is not ended with `\r` or `\n` (with ending
            # spaces stripped), it means the last line is not a complete line.
            # We keep the last line in the buffer and continue.
            if (not last_line.strip(' ').endswith('\r') and
                    not last_line.strip(' ').endswith('\n')):
                lines = lines[:-1]
            else:
                # Reset the buffer for the next line, as the last line is a
                # complete line.
                last_line = ''

            for line in lines:
                if line.endswith('\r\n'):
                    # Replace `\r\n` with `\n`, as printing a line ends with
                    # `\r\n` in linux will cause the line to be empty.
                    line = line[:-2] + '\n'
                is_payload, line = message_utils.decode_payload(
                    line, raise_for_mismatch=False)
                if line is None:
                    continue
                control = None
                if is_payload:
                    control, encoded_status = Control.decode(line)
                if control is None:
                    yield line
                    continue

                if control == Control.RETRY:
                    raise exceptions.RequestInterruptedError(
                        'Streaming interrupted. Please retry.')
                # control is not None, i.e. it is a rich status control message.
                # In async context, we'll handle rich status controls normally
                # since async typically runs in main thread
                if control == Control.INIT:
                    decoding_status = client_status(encoded_status)
                else:
                    if decoding_status is None:
                        # status may not be initialized if a user use --tail for
                        # sky api logs.
                        continue
                    assert decoding_status is not None, (
                        f'Rich status not initialized: {line}')
                    if control == Control.UPDATE:
                        decoding_status.update(encoded_status)
                    elif control == Control.STOP:
                        decoding_status.stop()
                    elif control == Control.EXIT:
                        decoding_status.__exit__(None, None, None)
                    elif control == Control.START:
                        decoding_status.start()
                    elif control == Control.HEARTBEAT:
                        # Heartbeat is not displayed to the user, so we do not
                        # need to update the status.
                        pass
    finally:
        # Always leave the client status, including on early return or error.
        if decoding_status is not None:
            decoding_status.__exit__(None, None, None)
|