skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +25 -7
- sky/adaptors/common.py +24 -1
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +170 -17
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +167 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1299 -380
- sky/backends/cloud_vm_ray_backend.py +1715 -518
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/wheel_utils.py +37 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +89 -48
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +335 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +491 -203
- sky/cli.py +5 -6005
- sky/client/{cli.py → cli/command.py} +2477 -1885
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +320 -0
- sky/client/common.py +70 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1203 -297
- sky/client/sdk_async.py +833 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +73 -0
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +358 -93
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +127 -36
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +563 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +206 -80
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -83
- sky/clouds/seeweb.py +466 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +177 -124
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +349 -139
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
- sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1451 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +132 -2
- sky/execution.py +206 -63
- sky/global_user_state.py +2374 -586
- sky/jobs/__init__.py +5 -0
- sky/jobs/client/sdk.py +242 -65
- sky/jobs/client/sdk_async.py +143 -0
- sky/jobs/constants.py +9 -8
- sky/jobs/controller.py +839 -277
- sky/jobs/file_content_utils.py +80 -0
- sky/jobs/log_gc.py +201 -0
- sky/jobs/recovery_strategy.py +398 -152
- sky/jobs/scheduler.py +315 -189
- sky/jobs/server/core.py +829 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2092 -701
- sky/jobs/utils.py +1242 -160
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +443 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +135 -50
- sky/provision/azure/instance.py +10 -5
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +114 -23
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +93 -14
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +789 -247
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +40 -43
- sky/provision/kubernetes/utils.py +1192 -531
- sky/provision/kubernetes/volume.py +282 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +196 -91
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +110 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +180 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +531 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +807 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/vsphere/common/vim_utils.py +1 -2
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +9 -19
- sky/py.typed +0 -0
- sky/resources.py +844 -118
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +225 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +10 -8
- sky/serve/controller.py +64 -19
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +115 -1
- sky/serve/replica_managers.py +273 -162
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +554 -251
- sky/serve/serve_utils.py +733 -220
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +133 -48
- sky/serve/service_spec.py +135 -16
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +200 -0
- sky/server/common.py +475 -181
- sky/server/config.py +81 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +229 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/requests/executor.py +528 -138
- sky/server/requests/payloads.py +351 -17
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +120 -0
- sky/server/requests/requests.py +817 -224
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +106 -0
- sky/server/rest.py +417 -0
- sky/server/server.py +1290 -284
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +345 -57
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +5 -0
- sky/setup_files/alembic.ini +156 -0
- sky/setup_files/dependencies.py +136 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +102 -5
- sky/skylet/attempt_skylet.py +1 -0
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +27 -20
- sky/skylet/constants.py +171 -19
- sky/skylet/events.py +105 -21
- sky/skylet/job_lib.py +335 -104
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/services.py +564 -0
- sky/skylet/skylet.py +63 -4
- sky/skylet/subprocess_daemon.py +103 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +621 -137
- sky/templates/aws-ray.yml.j2 +10 -3
- sky/templates/azure-ray.yml.j2 +1 -1
- sky/templates/do-ray.yml.j2 +1 -1
- sky/templates/gcp-ray.yml.j2 +57 -0
- sky/templates/hyperbolic-ray.yml.j2 +67 -0
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +607 -51
- sky/templates/lambda-ray.yml.j2 +1 -1
- sky/templates/nebius-ray.yml.j2 +33 -12
- sky/templates/paperspace-ray.yml.j2 +1 -1
- sky/templates/primeintellect-ray.yml.j2 +71 -0
- sky/templates/runpod-ray.yml.j2 +9 -1
- sky/templates/scp-ray.yml.j2 +3 -50
- sky/templates/seeweb-ray.yml.j2 +108 -0
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/websocket_proxy.py +178 -18
- sky/usage/usage_lib.py +18 -11
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +387 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +34 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +16 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +310 -87
- sky/utils/config_utils.py +87 -5
- sky/utils/context.py +402 -0
- sky/utils/context_utils.py +222 -0
- sky/utils/controller_utils.py +264 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +470 -0
- sky/utils/db/migration_utils.py +133 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +13 -27
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +5 -5
- sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +368 -0
- sky/utils/log_utils.py +300 -6
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +213 -37
- sky/utils/schemas.py +905 -147
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +38 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/timeline.py +24 -52
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +86 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +149 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +258 -0
- sky/volumes/server/server.py +122 -0
- sky/volumes/volume.py +212 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/clouds/aws.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Amazon Web Services."""
|
|
2
2
|
import enum
|
|
3
3
|
import fnmatch
|
|
4
|
+
import functools
|
|
4
5
|
import hashlib
|
|
5
6
|
import json
|
|
6
7
|
import os
|
|
@@ -8,16 +9,20 @@ import re
|
|
|
8
9
|
import subprocess
|
|
9
10
|
import time
|
|
10
11
|
import typing
|
|
11
|
-
from typing import Any, Dict, Iterator, List, Optional, Set,
|
|
12
|
+
from typing import (Any, Callable, Dict, Iterator, List, Literal, Optional, Set,
|
|
13
|
+
Tuple, TypeVar, Union)
|
|
12
14
|
|
|
15
|
+
from typing_extensions import ParamSpec
|
|
16
|
+
|
|
17
|
+
from sky import catalog
|
|
13
18
|
from sky import clouds
|
|
14
19
|
from sky import exceptions
|
|
15
20
|
from sky import provision as provision_lib
|
|
16
21
|
from sky import sky_logging
|
|
17
22
|
from sky import skypilot_config
|
|
18
23
|
from sky.adaptors import aws
|
|
19
|
-
from sky.
|
|
20
|
-
from sky.
|
|
24
|
+
from sky.adaptors import common
|
|
25
|
+
from sky.catalog import common as catalog_common
|
|
21
26
|
from sky.clouds.utils import aws_utils
|
|
22
27
|
from sky.skylet import constants
|
|
23
28
|
from sky.utils import annotations
|
|
@@ -32,14 +37,17 @@ if typing.TYPE_CHECKING:
|
|
|
32
37
|
# renaming to avoid shadowing variables
|
|
33
38
|
from sky import resources as resources_lib
|
|
34
39
|
from sky.utils import status_lib
|
|
40
|
+
from sky.utils import volume as volume_lib
|
|
35
41
|
|
|
36
42
|
logger = sky_logging.init_logger(__name__)
|
|
37
43
|
|
|
38
44
|
# Image ID tags
|
|
39
45
|
_DEFAULT_CPU_IMAGE_ID = 'skypilot:custom-cpu-ubuntu'
|
|
46
|
+
_DEFAULT_CPU_ARM64_IMAGE_ID = 'skypilot:custom-cpu-ubuntu-arm64'
|
|
40
47
|
# For GPU-related package version,
|
|
41
|
-
# see sky/
|
|
48
|
+
# see sky/catalog/images/provisioners/cuda.sh
|
|
42
49
|
_DEFAULT_GPU_IMAGE_ID = 'skypilot:custom-gpu-ubuntu'
|
|
50
|
+
_DEFAULT_GPU_ARM64_IMAGE_ID = 'skypilot:custom-gpu-ubuntu-arm64'
|
|
43
51
|
_DEFAULT_GPU_K80_IMAGE_ID = 'skypilot:k80-ubuntu-2004'
|
|
44
52
|
_DEFAULT_NEURON_IMAGE_ID = 'skypilot:neuron-ubuntu-2204'
|
|
45
53
|
|
|
@@ -64,6 +72,8 @@ _CREDENTIAL_FILES = [
|
|
|
64
72
|
]
|
|
65
73
|
|
|
66
74
|
DEFAULT_AMI_GB = 45
|
|
75
|
+
DEFAULT_SSH_USER = 'ubuntu'
|
|
76
|
+
DEFAULT_ROOT_DEVICE_NAME = '/dev/sda1'
|
|
67
77
|
|
|
68
78
|
# Temporary measure, as deleting per-cluster SGs is too slow.
|
|
69
79
|
# See https://github.com/skypilot-org/skypilot/pull/742.
|
|
@@ -74,6 +84,136 @@ DEFAULT_SECURITY_GROUP_NAME = f'sky-sg-{common_utils.user_and_hostname_hash()}'
|
|
|
74
84
|
# Security group to use when user specified ports in their resources.
|
|
75
85
|
USER_PORTS_SECURITY_GROUP_NAME = 'sky-sg-{}'
|
|
76
86
|
|
|
87
|
+
# GPU instance types that support EFA
|
|
88
|
+
# TODO(hailong): Some CPU instance types also support EFA; we may need to support
|
|
89
|
+
# all of them later.
|
|
90
|
+
# TODO(hailong): Add the EFA info in catalog.
|
|
91
|
+
_EFA_INSTANCE_TYPE_PREFIXES = [
|
|
92
|
+
'g4dn.',
|
|
93
|
+
'g5.',
|
|
94
|
+
'g6.',
|
|
95
|
+
'gr6.',
|
|
96
|
+
'g6e.',
|
|
97
|
+
'p4d.',
|
|
98
|
+
'p4de.',
|
|
99
|
+
'p5.',
|
|
100
|
+
'p5e.',
|
|
101
|
+
'p5en.',
|
|
102
|
+
'p6-b200.',
|
|
103
|
+
]
|
|
104
|
+
|
|
105
|
+
# Docker run options for EFA.
|
|
106
|
+
# Refer to https://github.com/ofiwg/libfabric/issues/6437 for updating
|
|
107
|
+
# memlock ulimit
|
|
108
|
+
_EFA_DOCKER_RUN_OPTIONS = [
|
|
109
|
+
'--cap-add=IPC_LOCK',
|
|
110
|
+
'--device=/dev/infiniband',
|
|
111
|
+
'--ulimit memlock=-1:-1',
|
|
112
|
+
]
|
|
113
|
+
|
|
114
|
+
# AWS EFA image name.
|
|
115
|
+
# Refer to https://docs.aws.amazon.com/dlami/latest/devguide/aws-deep-learning-base-gpu-ami-ubuntu-22-04.html for latest version. # pylint: disable=line-too-long
|
|
116
|
+
# TODO(hailong): may need to update the version later.
|
|
117
|
+
_EFA_IMAGE_NAME = 'Deep Learning Base OSS Nvidia Driver GPU AMI' \
|
|
118
|
+
' (Ubuntu 22.04) 20250808'
|
|
119
|
+
|
|
120
|
+
# For functions that need caching per AWS profile.
|
|
121
|
+
_AWS_PROFILE_SCOPED_FUNC_CACHE_SIZE = 5
|
|
122
|
+
|
|
123
|
+
T = TypeVar('T')
|
|
124
|
+
P = ParamSpec('P')
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def aws_profile_aware_lru_cache(*lru_cache_args,
|
|
128
|
+
scope: Literal['global', 'request'] = 'request',
|
|
129
|
+
**lru_cache_kwargs) -> Callable:
|
|
130
|
+
"""Similar to annotations.lru_cache, but automatically includes the
|
|
131
|
+
AWS profile (if set in the workspace config) in the cache key.
|
|
132
|
+
"""
|
|
133
|
+
|
|
134
|
+
def decorator(func: Callable[P, T]) -> Callable[P, T]:
|
|
135
|
+
|
|
136
|
+
@annotations.lru_cache(scope, *lru_cache_args, **lru_cache_kwargs)
|
|
137
|
+
def cached_impl(aws_profile, *args, **kwargs):
|
|
138
|
+
del aws_profile # Only used as part of the cache key.
|
|
139
|
+
return func(*args, **kwargs)
|
|
140
|
+
|
|
141
|
+
@functools.wraps(func)
|
|
142
|
+
def wrapper(*args, **kwargs):
|
|
143
|
+
aws_profile = aws.get_workspace_profile()
|
|
144
|
+
return cached_impl(aws_profile, *args, **kwargs)
|
|
145
|
+
|
|
146
|
+
wrapper.cache_clear = cached_impl.cache_clear # type: ignore[attr-defined]
|
|
147
|
+
return wrapper
|
|
148
|
+
|
|
149
|
+
return decorator
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _is_efa_instance_type(instance_type: str) -> bool:
|
|
153
|
+
"""Check if the instance type is in EFA supported instance family."""
|
|
154
|
+
return any(
|
|
155
|
+
instance_type.startswith(prefix)
|
|
156
|
+
for prefix in _EFA_INSTANCE_TYPE_PREFIXES)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@annotations.lru_cache(scope='global', maxsize=128)
|
|
160
|
+
def _get_efa_image_id(region_name: str) -> Optional[str]:
|
|
161
|
+
"""Get the EFA image id for the given region."""
|
|
162
|
+
try:
|
|
163
|
+
client = aws.client('ec2', region_name=region_name)
|
|
164
|
+
response = client.describe_images(Filters=[{
|
|
165
|
+
'Name': 'name',
|
|
166
|
+
'Values': [_EFA_IMAGE_NAME]
|
|
167
|
+
}])
|
|
168
|
+
if 'Images' not in response:
|
|
169
|
+
return None
|
|
170
|
+
if len(response['Images']) == 0:
|
|
171
|
+
return None
|
|
172
|
+
available_images = [
|
|
173
|
+
img for img in response['Images'] if img['State'] == 'available'
|
|
174
|
+
]
|
|
175
|
+
if len(available_images) == 0:
|
|
176
|
+
return None
|
|
177
|
+
sorted_images = sorted(available_images,
|
|
178
|
+
key=lambda x: x['CreationDate'],
|
|
179
|
+
reverse=True)
|
|
180
|
+
return sorted_images[0]['ImageId']
|
|
181
|
+
except (aws.botocore_exceptions().NoCredentialsError,
|
|
182
|
+
aws.botocore_exceptions().ProfileNotFound,
|
|
183
|
+
aws.botocore_exceptions().ClientError) as e:
|
|
184
|
+
with ux_utils.print_exception_no_traceback():
|
|
185
|
+
raise ValueError(f'Failed to get EFA image id: {e}') from None
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@annotations.lru_cache(scope='global', maxsize=128)
def _get_max_efa_interfaces(instance_type: str, region_name: str) -> int:
    """Return how many EFA interfaces ``instance_type`` supports.

    Instance types that fail the ``_is_efa_instance_type`` prefix check are
    answered with 0 without contacting AWS. Otherwise EC2
    ``describe_instance_types`` is queried with the
    ``network-info.efa-supported`` filter set to ``true``.

    Args:
        instance_type: Instance type name to look up.
        region_name: Region used to build the EC2 client.

    Returns:
        ``NetworkInfo.EfaInfo.MaximumEfaInterfaces`` of the first returned
        instance type, or 0 when nothing matches or the field is absent.

    Raises:
        ValueError: If credentials are missing, the profile cannot be found,
            or the EC2 request fails with a client error.
    """
    if not _is_efa_instance_type(instance_type):
        return 0
    try:
        ec2 = aws.client('ec2', region_name=region_name)
        efa_filter = {'Name': 'network-info.efa-supported', 'Values': ['true']}
        response = ec2.describe_instance_types(
            # TODO(cooperc): fix the types for mypy 1.16
            # Boto3 type stubs expect Literal instance types; using str list
            # here.
            InstanceTypes=[instance_type],  # type: ignore
            Filters=[efa_filter])
        if ('InstanceTypes' not in response or
                len(response['InstanceTypes']) == 0):
            return 0
        network_info = response['InstanceTypes'][0]['NetworkInfo']
        try:
            return network_info['EfaInfo']['MaximumEfaInterfaces']
        except KeyError:
            # Either 'EfaInfo' or 'MaximumEfaInterfaces' is absent.
            return 0
    except (aws.botocore_exceptions().NoCredentialsError,
            aws.botocore_exceptions().ProfileNotFound,
            aws.botocore_exceptions().ClientError) as e:
        with ux_utils.print_exception_no_traceback():
            raise ValueError(
                f'Failed to get max EFA interfaces for {instance_type}: {e}'
            ) from None
|
|
216
|
+
|
|
77
217
|
|
|
78
218
|
class AWSIdentityType(enum.Enum):
|
|
79
219
|
"""AWS identity type.
|
|
@@ -159,7 +299,9 @@ class AWS(clouds.Cloud):
|
|
|
159
299
|
|
|
160
300
|
@classmethod
|
|
161
301
|
def _unsupported_features_for_resources(
|
|
162
|
-
cls,
|
|
302
|
+
cls,
|
|
303
|
+
resources: 'resources_lib.Resources',
|
|
304
|
+
region: Optional[str] = None,
|
|
163
305
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
|
164
306
|
unsupported_features = {}
|
|
165
307
|
if resources.use_spot:
|
|
@@ -173,6 +315,11 @@ class AWS(clouds.Cloud):
|
|
|
173
315
|
f'High availability controllers are not supported on {cls._REPR}.'
|
|
174
316
|
)
|
|
175
317
|
|
|
318
|
+
unsupported_features[
|
|
319
|
+
clouds.CloudImplementationFeatures.CUSTOM_MULTI_NETWORK] = (
|
|
320
|
+
f'Customized multiple network interfaces are not supported on {cls._REPR}.'
|
|
321
|
+
)
|
|
322
|
+
|
|
176
323
|
return unsupported_features
|
|
177
324
|
|
|
178
325
|
@classmethod
|
|
@@ -196,12 +343,17 @@ class AWS(clouds.Cloud):
|
|
|
196
343
|
#### Regions/Zones ####
|
|
197
344
|
|
|
198
345
|
@classmethod
|
|
199
|
-
def regions_with_offering(
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
346
|
+
def regions_with_offering(
|
|
347
|
+
cls,
|
|
348
|
+
instance_type: str,
|
|
349
|
+
accelerators: Optional[Dict[str, int]],
|
|
350
|
+
use_spot: bool,
|
|
351
|
+
region: Optional[str],
|
|
352
|
+
zone: Optional[str],
|
|
353
|
+
resources: Optional['resources_lib.Resources'] = None,
|
|
354
|
+
) -> List[clouds.Region]:
|
|
203
355
|
del accelerators # unused
|
|
204
|
-
regions =
|
|
356
|
+
regions = catalog.get_region_zones_for_instance_type(
|
|
205
357
|
instance_type, use_spot, 'aws')
|
|
206
358
|
|
|
207
359
|
if region is not None:
|
|
@@ -256,19 +408,29 @@ class AWS(clouds.Cloud):
|
|
|
256
408
|
@classmethod
|
|
257
409
|
def _get_default_ami(cls, region_name: str, instance_type: str) -> str:
|
|
258
410
|
acc = cls.get_accelerators_from_instance_type(instance_type)
|
|
259
|
-
|
|
411
|
+
arch = cls.get_arch_from_instance_type(instance_type)
|
|
412
|
+
if arch == constants.ARM64_ARCH:
|
|
413
|
+
image_id = catalog.get_image_id_from_tag(
|
|
414
|
+
_DEFAULT_CPU_ARM64_IMAGE_ID, region_name, clouds='aws')
|
|
415
|
+
else:
|
|
416
|
+
image_id = catalog.get_image_id_from_tag(_DEFAULT_CPU_IMAGE_ID,
|
|
417
|
+
region_name,
|
|
418
|
+
clouds='aws')
|
|
419
|
+
if acc is not None:
|
|
420
|
+
if arch == constants.ARM64_ARCH:
|
|
421
|
+
image_id = catalog.get_image_id_from_tag(
|
|
422
|
+
_DEFAULT_GPU_ARM64_IMAGE_ID, region_name, clouds='aws')
|
|
423
|
+
else:
|
|
424
|
+
image_id = catalog.get_image_id_from_tag(_DEFAULT_GPU_IMAGE_ID,
|
|
260
425
|
region_name,
|
|
261
426
|
clouds='aws')
|
|
262
|
-
if acc is not None:
|
|
263
|
-
image_id = service_catalog.get_image_id_from_tag(
|
|
264
|
-
_DEFAULT_GPU_IMAGE_ID, region_name, clouds='aws')
|
|
265
427
|
assert len(acc) == 1, acc
|
|
266
428
|
acc_name = list(acc.keys())[0]
|
|
267
429
|
if acc_name == 'K80':
|
|
268
|
-
image_id =
|
|
430
|
+
image_id = catalog.get_image_id_from_tag(
|
|
269
431
|
_DEFAULT_GPU_K80_IMAGE_ID, region_name, clouds='aws')
|
|
270
432
|
if acc_name in ['Trainium', 'Inferentia']:
|
|
271
|
-
image_id =
|
|
433
|
+
image_id = catalog.get_image_id_from_tag(
|
|
272
434
|
_DEFAULT_NEURON_IMAGE_ID, region_name, clouds='aws')
|
|
273
435
|
if image_id is not None:
|
|
274
436
|
return image_id
|
|
@@ -286,8 +448,13 @@ class AWS(clouds.Cloud):
|
|
|
286
448
|
image_id: Optional[Dict[Optional[str], str]],
|
|
287
449
|
region_name: str,
|
|
288
450
|
instance_type: str,
|
|
451
|
+
enable_efa: bool,
|
|
289
452
|
) -> str:
|
|
290
453
|
if image_id is None:
|
|
454
|
+
if enable_efa:
|
|
455
|
+
efa_image_id = _get_efa_image_id(region_name)
|
|
456
|
+
if efa_image_id:
|
|
457
|
+
return efa_image_id
|
|
291
458
|
return cls._get_default_ami(region_name, instance_type)
|
|
292
459
|
if None in image_id:
|
|
293
460
|
image_id_str = image_id[None]
|
|
@@ -295,9 +462,9 @@ class AWS(clouds.Cloud):
|
|
|
295
462
|
assert region_name in image_id, image_id
|
|
296
463
|
image_id_str = image_id[region_name]
|
|
297
464
|
if image_id_str.startswith('skypilot:'):
|
|
298
|
-
image_id_str =
|
|
299
|
-
|
|
300
|
-
|
|
465
|
+
image_id_str = catalog.get_image_id_from_tag(image_id_str,
|
|
466
|
+
region_name,
|
|
467
|
+
clouds='aws')
|
|
301
468
|
if image_id_str is None:
|
|
302
469
|
# Raise ResourcesUnavailableError to make sure the failover
|
|
303
470
|
# in CloudVMRayBackend will be correctly triggered.
|
|
@@ -336,6 +503,45 @@ class AWS(clouds.Cloud):
|
|
|
336
503
|
raise ValueError(image_not_found_message) from None
|
|
337
504
|
return image_size
|
|
338
505
|
|
|
506
|
+
@classmethod
@aws_profile_aware_lru_cache(scope='request',
                             maxsize=_AWS_PROFILE_SCOPED_FUNC_CACHE_SIZE)
def get_image_root_device_name(cls, image_id: str,
                               region: Optional[str]) -> str:
    """Resolve the root device name of an AMI.

    Args:
        image_id: AMI id, or a tag starting with ``skypilot:``.
        region: Region in which to describe the image. Must not be None
            unless ``image_id`` starts with ``skypilot:``.

    Returns:
        The image's ``RootDeviceName``. ``DEFAULT_ROOT_DEVICE_NAME`` is
        returned instead for ``skypilot:`` tags, for images without a
        ``RootDeviceName`` field, and when credentials or the profile are
        unavailable.

    Raises:
        ValueError: If the image is not found or the EC2 request fails with
            a client error.
    """
    if image_id.startswith('skypilot:'):
        return DEFAULT_ROOT_DEVICE_NAME
    assert region is not None, (image_id, region)
    image_not_found_message = (
        f'Image {image_id!r} not found in AWS region {region}.\n'
        f'To find AWS AMI IDs: https://docs.aws.amazon.com/cli/latest/reference/ec2/describe-images.html#examples\n'  # pylint: disable=line-too-long
        'Example: ami-0729d913a335efca7')
    try:
        ec2 = aws.client('ec2', region_name=region)
        described = ec2.describe_images(ImageIds=[image_id])
    except (aws.botocore_exceptions().NoCredentialsError,
            aws.botocore_exceptions().ProfileNotFound):
        # Fallback to default root device name if no credentials are
        # available.
        # The credentials issue will be caught when actually provisioning
        # the instance and appropriate errors will be raised there.
        logger.warning(f'No credentials available for region {region}. '
                       f'Using {DEFAULT_ROOT_DEVICE_NAME}.')
        return DEFAULT_ROOT_DEVICE_NAME
    except aws.botocore_exceptions().ClientError:
        with ux_utils.print_exception_no_traceback():
            raise ValueError(image_not_found_message) from None

    matches = described.get('Images', [])
    if not matches:
        with ux_utils.print_exception_no_traceback():
            raise ValueError(image_not_found_message)
    image = matches[0]
    if 'RootDeviceName' in image:
        return image['RootDeviceName']
    logger.warning(f'Image {image_id!r} does not have a root '
                   f'device name. '
                   f'Using {DEFAULT_ROOT_DEVICE_NAME}.')
    return DEFAULT_ROOT_DEVICE_NAME
|
|
544
|
+
|
|
339
545
|
@classmethod
|
|
340
546
|
def get_zone_shell_cmd(cls) -> Optional[str]:
|
|
341
547
|
# The command for getting the current zone is from:
|
|
@@ -356,11 +562,11 @@ class AWS(clouds.Cloud):
|
|
|
356
562
|
use_spot: bool,
|
|
357
563
|
region: Optional[str] = None,
|
|
358
564
|
zone: Optional[str] = None) -> float:
|
|
359
|
-
return
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
565
|
+
return catalog.get_hourly_cost(instance_type,
|
|
566
|
+
use_spot=use_spot,
|
|
567
|
+
region=region,
|
|
568
|
+
zone=zone,
|
|
569
|
+
clouds='aws')
|
|
364
570
|
|
|
365
571
|
def accelerators_to_hourly_cost(self,
|
|
366
572
|
accelerators: Dict[str, int],
|
|
@@ -397,16 +603,19 @@ class AWS(clouds.Cloud):
|
|
|
397
603
|
return cost
|
|
398
604
|
|
|
399
605
|
@classmethod
|
|
400
|
-
def get_default_instance_type(
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
606
|
+
def get_default_instance_type(cls,
                              cpus: Optional[str] = None,
                              memory: Optional[str] = None,
                              disk_tier: Optional[
                                  resources_utils.DiskTier] = None,
                              region: Optional[str] = None,
                              zone: Optional[str] = None) -> Optional[str]:
    """Return the catalog's default AWS instance type for the constraints.

    All arguments are forwarded unchanged to
    ``catalog.get_default_instance_type`` together with ``clouds='aws'``.
    """
    constraints = {
        'cpus': cpus,
        'memory': memory,
        'disk_tier': disk_tier,
        'region': region,
        'zone': zone,
    }
    return catalog.get_default_instance_type(clouds='aws', **constraints)
|
|
410
619
|
|
|
411
620
|
# TODO: factor the following three methods, as they are the same logic
|
|
412
621
|
# between Azure and AWS.
|
|
@@ -415,48 +624,86 @@ class AWS(clouds.Cloud):
|
|
|
415
624
|
cls,
|
|
416
625
|
instance_type: str,
|
|
417
626
|
) -> Optional[Dict[str, Union[int, float]]]:
|
|
418
|
-
return
|
|
419
|
-
|
|
627
|
+
return catalog.get_accelerators_from_instance_type(instance_type,
|
|
628
|
+
clouds='aws')
|
|
629
|
+
|
|
630
|
+
@classmethod
def get_arch_from_instance_type(
    cls,
    instance_type: str,
) -> Optional[str]:
    """Return the catalog's architecture entry for an AWS instance type.

    Delegates to ``catalog.get_arch_from_instance_type`` with
    ``clouds='aws'``.
    """
    arch = catalog.get_arch_from_instance_type(instance_type, clouds='aws')
    return arch
|
|
420
636
|
|
|
421
637
|
@classmethod
|
|
422
638
|
def get_vcpus_mem_from_instance_type(
    cls,
    instance_type: str,
) -> Tuple[Optional[float], Optional[float]]:
    """Return the catalog's vCPU and memory entry for an AWS instance type.

    Delegates to ``catalog.get_vcpus_mem_from_instance_type`` with
    ``clouds='aws'``.
    """
    vcpus_and_memory = catalog.get_vcpus_mem_from_instance_type(
        instance_type, clouds='aws')
    return vcpus_and_memory
|
|
428
644
|
|
|
429
645
|
def make_deploy_resources_variables(
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
646
|
+
self,
|
|
647
|
+
resources: 'resources_lib.Resources',
|
|
648
|
+
cluster_name: resources_utils.ClusterName,
|
|
649
|
+
region: 'clouds.Region',
|
|
650
|
+
zones: Optional[List['clouds.Zone']],
|
|
651
|
+
num_nodes: int,
|
|
652
|
+
dryrun: bool = False,
|
|
653
|
+
volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
|
|
654
|
+
) -> Dict[str, Any]:
|
|
437
655
|
del dryrun # unused
|
|
438
656
|
assert zones is not None, (region, zones)
|
|
439
657
|
|
|
440
658
|
region_name = region.name
|
|
441
659
|
zone_names = [zone.name for zone in zones]
|
|
442
660
|
|
|
443
|
-
|
|
444
|
-
#
|
|
445
|
-
acc_dict = self.get_accelerators_from_instance_type(
|
|
661
|
+
resources = resources.assert_launchable()
|
|
662
|
+
# resources.accelerators is cleared but .instance_type encodes the info.
|
|
663
|
+
acc_dict = self.get_accelerators_from_instance_type(
|
|
664
|
+
resources.instance_type)
|
|
446
665
|
custom_resources = resources_utils.make_ray_custom_resources_str(
|
|
447
666
|
acc_dict)
|
|
448
667
|
|
|
449
|
-
|
|
668
|
+
network_tier = (resources.network_tier if resources.network_tier
|
|
669
|
+
is not None else resources_utils.NetworkTier.STANDARD)
|
|
670
|
+
if network_tier == resources_utils.NetworkTier.BEST:
|
|
671
|
+
max_efa_interfaces = _get_max_efa_interfaces(
|
|
672
|
+
resources.instance_type, region_name)
|
|
673
|
+
enable_efa = max_efa_interfaces > 0
|
|
674
|
+
else:
|
|
675
|
+
max_efa_interfaces = 0
|
|
676
|
+
enable_efa = False
|
|
677
|
+
|
|
678
|
+
docker_run_options = []
|
|
679
|
+
if resources.extract_docker_image() is not None:
|
|
450
680
|
image_id_to_use = None
|
|
681
|
+
if enable_efa:
|
|
682
|
+
docker_run_options = _EFA_DOCKER_RUN_OPTIONS
|
|
451
683
|
else:
|
|
452
|
-
image_id_to_use =
|
|
684
|
+
image_id_to_use = resources.image_id
|
|
453
685
|
image_id = self._get_image_id(image_id_to_use, region_name,
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
686
|
+
resources.instance_type, enable_efa)
|
|
687
|
+
|
|
688
|
+
root_device_name = self.get_image_root_device_name(
|
|
689
|
+
image_id, region_name)
|
|
690
|
+
|
|
691
|
+
ssh_user = skypilot_config.get_effective_region_config(
|
|
692
|
+
cloud='aws',
|
|
693
|
+
region=region_name,
|
|
694
|
+
keys=('ssh_user',),
|
|
695
|
+
default_value=DEFAULT_SSH_USER)
|
|
696
|
+
|
|
697
|
+
disk_encrypted = skypilot_config.get_effective_region_config(
|
|
698
|
+
cloud='aws',
|
|
699
|
+
region=region_name,
|
|
700
|
+
keys=('disk_encrypted',),
|
|
701
|
+
default_value=False)
|
|
702
|
+
user_security_group_config = skypilot_config.get_effective_region_config(
|
|
703
|
+
cloud='aws',
|
|
704
|
+
region=region_name,
|
|
705
|
+
keys=('security_group_name',),
|
|
706
|
+
default_value=None)
|
|
460
707
|
user_security_group = None
|
|
461
708
|
if isinstance(user_security_group_config, str):
|
|
462
709
|
user_security_group = user_security_group_config
|
|
@@ -483,17 +730,21 @@ class AWS(clouds.Cloud):
|
|
|
483
730
|
'in `~/.sky/config.yaml`.')
|
|
484
731
|
|
|
485
732
|
return {
|
|
486
|
-
'instance_type':
|
|
733
|
+
'instance_type': resources.instance_type,
|
|
487
734
|
'custom_resources': custom_resources,
|
|
488
735
|
'disk_encrypted': disk_encrypted,
|
|
489
|
-
'use_spot':
|
|
736
|
+
'use_spot': resources.use_spot,
|
|
490
737
|
'region': region_name,
|
|
491
738
|
'zones': ','.join(zone_names),
|
|
492
739
|
'image_id': image_id,
|
|
740
|
+
'root_device_name': root_device_name,
|
|
741
|
+
'ssh_user': ssh_user,
|
|
493
742
|
'security_group': security_group,
|
|
494
743
|
'security_group_managed_by_skypilot':
|
|
495
744
|
str(security_group != user_security_group).lower(),
|
|
496
|
-
|
|
745
|
+
'max_efa_interfaces': max_efa_interfaces,
|
|
746
|
+
'docker_run_options': docker_run_options,
|
|
747
|
+
**AWS._get_disk_specs(resources.disk_tier)
|
|
497
748
|
}
|
|
498
749
|
|
|
499
750
|
def _get_feasible_launchable_resources(
|
|
@@ -538,7 +789,9 @@ class AWS(clouds.Cloud):
|
|
|
538
789
|
default_instance_type = AWS.get_default_instance_type(
|
|
539
790
|
cpus=resources.cpus,
|
|
540
791
|
memory=resources.memory,
|
|
541
|
-
disk_tier=resources.disk_tier
|
|
792
|
+
disk_tier=resources.disk_tier,
|
|
793
|
+
region=resources.region,
|
|
794
|
+
zone=resources.zone)
|
|
542
795
|
if default_instance_type is None:
|
|
543
796
|
return resources_utils.FeasibleResources([], [], None)
|
|
544
797
|
else:
|
|
@@ -547,16 +800,16 @@ class AWS(clouds.Cloud):
|
|
|
547
800
|
|
|
548
801
|
assert len(accelerators) == 1, resources
|
|
549
802
|
acc, acc_count = list(accelerators.items())[0]
|
|
550
|
-
(instance_list,
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
803
|
+
(instance_list,
|
|
804
|
+
fuzzy_candidate_list) = catalog.get_instance_type_for_accelerator(
|
|
805
|
+
acc,
|
|
806
|
+
acc_count,
|
|
807
|
+
use_spot=resources.use_spot,
|
|
808
|
+
cpus=resources.cpus,
|
|
809
|
+
memory=resources.memory,
|
|
810
|
+
region=resources.region,
|
|
811
|
+
zone=resources.zone,
|
|
812
|
+
clouds='aws')
|
|
560
813
|
if instance_list is None:
|
|
561
814
|
return resources_utils.FeasibleResources([], fuzzy_candidate_list,
|
|
562
815
|
None)
|
|
@@ -564,20 +817,23 @@ class AWS(clouds.Cloud):
|
|
|
564
817
|
fuzzy_candidate_list, None)
|
|
565
818
|
|
|
566
819
|
@classmethod
|
|
567
|
-
def _check_compute_credentials(
|
|
820
|
+
def _check_compute_credentials(
        cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
    """Check access to AWS compute by delegating to ``_check_credentials``.

    Returns:
        Whatever ``cls._check_credentials()`` returns.
    """
    compute_check = cls._check_credentials()
    return compute_check
|
|
570
824
|
|
|
571
825
|
@classmethod
|
|
572
|
-
def _check_storage_credentials(
|
|
826
|
+
def _check_storage_credentials(
        cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
    """Check access to AWS storage by delegating to ``_check_credentials``.

    Returns:
        Whatever ``cls._check_credentials()`` returns.
    """
    # TODO(seungjin): Implement separate check for
    # if the user has access to S3.
    storage_check = cls._check_credentials()
    return storage_check
|
|
577
832
|
|
|
578
833
|
@classmethod
|
|
579
|
-
|
|
580
|
-
|
|
834
|
+
# Cache since getting identity is slow.
|
|
835
|
+
@aws_profile_aware_lru_cache(scope='request',
|
|
836
|
+
maxsize=_AWS_PROFILE_SCOPED_FUNC_CACHE_SIZE)
|
|
581
837
|
def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
|
|
582
838
|
"""Checks if the user has access credentials to AWS."""
|
|
583
839
|
|
|
@@ -596,12 +852,9 @@ class AWS(clouds.Cloud):
|
|
|
596
852
|
stderr=subprocess.PIPE)
|
|
597
853
|
if proc.returncode != 0:
|
|
598
854
|
return False, dependency_installation_hints
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
import boto3
|
|
603
|
-
import botocore
|
|
604
|
-
except ImportError:
|
|
855
|
+
|
|
856
|
+
# Checks if aws boto is installed properly
|
|
857
|
+
if not common.can_import_modules(['boto3', 'botocore']):
|
|
605
858
|
return False, dependency_installation_hints
|
|
606
859
|
|
|
607
860
|
# Checks if AWS credentials 1) exist and 2) are valid.
|
|
@@ -668,7 +921,7 @@ class AWS(clouds.Cloud):
|
|
|
668
921
|
|
|
669
922
|
# Fetch the AWS catalogs
|
|
670
923
|
# pylint: disable=import-outside-toplevel
|
|
671
|
-
from sky.
|
|
924
|
+
from sky.catalog import aws_catalog
|
|
672
925
|
|
|
673
926
|
# Trigger the fetch of the availability zones mapping.
|
|
674
927
|
try:
|
|
@@ -715,20 +968,28 @@ class AWS(clouds.Cloud):
|
|
|
715
968
|
return AWSIdentityType.SHARED_CREDENTIALS_FILE
|
|
716
969
|
|
|
717
970
|
@classmethod
|
|
718
|
-
@
|
|
971
|
+
@aws_profile_aware_lru_cache(scope='request',
                             maxsize=_AWS_PROFILE_SCOPED_FUNC_CACHE_SIZE)
def _aws_configure_list(cls) -> Optional[bytes]:
    """Run ``aws configure list`` and return its raw standard output.

    When ``aws.get_workspace_profile()`` yields a profile, it is passed to
    the CLI through ``--profile``. Standard error is discarded.

    Returns:
        The captured stdout bytes, or None when the command exits with a
        non-zero return code.
    """
    # pylint: disable=import-outside-toplevel
    import shlex

    cmd = 'aws configure list'
    # Profile takes precedence over default configs.
    profile = aws.get_workspace_profile()
    if profile is not None:
        # If profile does not exist, we will get returncode 255.
        # The command goes through a shell, so quote the profile name to
        # keep whitespace or shell metacharacters from being interpreted.
        # Plain names pass through shlex.quote unchanged.
        cmd += f' --profile {shlex.quote(str(profile))}'
    proc = subprocess.run(cmd,
                          shell=True,
                          check=False,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.DEVNULL)
    if proc.returncode != 0:
        return None
    return proc.stdout
|
|
728
988
|
|
|
729
989
|
@classmethod
|
|
730
|
-
|
|
731
|
-
|
|
990
|
+
# Cache since getting identity is slow.
|
|
991
|
+
@aws_profile_aware_lru_cache(scope='request',
|
|
992
|
+
maxsize=_AWS_PROFILE_SCOPED_FUNC_CACHE_SIZE)
|
|
732
993
|
def _sts_get_caller_identity(cls) -> Optional[List[List[str]]]:
|
|
733
994
|
try:
|
|
734
995
|
sts = aws.client('sts', check_credentials=False)
|
|
@@ -809,8 +1070,9 @@ class AWS(clouds.Cloud):
|
|
|
809
1070
|
return [user_ids]
|
|
810
1071
|
|
|
811
1072
|
@classmethod
|
|
812
|
-
|
|
813
|
-
|
|
1073
|
+
# Cache since getting identity is slow.
|
|
1074
|
+
@aws_profile_aware_lru_cache(scope='request',
|
|
1075
|
+
maxsize=_AWS_PROFILE_SCOPED_FUNC_CACHE_SIZE)
|
|
814
1076
|
def get_user_identities(cls) -> Optional[List[List[str]]]:
|
|
815
1077
|
"""Returns a [UserId, Account] list that uniquely identifies the user.
|
|
816
1078
|
|
|
@@ -859,7 +1121,7 @@ class AWS(clouds.Cloud):
|
|
|
859
1121
|
# `aws configure list` as cache key. Different `aws configure list` output
|
|
860
1122
|
# can have same aws identity, our assumption is the output would be stable
|
|
861
1123
|
# in real world, so the number of cache files would be limited.
|
|
862
|
-
# TODO(aylei): consider using a more stable cache key and
|
|
1124
|
+
# TODO(aylei): consider using a more stable cache key and evaluate eviction.
|
|
863
1125
|
cache_path = catalog_common.get_catalog_path(
|
|
864
1126
|
f'aws/.cache/user-identity-{config_hash}.txt')
|
|
865
1127
|
if os.path.exists(cache_path):
|
|
@@ -905,6 +1167,7 @@ class AWS(clouds.Cloud):
|
|
|
905
1167
|
# provider of the cluster to be launched in this function and make sure
|
|
906
1168
|
# the cluster will not be used for launching clusters in other clouds,
|
|
907
1169
|
# e.g. jobs controller.
|
|
1170
|
+
|
|
908
1171
|
if self._current_identity_type(
|
|
909
1172
|
) != AWSIdentityType.SHARED_CREDENTIALS_FILE:
|
|
910
1173
|
return {}
|
|
@@ -914,14 +1177,15 @@ class AWS(clouds.Cloud):
|
|
|
914
1177
|
if os.path.exists(os.path.expanduser(f'~/.aws/{filename}'))
|
|
915
1178
|
}
|
|
916
1179
|
|
|
917
|
-
@
|
|
1180
|
+
@aws_profile_aware_lru_cache(scope='request',
                             maxsize=_AWS_PROFILE_SCOPED_FUNC_CACHE_SIZE)
def can_credential_expire(self) -> bool:
    """Report whether the current AWS identity's credentials can expire.

    Returns:
        False when ``_current_identity_type()`` returns None; otherwise the
        result of that identity type's ``can_credential_expire()``.
    """
    identity_type = self._current_identity_type()
    if identity_type is None:
        return False
    return identity_type.can_credential_expire()
|
|
922
1186
|
|
|
923
1187
|
def instance_type_exists(self, instance_type):
    """Return the catalog's answer on whether an AWS instance type exists.

    Delegates to ``catalog.instance_type_exists`` with ``clouds='aws'``.
    """
    exists = catalog.instance_type_exists(instance_type, clouds='aws')
    return exists
|
|
925
1189
|
|
|
926
1190
|
@classmethod
|
|
927
1191
|
def _get_disk_type(cls, disk_tier: resources_utils.DiskTier) -> str:
|
|
@@ -971,12 +1235,13 @@ class AWS(clouds.Cloud):
|
|
|
971
1235
|
botocore.exceptions.ClientError: error in Boto3 client request.
|
|
972
1236
|
"""
|
|
973
1237
|
|
|
1238
|
+
resources = resources.assert_launchable()
|
|
974
1239
|
instance_type = resources.instance_type
|
|
975
1240
|
region = resources.region
|
|
976
1241
|
use_spot = resources.use_spot
|
|
977
1242
|
|
|
978
1243
|
# pylint: disable=import-outside-toplevel,unused-import
|
|
979
|
-
from sky.
|
|
1244
|
+
from sky.catalog import aws_catalog
|
|
980
1245
|
|
|
981
1246
|
quota_code = aws_catalog.get_quota_code(instance_type, use_spot)
|
|
982
1247
|
|
|
@@ -1056,7 +1321,7 @@ class AWS(clouds.Cloud):
|
|
|
1056
1321
|
|
|
1057
1322
|
image_name = f'skypilot-{cluster_name.display_name}-{int(time.time())}'
|
|
1058
1323
|
|
|
1059
|
-
status = provision_lib.query_instances('AWS',
|
|
1324
|
+
status = provision_lib.query_instances('AWS', cluster_name.display_name,
|
|
1060
1325
|
cluster_name.name_on_cloud,
|
|
1061
1326
|
{'region': region})
|
|
1062
1327
|
instance_ids = list(status.keys())
|