skypilot-nightly 1.0.0.dev20250310__tar.gz → 1.0.0.dev20250312__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {skypilot_nightly-1.0.0.dev20250310/skypilot_nightly.egg-info → skypilot_nightly-1.0.0.dev20250312}/PKG-INFO +1 -1
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/__init__.py +2 -2
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/nebius.py +11 -1
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/backend_utils.py +38 -15
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/cloud_vm_ray_backend.py +17 -52
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/kubernetes.py +89 -9
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/nebius.py +8 -6
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/kubernetes_catalog.py +3 -2
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/exceptions.py +20 -3
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/network.py +7 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/network_utils.py +3 -2
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/utils.py +22 -15
- skypilot_nightly-1.0.0.dev20250312/sky/server/requests/event_loop.py +31 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/executor.py +50 -22
- skypilot_nightly-1.0.0.dev20250312/sky/server/requests/preconditions.py +174 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/requests.py +42 -3
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/server.py +29 -8
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/stream_utils.py +9 -6
- skypilot_nightly-1.0.0.dev20250312/sky/server/uvicorn.py +81 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/accelerator_registry.py +1 -1
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/subprocess_utils.py +56 -1
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312/skypilot_nightly.egg-info}/PKG-INFO +1 -1
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/skypilot_nightly.egg-info/SOURCES.txt +3 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/MANIFEST.in +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/README.md +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/pyproject.toml +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/setup.cfg +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/setup.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/aws.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/azure.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/cloudflare.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/common.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/cudo.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/do.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/docker.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/gcp.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/ibm.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/kubernetes.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/oci.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/runpod.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/vast.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/vsphere.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/admin_policy.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/authentication.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/backend.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/docker_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/local_docker_backend.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/monkey_patches/monkey_patch_ray_up.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/wheel_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/benchmark/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/benchmark/benchmark_state.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/benchmark/benchmark_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/check.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/cli.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/client/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/client/cli.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/client/common.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/client/sdk.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/cloud_stores.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/aws.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/azure.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/cloud.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/cudo.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/do.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/fluidstack.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/gcp.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/ibm.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/lambda_cloud.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/oci.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/paperspace.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/runpod.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/scp.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/aws_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/azure_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/common.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/cudo_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_aws.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_azure.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_vast.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/do_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/fluidstack_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/gcp_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/ibm_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/lambda_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/nebius_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/oci_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/paperspace_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/runpod_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/scp_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/vast_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/vsphere_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/utils/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/utils/aws_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/utils/azure_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/utils/gcp_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/utils/oci_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/utils/scp_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/vast.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/vsphere.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/core.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/dag.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/data/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/data/data_transfer.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/data/data_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/data/mounting_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/data/storage.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/data/storage_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/execution.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/global_user_state.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/client/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/client/sdk.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/controller.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/dashboard/dashboard.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/dashboard/static/favicon.ico +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/dashboard/templates/index.html +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/recovery_strategy.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/scheduler.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/server/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/server/core.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/server/dashboard_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/server/server.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/state.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/models.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/optimizer.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/aws/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/aws/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/aws/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/aws/utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/azure/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/azure/azure-config-template.json +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/azure/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/azure/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/common.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/cudo/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/cudo/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/cudo/cudo_machine_type.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/cudo/cudo_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/cudo/cudo_wrapper.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/cudo/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/do/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/do/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/do/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/do/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/do/utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/docker_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/fluidstack/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/fluidstack/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/fluidstack/fluidstack_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/fluidstack/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/gcp/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/gcp/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/gcp/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/gcp/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/gcp/instance_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/gcp/mig_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/instance_setup.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/manifests/smarter-device-manager-configmap.yaml +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/lambda_cloud/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/lambda_cloud/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/lambda_cloud/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/lambda_cloud/lambda_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/logging.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/metadata_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/nebius/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/nebius/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/nebius/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/nebius/utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/oci/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/oci/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/oci/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/oci/query_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/paperspace/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/paperspace/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/paperspace/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/paperspace/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/paperspace/utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/provisioner.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/runpod/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/runpod/api/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/runpod/api/commands.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/runpod/api/pods.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/runpod/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/runpod/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/runpod/utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vast/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vast/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vast/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vast/utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/cls_api_client.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/cls_api_helper.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/custom_script.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/id_generator.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/metadata_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/service_manager.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/service_manager_factory.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/ssl_helper.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/vapiconnect.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/vim_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/vsphere_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/resources.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/autoscalers.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/client/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/client/sdk.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/controller.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/load_balancer.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/load_balancing_policies.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/replica_managers.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/serve_state.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/serve_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/server/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/server/core.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/server/server.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/service.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/service_spec.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/common.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/html/log.html +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/payloads.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/queues/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/queues/mp_queue.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/serializers/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/serializers/decoders.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/serializers/encoders.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/setup_files/MANIFEST.in +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/setup_files/dependencies.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/setup_files/setup.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/sky_logging.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/attempt_skylet.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/autostop_lib.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/configs.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/events.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/job_lib.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/log_lib.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/log_lib.pyi +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/command_runner.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/ibm/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/ibm/node_provider.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/ibm/utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/ibm/vpc_provider.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/scp/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/scp/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/scp/node_provider.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/autoscaler.py.patch +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/cli.py.patch +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/command_runner.py.patch +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/log_monitor.py.patch +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/resource_demand_scheduler.py.patch +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/updater.py.patch +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/worker.py.patch +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/skylet.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/subprocess_daemon.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skypilot_config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/task.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/aws-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/azure-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/cudo-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/do-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/fluidstack-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/gcp-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/ibm-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/jobs-controller.yaml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/kubernetes-ingress.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/kubernetes-loadbalancer.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/kubernetes-port-forward-proxy-command.sh +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/kubernetes-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/kubernetes-ssh-jump.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/lambda-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/local-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/nebius-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/oci-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/paperspace-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/runpod-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/scp-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/sky-serve-controller.yaml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/skypilot-server-kubernetes-proxy.sh +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/vast-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/vsphere-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/websocket_proxy.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/usage/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/usage/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/usage/usage_lib.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/admin_policy_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/annotations.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/cli_utils/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/cli_utils/status_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/cluster_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/command_runner.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/command_runner.pyi +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/common.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/common_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/config_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/control_master_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/controller_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/dag_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/db_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/env_options.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/create_cluster.sh +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/delete_cluster.sh +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/deploy_remote_cluster.sh +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/exec_kubeconfig_converter.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/generate_kind_config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/generate_kubeconfig.sh +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/gpu_labeler.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/kubernetes_deploy_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/rsync_helper.sh +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes_enums.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/log_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/message_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/registry.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/resources_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/rich_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/schemas.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/status_lib.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/timeline.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/ux_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/validator.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/skypilot_nightly.egg-info/dependency_links.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/skypilot_nightly.egg-info/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/skypilot_nightly.egg-info/requires.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/skypilot_nightly.egg-info/top_level.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_api.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_cli.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_config.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_global_user_state.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_jobs.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_jobs_and_serve.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_list_accelerators.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_optimizer_dryruns.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_optimizer_random_dag.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_serve_autoscaler.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_smoke.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_storage.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_wheels.py +0 -0
- {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_yaml_parser.py +0 -0
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = '78a42b6e733bbc29b68efe0e9c79191eaaca9fcd'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.dev20250310'
|
38
|
+
__version__ = '1.0.0.dev20250312'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
{skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/nebius.py
RENAMED
@@ -6,9 +6,11 @@ from sky.adaptors import common
|
|
6
6
|
NEBIUS_TENANT_ID_FILENAME = 'NEBIUS_TENANT_ID.txt'
|
7
7
|
NEBIUS_IAM_TOKEN_FILENAME = 'NEBIUS_IAM_TOKEN.txt'
|
8
8
|
NEBIUS_PROJECT_ID_FILENAME = 'NEBIUS_PROJECT_ID.txt'
|
9
|
+
NEBIUS_CREDENTIALS_FILENAME = 'credentials.json'
|
9
10
|
NEBIUS_TENANT_ID_PATH = '~/.nebius/' + NEBIUS_TENANT_ID_FILENAME
|
10
11
|
NEBIUS_IAM_TOKEN_PATH = '~/.nebius/' + NEBIUS_IAM_TOKEN_FILENAME
|
11
12
|
NEBIUS_PROJECT_ID_PATH = '~/.nebius/' + NEBIUS_PROJECT_ID_FILENAME
|
13
|
+
NEBIUS_CREDENTIALS_PATH = '~/.nebius/' + NEBIUS_CREDENTIALS_FILENAME
|
12
14
|
|
13
15
|
MAX_RETRIES_TO_DISK_CREATE = 120
|
14
16
|
MAX_RETRIES_TO_INSTANCE_STOP = 120
|
@@ -72,6 +74,11 @@ def get_iam_token():
|
|
72
74
|
return _iam_token
|
73
75
|
|
74
76
|
|
77
|
+
def is_token_or_cred_file_exist():
|
78
|
+
return (os.path.exists(os.path.expanduser(NEBIUS_IAM_TOKEN_PATH)) or
|
79
|
+
os.path.exists(os.path.expanduser(NEBIUS_CREDENTIALS_PATH)))
|
80
|
+
|
81
|
+
|
75
82
|
def get_project_id():
|
76
83
|
global _project_id
|
77
84
|
if _project_id is None:
|
@@ -97,4 +104,7 @@ def get_tenant_id():
|
|
97
104
|
|
98
105
|
|
99
106
|
def sdk():
|
100
|
-
|
107
|
+
if get_iam_token() is not None:
|
108
|
+
return nebius.sdk.SDK(credentials=get_iam_token())
|
109
|
+
return nebius.sdk.SDK(
|
110
|
+
credentials_file_name=os.path.expanduser(NEBIUS_CREDENTIALS_PATH))
|
@@ -1802,6 +1802,21 @@ def _update_cluster_status(cluster_name: str) -> Optional[Dict[str, Any]]:
|
|
1802
1802
|
status == status_lib.ClusterStatus.UP for status in node_statuses) and
|
1803
1803
|
len(node_statuses) == handle.launched_nodes)
|
1804
1804
|
|
1805
|
+
def get_node_counts_from_ray_status(
|
1806
|
+
runner: command_runner.CommandRunner) -> Tuple[int, int, str, str]:
|
1807
|
+
rc, output, stderr = runner.run(
|
1808
|
+
instance_setup.RAY_STATUS_WITH_SKY_RAY_PORT_COMMAND,
|
1809
|
+
stream_logs=False,
|
1810
|
+
require_outputs=True,
|
1811
|
+
separate_stderr=True)
|
1812
|
+
if rc:
|
1813
|
+
raise RuntimeError(
|
1814
|
+
f'Refreshing status ({cluster_name!r}): Failed to check '
|
1815
|
+
f'ray cluster\'s healthiness with '
|
1816
|
+
f'{instance_setup.RAY_STATUS_WITH_SKY_RAY_PORT_COMMAND}.\n'
|
1817
|
+
f'-- stdout --\n{output}\n-- stderr --\n{stderr}')
|
1818
|
+
return (*_count_healthy_nodes_from_ray(output), output, stderr)
|
1819
|
+
|
1805
1820
|
def run_ray_status_to_check_ray_cluster_healthy() -> bool:
|
1806
1821
|
try:
|
1807
1822
|
# NOTE: fetching the IPs is very slow as it calls into
|
@@ -1822,26 +1837,34 @@ def _update_cluster_status(cluster_name: str) -> Optional[Dict[str, Any]]:
|
|
1822
1837
|
raise exceptions.FetchClusterInfoError(
|
1823
1838
|
reason=exceptions.FetchClusterInfoError.Reason.HEAD)
|
1824
1839
|
head_runner = runners[0]
|
1825
|
-
rc, output, stderr = head_runner.run(
|
1826
|
-
instance_setup.RAY_STATUS_WITH_SKY_RAY_PORT_COMMAND,
|
1827
|
-
stream_logs=False,
|
1828
|
-
require_outputs=True,
|
1829
|
-
separate_stderr=True)
|
1830
|
-
if rc:
|
1831
|
-
raise RuntimeError(
|
1832
|
-
f'Refreshing status ({cluster_name!r}): Failed to check '
|
1833
|
-
f'ray cluster\'s healthiness with '
|
1834
|
-
f'{instance_setup.RAY_STATUS_WITH_SKY_RAY_PORT_COMMAND}.\n'
|
1835
|
-
f'-- stdout --\n{output}\n-- stderr --\n{stderr}')
|
1836
1840
|
|
1837
|
-
ready_head, ready_workers = _count_healthy_nodes_from_ray(output)
|
1838
1841
|
total_nodes = handle.launched_nodes * handle.num_ips_per_node
|
1839
|
-
|
1840
|
-
|
1842
|
+
|
1843
|
+
for i in range(5):
|
1844
|
+
ready_head, ready_workers, output, stderr = (
|
1845
|
+
get_node_counts_from_ray_status(head_runner))
|
1846
|
+
if ready_head + ready_workers == total_nodes:
|
1847
|
+
return True
|
1848
|
+
logger.debug(f'Refreshing status ({cluster_name!r}) attempt '
|
1849
|
+
f'{i}: ray status not showing all nodes '
|
1850
|
+
f'({ready_head + ready_workers}/{total_nodes});\n'
|
1851
|
+
f'output:\n{output}\nstderr:\n{stderr}')
|
1852
|
+
|
1853
|
+
# If cluster JUST started, maybe not all the nodes have shown
|
1854
|
+
# up. Try again for a few seconds.
|
1855
|
+
# Note: We are okay with this performance hit because it's very
|
1856
|
+
# rare to normally hit this case. It requires:
|
1857
|
+
# - All the instances in the cluster are up on the cloud side
|
1858
|
+
# (not preempted), but
|
1859
|
+
# - The ray cluster is somehow degraded so not all instances are
|
1860
|
+
# showing up
|
1861
|
+
time.sleep(1)
|
1862
|
+
|
1841
1863
|
raise RuntimeError(
|
1842
1864
|
f'Refreshing status ({cluster_name!r}): ray status not showing '
|
1843
1865
|
f'all nodes ({ready_head + ready_workers}/'
|
1844
|
-
f'{total_nodes})
|
1866
|
+
f'{total_nodes});\noutput:\n{output}\nstderr:\n{stderr}')
|
1867
|
+
|
1845
1868
|
except exceptions.FetchClusterInfoError:
|
1846
1869
|
logger.debug(
|
1847
1870
|
f'Refreshing status ({cluster_name!r}) failed to get IPs.')
|
@@ -772,32 +772,6 @@ class FailoverCloudErrorHandlerV1:
|
|
772
772
|
setattr(e, 'detailed_reason', detailed_reason)
|
773
773
|
raise e
|
774
774
|
|
775
|
-
@staticmethod
|
776
|
-
def _lambda_handler(blocked_resources: Set['resources_lib.Resources'],
|
777
|
-
launchable_resources: 'resources_lib.Resources',
|
778
|
-
region: 'clouds.Region',
|
779
|
-
zones: Optional[List['clouds.Zone']], stdout: str,
|
780
|
-
stderr: str):
|
781
|
-
del region, zones # Unused.
|
782
|
-
errors = FailoverCloudErrorHandlerV1._handle_errors(
|
783
|
-
stdout,
|
784
|
-
stderr,
|
785
|
-
is_error_str_known=lambda x: 'LambdaCloudError:' in x.strip())
|
786
|
-
messages = '\n '.join(errors)
|
787
|
-
style = colorama.Style
|
788
|
-
logger.warning(f' {style.DIM}{messages}{style.RESET_ALL}')
|
789
|
-
_add_to_blocked_resources(blocked_resources,
|
790
|
-
launchable_resources.copy(zone=None))
|
791
|
-
|
792
|
-
# Sometimes, LambdaCloudError will list available regions.
|
793
|
-
for e in errors:
|
794
|
-
if e.find('Regions with capacity available:') != -1:
|
795
|
-
for r in service_catalog.regions('lambda'):
|
796
|
-
if e.find(r.name) == -1:
|
797
|
-
_add_to_blocked_resources(
|
798
|
-
blocked_resources,
|
799
|
-
launchable_resources.copy(region=r.name, zone=None))
|
800
|
-
|
801
775
|
@staticmethod
|
802
776
|
def _scp_handler(blocked_resources: Set['resources_lib.Resources'],
|
803
777
|
launchable_resources: 'resources_lib.Resources',
|
@@ -846,32 +820,6 @@ class FailoverCloudErrorHandlerV1:
|
|
846
820
|
_add_to_blocked_resources(blocked_resources,
|
847
821
|
launchable_resources.copy(zone=zone.name))
|
848
822
|
|
849
|
-
# Apr, 2023 by Hysun(hysun.he@oracle.com): Added support for OCI
|
850
|
-
@staticmethod
|
851
|
-
def _oci_handler(blocked_resources: Set['resources_lib.Resources'],
|
852
|
-
launchable_resources: 'resources_lib.Resources',
|
853
|
-
region: 'clouds.Region',
|
854
|
-
zones: Optional[List['clouds.Zone']], stdout: str,
|
855
|
-
stderr: str):
|
856
|
-
known_service_errors = [
|
857
|
-
'NotAuthorizedOrNotFound', 'CannotParseRequest', 'InternalError',
|
858
|
-
'LimitExceeded', 'NotAuthenticated'
|
859
|
-
]
|
860
|
-
errors = FailoverCloudErrorHandlerV1._handle_errors(
|
861
|
-
stdout, stderr, lambda x: 'VcnSubnetNotFound' in x.strip() or
|
862
|
-
('oci.exceptions.ServiceError' in x.strip() and any(
|
863
|
-
known_err in x.strip() for known_err in known_service_errors)))
|
864
|
-
logger.warning(f'Got error(s) in {region.name}:')
|
865
|
-
messages = '\n\t'.join(errors)
|
866
|
-
style = colorama.Style
|
867
|
-
logger.warning(f'{style.DIM}\t{messages}{style.RESET_ALL}')
|
868
|
-
|
869
|
-
if zones is not None:
|
870
|
-
for zone in zones:
|
871
|
-
_add_to_blocked_resources(
|
872
|
-
blocked_resources,
|
873
|
-
launchable_resources.copy(zone=zone.name))
|
874
|
-
|
875
823
|
@staticmethod
|
876
824
|
def update_blocklist_on_error(
|
877
825
|
blocked_resources: Set['resources_lib.Resources'],
|
@@ -1123,6 +1071,23 @@ class FailoverCloudErrorHandlerV2:
|
|
1123
1071
|
blocked_resources,
|
1124
1072
|
launchable_resources.copy(zone=zone.name))
|
1125
1073
|
|
1074
|
+
@staticmethod
|
1075
|
+
def _lambda_handler(blocked_resources: Set['resources_lib.Resources'],
|
1076
|
+
launchable_resources: 'resources_lib.Resources',
|
1077
|
+
region: 'clouds.Region',
|
1078
|
+
zones: Optional[List['clouds.Zone']], error: Exception):
|
1079
|
+
output = str(error)
|
1080
|
+
# Sometimes, lambda cloud error will list available regions.
|
1081
|
+
if output.find('Regions with capacity available:') != -1:
|
1082
|
+
for r in service_catalog.regions('lambda'):
|
1083
|
+
if output.find(r.name) == -1:
|
1084
|
+
_add_to_blocked_resources(
|
1085
|
+
blocked_resources,
|
1086
|
+
launchable_resources.copy(region=r.name, zone=None))
|
1087
|
+
else:
|
1088
|
+
FailoverCloudErrorHandlerV2._default_handler(
|
1089
|
+
blocked_resources, launchable_resources, region, zones, error)
|
1090
|
+
|
1126
1091
|
@staticmethod
|
1127
1092
|
def _default_handler(blocked_resources: Set['resources_lib.Resources'],
|
1128
1093
|
launchable_resources: 'resources_lib.Resources',
|
{skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/kubernetes.py
RENAMED
@@ -2,9 +2,10 @@
|
|
2
2
|
import os
|
3
3
|
import re
|
4
4
|
import typing
|
5
|
-
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
5
|
+
from typing import Dict, Iterator, List, Optional, Set, Tuple, Union
|
6
6
|
|
7
7
|
from sky import clouds
|
8
|
+
from sky import exceptions
|
8
9
|
from sky import sky_logging
|
9
10
|
from sky import skypilot_config
|
10
11
|
from sky.adaptors import kubernetes
|
@@ -78,6 +79,11 @@ class Kubernetes(clouds.Cloud):
|
|
78
79
|
PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
|
79
80
|
STATUS_VERSION = clouds.StatusVersion.SKYPILOT
|
80
81
|
|
82
|
+
_INDENT_PREFIX = ' ' * 4
|
83
|
+
|
84
|
+
# Set of contexts that has logged as temporarily unreachable
|
85
|
+
logged_unreachable_contexts: Set[str] = set()
|
86
|
+
|
81
87
|
@property
|
82
88
|
def ssh_key_secret_field_name(self):
|
83
89
|
# Use a fresh user hash to avoid conflicts in the secret object naming.
|
@@ -90,6 +96,8 @@ class Kubernetes(clouds.Cloud):
|
|
90
96
|
def _unsupported_features_for_resources(
|
91
97
|
cls, resources: 'resources_lib.Resources'
|
92
98
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
99
|
+
# TODO(aylei): features need to be regional (per context) to make
|
100
|
+
# multi-kubernetes selection/failover work.
|
93
101
|
unsupported_features = cls._CLOUD_UNSUPPORTED_FEATURES.copy()
|
94
102
|
context = resources.region
|
95
103
|
if context is None:
|
@@ -106,10 +114,13 @@ class Kubernetes(clouds.Cloud):
|
|
106
114
|
unsupported_features[
|
107
115
|
clouds.CloudImplementationFeatures.AUTO_TERMINATE] = message
|
108
116
|
# Allow spot instances if supported by the cluster
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
117
|
+
try:
|
118
|
+
spot_label_key, _ = kubernetes_utils.get_spot_label(context)
|
119
|
+
if spot_label_key is not None:
|
120
|
+
unsupported_features.pop(
|
121
|
+
clouds.CloudImplementationFeatures.SPOT_INSTANCE, None)
|
122
|
+
except exceptions.KubeAPIUnreachableError as e:
|
123
|
+
cls._log_unreachable_context(context, str(e))
|
113
124
|
return unsupported_features
|
114
125
|
|
115
126
|
@classmethod
|
@@ -170,6 +181,36 @@ class Kubernetes(clouds.Cloud):
|
|
170
181
|
cls._log_skipped_contexts_once(tuple(skipped_contexts))
|
171
182
|
return existing_contexts
|
172
183
|
|
184
|
+
@classmethod
|
185
|
+
def _log_unreachable_context(cls,
|
186
|
+
context: str,
|
187
|
+
reason: Optional[str] = None) -> None:
|
188
|
+
"""Logs a Kubernetes context as unreachable.
|
189
|
+
|
190
|
+
Args:
|
191
|
+
context: The Kubernetes context to mark as unreachable.
|
192
|
+
reason: Optional reason for marking the context as unreachable.
|
193
|
+
silent: Whether to suppress the log message.
|
194
|
+
"""
|
195
|
+
# Skip if this context has already been logged as unreachable
|
196
|
+
if context in cls.logged_unreachable_contexts:
|
197
|
+
return
|
198
|
+
|
199
|
+
cls.logged_unreachable_contexts.add(context)
|
200
|
+
msg = f'Excluding Kubernetes context {context}'
|
201
|
+
if reason is not None:
|
202
|
+
msg += f': {reason}'
|
203
|
+
logger.info(msg)
|
204
|
+
|
205
|
+
# Check if all existing allowed contexts are now unreachable
|
206
|
+
existing_contexts = cls.existing_allowed_contexts()
|
207
|
+
if existing_contexts and all(ctx in cls.logged_unreachable_contexts
|
208
|
+
for ctx in existing_contexts):
|
209
|
+
logger.warning(
|
210
|
+
'All Kubernetes contexts are unreachable. '
|
211
|
+
'Retry if it is a transient error, or run sky check to '
|
212
|
+
'refresh Kubernetes availability if permanent.')
|
213
|
+
|
173
214
|
@classmethod
|
174
215
|
def regions_with_offering(cls, instance_type: Optional[str],
|
175
216
|
accelerators: Optional[Dict[str, int]],
|
@@ -198,8 +239,12 @@ class Kubernetes(clouds.Cloud):
|
|
198
239
|
# provision_timeout, after which failover will be triggered.
|
199
240
|
for r in regions:
|
200
241
|
context = r.name
|
201
|
-
|
202
|
-
|
242
|
+
try:
|
243
|
+
fits, reason = kubernetes_utils.check_instance_fits(
|
244
|
+
context, instance_type)
|
245
|
+
except exceptions.KubeAPIUnreachableError as e:
|
246
|
+
cls._log_unreachable_context(context, str(e))
|
247
|
+
continue
|
203
248
|
if fits:
|
204
249
|
regions_to_return.append(r)
|
205
250
|
else:
|
@@ -609,18 +654,53 @@ class Kubernetes(clouds.Cloud):
|
|
609
654
|
'Check if you have a valid kubeconfig file' +
|
610
655
|
check_skypilot_config_msg)
|
611
656
|
reasons = []
|
657
|
+
hints = []
|
658
|
+
success = False
|
612
659
|
for context in existing_allowed_contexts:
|
613
660
|
try:
|
614
661
|
check_result = kubernetes_utils.check_credentials(context)
|
615
662
|
if check_result[0]:
|
616
|
-
|
617
|
-
|
663
|
+
success = True
|
664
|
+
if check_result[1] is not None:
|
665
|
+
hints.append(f'Context {context}: {check_result[1]}')
|
666
|
+
else:
|
667
|
+
reasons.append(f'Context {context}: {check_result[1]}')
|
618
668
|
except Exception as e: # pylint: disable=broad-except
|
619
669
|
return (False, f'Credential check failed for {context}: '
|
620
670
|
f'{common_utils.format_exception(e)}')
|
671
|
+
if success:
|
672
|
+
return (True, cls._format_credential_check_results(hints, reasons))
|
621
673
|
return (False, 'Failed to find available context with working '
|
622
674
|
'credentials. Details:\n' + '\n'.join(reasons))
|
623
675
|
|
676
|
+
@classmethod
|
677
|
+
def _format_credential_check_results(cls, hints: List[str],
|
678
|
+
reasons: List[str]) -> str:
|
679
|
+
"""Format credential check results with hints and reasons.
|
680
|
+
|
681
|
+
Args:
|
682
|
+
hints: List of successful context check messages.
|
683
|
+
reasons: List of failed context check reasons.
|
684
|
+
|
685
|
+
Returns:
|
686
|
+
A formatted string containing hints followed by failure reasons.
|
687
|
+
"""
|
688
|
+
message_parts = []
|
689
|
+
if len(hints) == 1 and not reasons:
|
690
|
+
return hints[0]
|
691
|
+
if hints:
|
692
|
+
message_parts.append(f'\n{cls._INDENT_PREFIX} ' +
|
693
|
+
f'\n{cls._INDENT_PREFIX} '.join(hints))
|
694
|
+
if reasons:
|
695
|
+
if hints:
|
696
|
+
message_parts.append('\n')
|
697
|
+
message_parts.append(
|
698
|
+
f'\n{cls._INDENT_PREFIX}Unavailable contexts (remove from '
|
699
|
+
'"allowed_contexts" config if permanently unavailable): '
|
700
|
+
f'\n{cls._INDENT_PREFIX} ' +
|
701
|
+
f'\n{cls._INDENT_PREFIX} '.join(reasons))
|
702
|
+
return ''.join(message_parts)
|
703
|
+
|
624
704
|
def get_credential_file_mounts(self) -> Dict[str, str]:
|
625
705
|
if os.path.exists(os.path.expanduser(CREDENTIAL_PATH)):
|
626
706
|
# Upload kubeconfig to the default path to avoid having to set
|
{skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/nebius.py
RENAMED
@@ -17,6 +17,7 @@ _CREDENTIAL_FILES = [
|
|
17
17
|
nebius.NEBIUS_TENANT_ID_FILENAME,
|
18
18
|
nebius.NEBIUS_IAM_TOKEN_FILENAME,
|
19
19
|
nebius.NEBIUS_PROJECT_ID_FILENAME,
|
20
|
+
nebius.NEBIUS_CREDENTIALS_FILENAME
|
20
21
|
]
|
21
22
|
|
22
23
|
|
@@ -252,15 +253,16 @@ class Nebius(clouds.Cloud):
|
|
252
253
|
def check_credentials(cls) -> Tuple[bool, Optional[str]]:
|
253
254
|
""" Verify that the user has valid credentials for Nebius. """
|
254
255
|
logging.debug('Nebius cloud check credentials')
|
255
|
-
|
256
|
-
|
257
|
-
|
256
|
+
token_cred_msg = (' Credentials can be set up by running: \n'\
|
257
|
+
f' $ nebius iam get-access-token > {nebius.NEBIUS_IAM_TOKEN_PATH} \n'\
|
258
|
+
' or generate ~/.nebius/credentials.json') # pylint: disable=line-too-long
|
259
|
+
|
258
260
|
tenant_msg = (' Copy your tenat ID from the web console and save it to file \n' # pylint: disable=line-too-long
|
259
261
|
f' $ echo $NEBIUS_TENANT_ID_PATH > {nebius.NEBIUS_TENANT_ID_PATH} \n' # pylint: disable=line-too-long
|
260
262
|
' Or if you have 1 tenant you can run:\n' # pylint: disable=line-too-long
|
261
263
|
f' $ nebius --format json iam whoami|jq -r \'.user_profile.tenants[0].tenant_id\' > {nebius.NEBIUS_TENANT_ID_PATH} \n') # pylint: disable=line-too-long
|
262
|
-
if
|
263
|
-
return False, f'{
|
264
|
+
if not nebius.is_token_or_cred_file_exist():
|
265
|
+
return False, f'{token_cred_msg}'
|
264
266
|
sdk = nebius.sdk()
|
265
267
|
tenant_id = nebius.get_tenant_id()
|
266
268
|
if tenant_id is None:
|
@@ -272,7 +274,7 @@ class Nebius(clouds.Cloud):
|
|
272
274
|
except nebius.request_error() as e:
|
273
275
|
return False, (
|
274
276
|
f'{e.status} \n' # First line is indented by 4 spaces
|
275
|
-
f'{
|
277
|
+
f'{token_cred_msg}'
|
276
278
|
f'{tenant_msg}')
|
277
279
|
return True, None
|
278
280
|
|
@@ -164,12 +164,13 @@ def _list_accelerators(
|
|
164
164
|
|
165
165
|
accelerators_qtys: Set[Tuple[str, int]] = set()
|
166
166
|
keys = lf.get_label_keys()
|
167
|
-
nodes = kubernetes_utils.get_kubernetes_nodes(context)
|
167
|
+
nodes = kubernetes_utils.get_kubernetes_nodes(context=context)
|
168
168
|
pods = None
|
169
169
|
if realtime:
|
170
170
|
# Get the pods to get the real-time GPU usage
|
171
171
|
try:
|
172
|
-
pods = kubernetes_utils.get_all_pods_in_kubernetes_cluster(
|
172
|
+
pods = kubernetes_utils.get_all_pods_in_kubernetes_cluster(
|
173
|
+
context=context)
|
173
174
|
except kubernetes.api_exception() as e:
|
174
175
|
if e.status == 403:
|
175
176
|
logger.warning(
|
@@ -28,12 +28,19 @@ GIT_FATAL_EXIT_CODE = 128
|
|
28
28
|
ARCH_NOT_SUPPORTED_EXIT_CODE = 133
|
29
29
|
|
30
30
|
|
31
|
-
def is_safe_exception(exc: Exception) -> bool:
|
31
|
+
def is_safe_exception(exc: BaseException) -> bool:
|
32
32
|
"""Returns True if the exception is safe to send to clients.
|
33
33
|
|
34
34
|
Safe exceptions are:
|
35
35
|
1. Built-in exceptions
|
36
36
|
2. SkyPilot's own exceptions
|
37
|
+
|
38
|
+
Args:
|
39
|
+
exc: The exception to check, accept BaseException to handle SystemExit
|
40
|
+
and KeyboardInterrupt.
|
41
|
+
|
42
|
+
Returns:
|
43
|
+
True if the exception is safe to send to clients, False otherwise.
|
37
44
|
"""
|
38
45
|
module = type(exc).__module__
|
39
46
|
|
@@ -48,7 +55,7 @@ def is_safe_exception(exc: Exception) -> bool:
|
|
48
55
|
return False
|
49
56
|
|
50
57
|
|
51
|
-
def wrap_exception(exc: Exception) -> Exception:
|
58
|
+
def wrap_exception(exc: BaseException) -> BaseException:
|
52
59
|
"""Wraps non-safe exceptions into SkyPilot exceptions
|
53
60
|
|
54
61
|
This is used to wrap exceptions that are not safe to deserialize at clients.
|
@@ -64,7 +71,8 @@ def wrap_exception(exc: Exception) -> Exception:
|
|
64
71
|
error_type=type(exc).__name__)
|
65
72
|
|
66
73
|
|
67
|
-
|
74
|
+
# Accept BaseException to handle SystemExit and KeyboardInterrupt
|
75
|
+
def serialize_exception(e: BaseException) -> Dict[str, Any]:
|
68
76
|
"""Serialize the exception.
|
69
77
|
|
70
78
|
This function also wraps any unsafe exceptions (e.g., cloud exceptions)
|
@@ -156,6 +164,15 @@ class ResourcesUnavailableError(Exception):
|
|
156
164
|
return self
|
157
165
|
|
158
166
|
|
167
|
+
class KubeAPIUnreachableError(ResourcesUnavailableError):
|
168
|
+
"""Raised when the Kubernetes API is currently unreachable.
|
169
|
+
|
170
|
+
This is a subclass of ResourcesUnavailableError to trigger same failover
|
171
|
+
behavior as other ResourcesUnavailableError.
|
172
|
+
"""
|
173
|
+
pass
|
174
|
+
|
175
|
+
|
159
176
|
class InvalidCloudConfigs(Exception):
|
160
177
|
"""Raised when invalid configurations are provided for a given cloud."""
|
161
178
|
pass
|
@@ -157,7 +157,11 @@ def _cleanup_ports_for_loadbalancer(
|
|
157
157
|
) -> None:
|
158
158
|
service_name = _LOADBALANCER_SERVICE_NAME.format(
|
159
159
|
cluster_name_on_cloud=cluster_name_on_cloud)
|
160
|
+
# TODO(aylei): test coverage
|
161
|
+
context = provider_config.get(
|
162
|
+
'context', kubernetes_utils.get_current_kube_config_context_name())
|
160
163
|
network_utils.delete_namespaced_service(
|
164
|
+
context=context,
|
161
165
|
namespace=provider_config.get('namespace', 'default'),
|
162
166
|
service_name=service_name,
|
163
167
|
)
|
@@ -169,9 +173,12 @@ def _cleanup_ports_for_ingress(
|
|
169
173
|
provider_config: Dict[str, Any],
|
170
174
|
) -> None:
|
171
175
|
# Delete services for each port
|
176
|
+
context = provider_config.get(
|
177
|
+
'context', kubernetes_utils.get_current_kube_config_context_name())
|
172
178
|
for port in ports:
|
173
179
|
service_name = f'{cluster_name_on_cloud}--skypilot-svc--{port}'
|
174
180
|
network_utils.delete_namespaced_service(
|
181
|
+
context=context,
|
175
182
|
namespace=provider_config.get('namespace',
|
176
183
|
kubernetes_utils.DEFAULT_NAMESPACE),
|
177
184
|
service_name=service_name,
|
@@ -194,9 +194,10 @@ def create_or_replace_namespaced_service(
|
|
194
194
|
_request_timeout=kubernetes.API_TIMEOUT)
|
195
195
|
|
196
196
|
|
197
|
-
def delete_namespaced_service(
|
197
|
+
def delete_namespaced_service(context: Optional[str], namespace: str,
|
198
|
+
service_name: str) -> None:
|
198
199
|
"""Deletes a service resource."""
|
199
|
-
core_api = kubernetes.core_api()
|
200
|
+
core_api = kubernetes.core_api(context)
|
200
201
|
|
201
202
|
try:
|
202
203
|
core_api.delete_namespaced_service(
|
@@ -125,6 +125,10 @@ def _retry_on_error(max_retries=DEFAULT_MAX_RETRIES,
|
|
125
125
|
retry_interval: Initial seconds to wait between retries
|
126
126
|
resource_type: Type of resource being accessed (e.g. 'node', 'pod').
|
127
127
|
Used to provide more specific error messages.
|
128
|
+
|
129
|
+
Raises:
|
130
|
+
KubeAPIUnreachableError: If the API server of the given context is
|
131
|
+
unreachable.
|
128
132
|
"""
|
129
133
|
|
130
134
|
def decorator(func):
|
@@ -135,6 +139,9 @@ def _retry_on_error(max_retries=DEFAULT_MAX_RETRIES,
|
|
135
139
|
backoff = common_utils.Backoff(initial_backoff=retry_interval,
|
136
140
|
max_backoff_factor=3)
|
137
141
|
|
142
|
+
assert 'context' in kwargs, 'context is required'
|
143
|
+
context = kwargs.get('context')
|
144
|
+
|
138
145
|
for attempt in range(max_retries):
|
139
146
|
try:
|
140
147
|
return func(*args, **kwargs)
|
@@ -160,6 +167,8 @@ def _retry_on_error(max_retries=DEFAULT_MAX_RETRIES,
|
|
160
167
|
if resource_type else ''
|
161
168
|
debug_cmd = f' To debug, run: kubectl get {resource_type}s' \
|
162
169
|
if resource_type else ''
|
170
|
+
if context:
|
171
|
+
debug_cmd += f' --context {context}'
|
163
172
|
|
164
173
|
if isinstance(last_exception, kubernetes.max_retry_error()):
|
165
174
|
error_msg = f'Timed out{resource_msg} from Kubernetes cluster.'
|
@@ -170,7 +179,7 @@ def _retry_on_error(max_retries=DEFAULT_MAX_RETRIES,
|
|
170
179
|
error_msg = (f'Kubernetes configuration error{resource_msg}: '
|
171
180
|
f'{str(last_exception)}')
|
172
181
|
|
173
|
-
raise exceptions.
|
182
|
+
raise exceptions.KubeAPIUnreachableError(
|
174
183
|
f'{error_msg}'
|
175
184
|
f' Please check if the cluster is healthy and retry.'
|
176
185
|
f'{debug_cmd}') from last_exception
|
@@ -529,7 +538,7 @@ def detect_gpu_label_formatter(
|
|
529
538
|
"""
|
530
539
|
# Get all labels across all nodes
|
531
540
|
node_labels: Dict[str, List[Tuple[str, str]]] = {}
|
532
|
-
nodes = get_kubernetes_nodes(context)
|
541
|
+
nodes = get_kubernetes_nodes(context=context)
|
533
542
|
for node in nodes:
|
534
543
|
node_labels[node.metadata.name] = []
|
535
544
|
for label, value in node.metadata.labels.items():
|
@@ -564,7 +573,7 @@ def detect_accelerator_resource(
|
|
564
573
|
"""
|
565
574
|
# Get the set of resources across all nodes
|
566
575
|
cluster_resources: Set[str] = set()
|
567
|
-
nodes = get_kubernetes_nodes(context)
|
576
|
+
nodes = get_kubernetes_nodes(context=context)
|
568
577
|
for node in nodes:
|
569
578
|
cluster_resources.update(node.status.allocatable.keys())
|
570
579
|
has_accelerator = (get_gpu_resource_key() in cluster_resources or
|
@@ -575,7 +584,7 @@ def detect_accelerator_resource(
|
|
575
584
|
|
576
585
|
@annotations.lru_cache(scope='request', maxsize=10)
|
577
586
|
@_retry_on_error(resource_type='node')
|
578
|
-
def get_kubernetes_nodes(context: Optional[str] = None) -> List[Any]:
|
587
|
+
def get_kubernetes_nodes(*, context: Optional[str] = None) -> List[Any]:
|
579
588
|
"""Gets the kubernetes nodes in the context.
|
580
589
|
|
581
590
|
If context is None, gets the nodes in the current context.
|
@@ -589,8 +598,9 @@ def get_kubernetes_nodes(context: Optional[str] = None) -> List[Any]:
|
|
589
598
|
|
590
599
|
|
591
600
|
@_retry_on_error(resource_type='pod')
|
592
|
-
def get_all_pods_in_kubernetes_cluster(
|
593
|
-
|
601
|
+
def get_all_pods_in_kubernetes_cluster(*,
|
602
|
+
context: Optional[str] = None
|
603
|
+
) -> List[Any]:
|
594
604
|
"""Gets pods in all namespaces in kubernetes cluster indicated by context.
|
595
605
|
|
596
606
|
Used for computing cluster resource usage.
|
@@ -619,9 +629,6 @@ def check_instance_fits(context: Optional[str],
|
|
619
629
|
Optional[str]: Error message if the instance does not fit.
|
620
630
|
"""
|
621
631
|
|
622
|
-
# TODO(zhwu): this should check the node for specific context, instead
|
623
|
-
# of the default context to make failover fully functional.
|
624
|
-
|
625
632
|
def check_cpu_mem_fits(candidate_instance_type: 'KubernetesInstanceType',
|
626
633
|
node_list: List[Any]) -> Tuple[bool, Optional[str]]:
|
627
634
|
"""Checks if the instance fits on the cluster based on CPU and memory.
|
@@ -682,7 +689,7 @@ def check_instance_fits(context: Optional[str],
|
|
682
689
|
f'{tpu_list_in_cluster_str}. Note that multi-host TPU '
|
683
690
|
'podslices are currently not unsupported.')
|
684
691
|
|
685
|
-
nodes = get_kubernetes_nodes(context)
|
692
|
+
nodes = get_kubernetes_nodes(context=context)
|
686
693
|
k8s_instance_type = KubernetesInstanceType.\
|
687
694
|
from_instance_type(instance)
|
688
695
|
acc_type = k8s_instance_type.accelerator_type
|
@@ -846,7 +853,7 @@ def get_accelerator_label_key_value(
|
|
846
853
|
for label, value in label_list:
|
847
854
|
if (label_formatter.match_label_key(label) and
|
848
855
|
label_formatter.get_accelerator_from_label_value(
|
849
|
-
value) == acc_type):
|
856
|
+
value).lower() == acc_type.lower()):
|
850
857
|
if is_tpu_on_gke(acc_type):
|
851
858
|
assert isinstance(label_formatter,
|
852
859
|
GKELabelFormatter)
|
@@ -2083,7 +2090,7 @@ def get_spot_label(
|
|
2083
2090
|
"""
|
2084
2091
|
# Check if the cluster supports spot instances by checking nodes for known
|
2085
2092
|
# spot label keys and values
|
2086
|
-
for node in get_kubernetes_nodes(context):
|
2093
|
+
for node in get_kubernetes_nodes(context=context):
|
2087
2094
|
for _, (key, value) in SPOT_LABEL_MAP.items():
|
2088
2095
|
if key in node.metadata.labels and node.metadata.labels[
|
2089
2096
|
key] == value:
|
@@ -2133,10 +2140,10 @@ def get_kubernetes_node_info(
|
|
2133
2140
|
Dict[str, KubernetesNodeInfo]: Dictionary containing the node name as
|
2134
2141
|
key and the KubernetesNodeInfo object as value
|
2135
2142
|
"""
|
2136
|
-
nodes = get_kubernetes_nodes(context)
|
2143
|
+
nodes = get_kubernetes_nodes(context=context)
|
2137
2144
|
# Get the pods to get the real-time resource usage
|
2138
2145
|
try:
|
2139
|
-
pods = get_all_pods_in_kubernetes_cluster(context)
|
2146
|
+
pods = get_all_pods_in_kubernetes_cluster(context=context)
|
2140
2147
|
except kubernetes.api_exception() as e:
|
2141
2148
|
if e.status == 403:
|
2142
2149
|
pods = None
|
@@ -2443,7 +2450,7 @@ def is_multi_host_tpu(node_metadata_labels: dict) -> bool:
|
|
2443
2450
|
|
2444
2451
|
def multi_host_tpu_exists_in_cluster(context: Optional[str] = None) -> bool:
|
2445
2452
|
"""Checks if there exists a multi-host TPU within the cluster."""
|
2446
|
-
nodes = get_kubernetes_nodes(context)
|
2453
|
+
nodes = get_kubernetes_nodes(context=context)
|
2447
2454
|
for node in nodes:
|
2448
2455
|
if is_multi_host_tpu(node.metadata.labels):
|
2449
2456
|
return True
|