skypilot-nightly 1.0.0.dev20241110__tar.gz → 1.0.0.dev20241112__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {skypilot_nightly-1.0.0.dev20241110/skypilot_nightly.egg-info → skypilot_nightly-1.0.0.dev20241112}/PKG-INFO +2 -2
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/README.md +1 -1
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/__init__.py +2 -2
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/cli.py +7 -3
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/kubernetes_catalog.py +34 -11
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/docker_utils.py +1 -1
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/kubernetes/instance.py +104 -102
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/kubernetes/utils.py +26 -14
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/serve/__init__.py +2 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/serve/load_balancer.py +34 -8
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/serve/load_balancing_policies.py +23 -1
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/serve/service.py +4 -1
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/serve/service_spec.py +19 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/kubernetes-ray.yml.j2 +21 -1
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/kubernetes/generate_kubeconfig.sh +3 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/schemas.py +8 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112/skypilot_nightly.egg-info}/PKG-INFO +2 -2
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/tests/test_smoke.py +5 -5
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/MANIFEST.in +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/pyproject.toml +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/setup.cfg +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/setup.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/adaptors/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/adaptors/aws.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/adaptors/azure.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/adaptors/cloudflare.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/adaptors/common.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/adaptors/cudo.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/adaptors/docker.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/adaptors/gcp.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/adaptors/ibm.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/adaptors/kubernetes.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/adaptors/oci.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/adaptors/runpod.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/adaptors/vsphere.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/admin_policy.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/authentication.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/backends/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/backends/backend.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/backends/backend_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/backends/cloud_vm_ray_backend.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/backends/docker_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/backends/local_docker_backend.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/backends/monkey_patches/monkey_patch_ray_up.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/backends/wheel_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/benchmark/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/benchmark/benchmark_state.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/benchmark/benchmark_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/check.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/cloud_stores.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/aws.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/azure.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/cloud.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/cloud_registry.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/cudo.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/fluidstack.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/gcp.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/ibm.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/kubernetes.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/lambda_cloud.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/oci.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/paperspace.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/runpod.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/scp.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/aws_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/azure_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/common.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/cudo_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/data_fetchers/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/data_fetchers/fetch_aws.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/data_fetchers/fetch_azure.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/fluidstack_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/gcp_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/ibm_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/lambda_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/oci_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/paperspace_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/runpod_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/scp_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/service_catalog/vsphere_catalog.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/utils/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/utils/aws_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/utils/azure_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/utils/gcp_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/utils/oci_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/utils/scp_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/clouds/vsphere.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/core.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/dag.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/data/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/data/data_transfer.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/data/data_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/data/mounting_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/data/storage.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/data/storage_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/exceptions.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/execution.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/global_user_state.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/jobs/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/jobs/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/jobs/controller.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/jobs/core.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/jobs/dashboard/dashboard.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/jobs/dashboard/static/favicon.ico +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/jobs/dashboard/templates/index.html +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/jobs/recovery_strategy.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/jobs/state.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/jobs/utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/optimizer.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/aws/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/aws/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/aws/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/aws/utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/azure/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/azure/azure-config-template.json +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/azure/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/azure/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/common.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/cudo/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/cudo/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/cudo/cudo_machine_type.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/cudo/cudo_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/cudo/cudo_wrapper.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/cudo/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/fluidstack/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/fluidstack/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/fluidstack/fluidstack_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/fluidstack/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/gcp/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/gcp/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/gcp/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/gcp/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/gcp/instance_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/gcp/mig_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/instance_setup.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/kubernetes/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/kubernetes/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/kubernetes/manifests/smarter-device-manager-configmap.yaml +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/kubernetes/network.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/kubernetes/network_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/lambda_cloud/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/lambda_cloud/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/lambda_cloud/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/lambda_cloud/lambda_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/logging.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/metadata_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/oci/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/oci/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/oci/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/oci/query_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/paperspace/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/paperspace/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/paperspace/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/paperspace/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/paperspace/utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/provisioner.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/runpod/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/runpod/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/runpod/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/runpod/utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/common/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/common/cls_api_client.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/common/cls_api_helper.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/common/custom_script.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/common/id_generator.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/common/metadata_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/common/service_manager.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/common/service_manager_factory.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/common/ssl_helper.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/common/vapiconnect.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/common/vim_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/instance.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/provision/vsphere/vsphere_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/resources.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/serve/autoscalers.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/serve/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/serve/controller.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/serve/core.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/serve/replica_managers.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/serve/serve_state.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/serve/serve_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/setup_files/MANIFEST.in +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/setup_files/setup.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/sky_logging.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/attempt_skylet.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/autostop_lib.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/configs.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/events.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/job_lib.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/log_lib.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/log_lib.pyi +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/providers/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/providers/command_runner.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/providers/ibm/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/providers/ibm/node_provider.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/providers/ibm/utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/providers/ibm/vpc_provider.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/providers/scp/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/providers/scp/config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/providers/scp/node_provider.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/ray_patches/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/ray_patches/autoscaler.py.patch +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/ray_patches/cli.py.patch +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/ray_patches/command_runner.py.patch +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/ray_patches/log_monitor.py.patch +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/ray_patches/resource_demand_scheduler.py.patch +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/ray_patches/updater.py.patch +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/ray_patches/worker.py.patch +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/skylet.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skylet/subprocess_daemon.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/skypilot_config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/status_lib.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/task.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/aws-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/azure-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/cudo-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/fluidstack-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/gcp-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/ibm-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/jobs-controller.yaml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/kubernetes-ingress.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/kubernetes-loadbalancer.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/kubernetes-port-forward-proxy-command.sh +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/kubernetes-ssh-jump.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/lambda-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/local-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/oci-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/paperspace-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/runpod-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/scp-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/sky-serve-controller.yaml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/templates/vsphere-ray.yml.j2 +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/usage/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/usage/constants.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/usage/usage_lib.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/accelerator_registry.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/admin_policy_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/cli_utils/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/cli_utils/status_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/cluster_yaml_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/command_runner.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/command_runner.pyi +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/common_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/control_master_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/controller_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/dag_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/db_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/env_options.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/kubernetes/__init__.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/kubernetes/create_cluster.sh +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/kubernetes/delete_cluster.sh +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/kubernetes/deploy_remote_cluster.sh +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/kubernetes/generate_kind_config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/kubernetes/gpu_labeler.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/kubernetes/rsync_helper.sh +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/kubernetes_enums.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/log_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/resources_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/rich_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/subprocess_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/timeline.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/ux_utils.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/utils/validator.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/skypilot_nightly.egg-info/SOURCES.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/skypilot_nightly.egg-info/dependency_links.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/skypilot_nightly.egg-info/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/skypilot_nightly.egg-info/requires.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/skypilot_nightly.egg-info/top_level.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/tests/test_api.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/tests/test_cli.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/tests/test_config.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/tests/test_global_user_state.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/tests/test_jobs.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/tests/test_jobs_and_serve.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/tests/test_list_accelerators.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/tests/test_optimizer_dryruns.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/tests/test_optimizer_random_dag.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/tests/test_serve_autoscaler.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/tests/test_storage.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/tests/test_wheels.py +0 -0
- {skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/tests/test_yaml_parser.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: skypilot-nightly
|
3
|
-
Version: 1.0.0.dev20241110
|
3
|
+
Version: 1.0.0.dev20241112
|
4
4
|
Summary: SkyPilot: An intercloud broker for the clouds
|
5
5
|
Author: SkyPilot Team
|
6
6
|
License: Apache 2.0
|
@@ -319,7 +319,7 @@ Runnable examples:
|
|
319
319
|
- [LocalGPT](./llm/localgpt)
|
320
320
|
- [Falcon](./llm/falcon)
|
321
321
|
- Add yours here & see more in [`llm/`](./llm)!
|
322
|
-
- Framework examples: [PyTorch DDP](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_torch.yaml), [DeepSpeed](./examples/deepspeed-multinode/sky.yaml), [JAX/Flax on TPU](https://github.com/skypilot-org/skypilot/blob/master/examples/tpu/tpuvm_mnist.yaml), [Stable Diffusion](https://github.com/skypilot-org/skypilot/tree/master/examples/stable_diffusion), [Detectron2](https://github.com/skypilot-org/skypilot/blob/master/examples/detectron2_docker.yaml), [Distributed](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_tf_app.py) [TensorFlow](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_app_storage.yaml), [Ray Train](examples/distributed_ray_train/ray_train.yaml), [NeMo](https://github.com/skypilot-org/skypilot/blob/master/examples/nemo/
|
322
|
+
- Framework examples: [PyTorch DDP](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_torch.yaml), [DeepSpeed](./examples/deepspeed-multinode/sky.yaml), [JAX/Flax on TPU](https://github.com/skypilot-org/skypilot/blob/master/examples/tpu/tpuvm_mnist.yaml), [Stable Diffusion](https://github.com/skypilot-org/skypilot/tree/master/examples/stable_diffusion), [Detectron2](https://github.com/skypilot-org/skypilot/blob/master/examples/detectron2_docker.yaml), [Distributed](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_tf_app.py) [TensorFlow](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_app_storage.yaml), [Ray Train](examples/distributed_ray_train/ray_train.yaml), [NeMo](https://github.com/skypilot-org/skypilot/blob/master/examples/nemo/), [programmatic grid search](https://github.com/skypilot-org/skypilot/blob/master/examples/huggingface_glue_imdb_grid_search_app.py), [Docker](https://github.com/skypilot-org/skypilot/blob/master/examples/docker/echo_app.yaml), [Cog](https://github.com/skypilot-org/skypilot/blob/master/examples/cog/), [Unsloth](https://github.com/skypilot-org/skypilot/blob/master/examples/unsloth/unsloth.yaml), [Ollama](https://github.com/skypilot-org/skypilot/blob/master/llm/ollama), [llm.c](https://github.com/skypilot-org/skypilot/tree/master/llm/gpt-2), [Airflow](./examples/airflow/training_workflow) and [many more (`examples/`)](./examples).
|
323
323
|
|
324
324
|
Case Studies and Integrations: [Community Spotlights](https://blog.skypilot.co/community/)
|
325
325
|
|
@@ -183,7 +183,7 @@ Runnable examples:
|
|
183
183
|
- [LocalGPT](./llm/localgpt)
|
184
184
|
- [Falcon](./llm/falcon)
|
185
185
|
- Add yours here & see more in [`llm/`](./llm)!
|
186
|
-
- Framework examples: [PyTorch DDP](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_torch.yaml), [DeepSpeed](./examples/deepspeed-multinode/sky.yaml), [JAX/Flax on TPU](https://github.com/skypilot-org/skypilot/blob/master/examples/tpu/tpuvm_mnist.yaml), [Stable Diffusion](https://github.com/skypilot-org/skypilot/tree/master/examples/stable_diffusion), [Detectron2](https://github.com/skypilot-org/skypilot/blob/master/examples/detectron2_docker.yaml), [Distributed](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_tf_app.py) [TensorFlow](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_app_storage.yaml), [Ray Train](examples/distributed_ray_train/ray_train.yaml), [NeMo](https://github.com/skypilot-org/skypilot/blob/master/examples/nemo/
|
186
|
+
- Framework examples: [PyTorch DDP](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_torch.yaml), [DeepSpeed](./examples/deepspeed-multinode/sky.yaml), [JAX/Flax on TPU](https://github.com/skypilot-org/skypilot/blob/master/examples/tpu/tpuvm_mnist.yaml), [Stable Diffusion](https://github.com/skypilot-org/skypilot/tree/master/examples/stable_diffusion), [Detectron2](https://github.com/skypilot-org/skypilot/blob/master/examples/detectron2_docker.yaml), [Distributed](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_tf_app.py) [TensorFlow](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_app_storage.yaml), [Ray Train](examples/distributed_ray_train/ray_train.yaml), [NeMo](https://github.com/skypilot-org/skypilot/blob/master/examples/nemo/), [programmatic grid search](https://github.com/skypilot-org/skypilot/blob/master/examples/huggingface_glue_imdb_grid_search_app.py), [Docker](https://github.com/skypilot-org/skypilot/blob/master/examples/docker/echo_app.yaml), [Cog](https://github.com/skypilot-org/skypilot/blob/master/examples/cog/), [Unsloth](https://github.com/skypilot-org/skypilot/blob/master/examples/unsloth/unsloth.yaml), [Ollama](https://github.com/skypilot-org/skypilot/blob/master/llm/ollama), [llm.c](https://github.com/skypilot-org/skypilot/tree/master/llm/gpt-2), [Airflow](./examples/airflow/training_workflow) and [many more (`examples/`)](./examples).
|
187
187
|
|
188
188
|
Case Studies and Integrations: [Community Spotlights](https://blog.skypilot.co/community/)
|
189
189
|
|
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = '140125eaad5fb64da37934c8f6650d68aa135f77'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.dev20241110'
|
38
|
+
__version__ = '1.0.0.dev20241112'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
@@ -3102,6 +3102,7 @@ def show_gpus(
|
|
3102
3102
|
kubernetes_autoscaling = kubernetes_utils.get_autoscaler_type() is not None
|
3103
3103
|
kubernetes_is_enabled = sky_clouds.cloud_in_iterable(
|
3104
3104
|
sky_clouds.Kubernetes(), global_user_state.get_cached_enabled_clouds())
|
3105
|
+
no_permissions_str = '<no permissions>'
|
3105
3106
|
|
3106
3107
|
def _list_to_str(lst):
|
3107
3108
|
return ', '.join([str(e) for e in lst])
|
@@ -3146,9 +3147,11 @@ def show_gpus(
|
|
3146
3147
|
debug_msg)
|
3147
3148
|
raise ValueError(full_err_msg)
|
3148
3149
|
for gpu, _ in sorted(counts.items()):
|
3150
|
+
available_qty = available[gpu] if available[gpu] != -1 else (
|
3151
|
+
no_permissions_str)
|
3149
3152
|
realtime_gpu_table.add_row([
|
3150
3153
|
gpu,
|
3151
|
-
_list_to_str(counts.pop(gpu)), capacity[gpu],
|
3154
|
+
_list_to_str(counts.pop(gpu)), capacity[gpu], available_qty
|
3152
3155
|
])
|
3153
3156
|
return realtime_gpu_table
|
3154
3157
|
|
@@ -3158,10 +3161,11 @@ def show_gpus(
|
|
3158
3161
|
|
3159
3162
|
node_info_dict = kubernetes_utils.get_kubernetes_node_info(context)
|
3160
3163
|
for node_name, node_info in node_info_dict.items():
|
3164
|
+
available = node_info.free['nvidia.com/gpu'] if node_info.free[
|
3165
|
+
'nvidia.com/gpu'] != -1 else no_permissions_str
|
3161
3166
|
node_table.add_row([
|
3162
3167
|
node_name, node_info.gpu_type,
|
3163
|
-
node_info.total['nvidia.com/gpu'],
|
3164
|
-
node_info.free['nvidia.com/gpu']
|
3168
|
+
node_info.total['nvidia.com/gpu'], available
|
3165
3169
|
])
|
3166
3170
|
return node_table
|
3167
3171
|
|
@@ -10,6 +10,7 @@ from typing import Dict, List, Optional, Set, Tuple
|
|
10
10
|
from sky import check as sky_check
|
11
11
|
from sky import sky_logging
|
12
12
|
from sky.adaptors import common as adaptors_common
|
13
|
+
from sky.adaptors import kubernetes
|
13
14
|
from sky.clouds import Kubernetes
|
14
15
|
from sky.clouds.service_catalog import CloudFilter
|
15
16
|
from sky.clouds.service_catalog import common
|
@@ -22,6 +23,8 @@ if typing.TYPE_CHECKING:
|
|
22
23
|
else:
|
23
24
|
pd = adaptors_common.LazyImport('pandas')
|
24
25
|
|
26
|
+
logger = sky_logging.init_logger(__name__)
|
27
|
+
|
25
28
|
_PULL_FREQUENCY_HOURS = 7
|
26
29
|
|
27
30
|
# We keep pull_frequency_hours so we can remotely update the default image paths
|
@@ -77,6 +80,11 @@ def list_accelerators_realtime(
|
|
77
80
|
require_price: bool = True
|
78
81
|
) -> Tuple[Dict[str, List[common.InstanceTypeInfo]], Dict[str, int], Dict[str,
|
79
82
|
int]]:
|
83
|
+
"""List accelerators in the Kubernetes cluster.
|
84
|
+
|
85
|
+
If the user does not have sufficient permissions to list pods in all
|
86
|
+
namespaces, the function will return free GPUs as -1.
|
87
|
+
"""
|
80
88
|
# TODO(romilb): This should be refactored to use get_kubernetes_node_info()
|
81
89
|
# function from kubernetes_utils.
|
82
90
|
del all_regions, require_price # Unused.
|
@@ -108,7 +116,17 @@ def list_accelerators_realtime(
|
|
108
116
|
key = label_formatter.get_label_key()
|
109
117
|
nodes = kubernetes_utils.get_kubernetes_nodes(context)
|
110
118
|
# Get the pods to get the real-time GPU usage
|
111
|
-
|
119
|
+
try:
|
120
|
+
pods = kubernetes_utils.get_all_pods_in_kubernetes_cluster(context)
|
121
|
+
except kubernetes.api_exception() as e:
|
122
|
+
if e.status == 403:
|
123
|
+
logger.warning('Failed to get pods in the Kubernetes cluster '
|
124
|
+
'(forbidden). Please check if your account has '
|
125
|
+
'necessary permissions to list pods. Realtime GPU '
|
126
|
+
'availability information may be incorrect.')
|
127
|
+
pods = None
|
128
|
+
else:
|
129
|
+
raise
|
112
130
|
# Total number of GPUs in the cluster
|
113
131
|
total_accelerators_capacity: Dict[str, int] = {}
|
114
132
|
# Total number of GPUs currently available in the cluster
|
@@ -141,6 +159,21 @@ def list_accelerators_realtime(
|
|
141
159
|
if accelerator_count not in accelerators_qtys:
|
142
160
|
accelerators_qtys.add((accelerator_name, accelerator_count))
|
143
161
|
|
162
|
+
if accelerator_count >= min_quantity_filter:
|
163
|
+
quantized_count = (min_quantity_filter *
|
164
|
+
(accelerator_count // min_quantity_filter))
|
165
|
+
if accelerator_name not in total_accelerators_capacity:
|
166
|
+
total_accelerators_capacity[
|
167
|
+
accelerator_name] = quantized_count
|
168
|
+
else:
|
169
|
+
total_accelerators_capacity[
|
170
|
+
accelerator_name] += quantized_count
|
171
|
+
|
172
|
+
if pods is None:
|
173
|
+
# If we can't get the pods, we can't get the GPU usage
|
174
|
+
total_accelerators_available[accelerator_name] = -1
|
175
|
+
continue
|
176
|
+
|
144
177
|
for pod in pods:
|
145
178
|
# Get all the pods running on the node
|
146
179
|
if (pod.spec.node_name == node.metadata.name and
|
@@ -155,16 +188,6 @@ def list_accelerators_realtime(
|
|
155
188
|
|
156
189
|
accelerators_available = accelerator_count - allocated_qty
|
157
190
|
|
158
|
-
if accelerator_count >= min_quantity_filter:
|
159
|
-
quantized_count = (min_quantity_filter *
|
160
|
-
(accelerator_count // min_quantity_filter))
|
161
|
-
if accelerator_name not in total_accelerators_capacity:
|
162
|
-
total_accelerators_capacity[
|
163
|
-
accelerator_name] = quantized_count
|
164
|
-
else:
|
165
|
-
total_accelerators_capacity[
|
166
|
-
accelerator_name] += quantized_count
|
167
|
-
|
168
191
|
if accelerator_name not in total_accelerators_available:
|
169
192
|
total_accelerators_available[accelerator_name] = 0
|
170
193
|
if accelerators_available >= min_quantity_filter:
|
@@ -20,7 +20,7 @@ SETUP_ENV_VARS_CMD = (
|
|
20
20
|
'{ if [ $(id -u) -ne 0 ]; then echo "sudo"; else echo ""; fi; } && '
|
21
21
|
'printenv | while IFS=\'=\' read -r key value; do echo "export $key=\\\"$value\\\""; done > ' # pylint: disable=line-too-long
|
22
22
|
'~/container_env_var.sh && '
|
23
|
-
'$(prefix_cmd) mv ~/container_env_var.sh /etc/profile.d/container_env_var.sh'
|
23
|
+
'$(prefix_cmd) mv ~/container_env_var.sh /etc/profile.d/container_env_var.sh;'
|
24
24
|
)
|
25
25
|
|
26
26
|
# Docker daemon may not be ready when the machine is firstly started. The error
|
@@ -333,52 +333,37 @@ def _run_function_with_retries(func: Callable,
|
|
333
333
|
raise
|
334
334
|
|
335
335
|
|
336
|
-
def
|
337
|
-
|
338
|
-
"""Setting environment variables in pods.
|
339
|
-
|
340
|
-
Once all containers are ready, we can exec into them and set env vars.
|
341
|
-
Kubernetes automatically populates containers with critical
|
342
|
-
environment variables, such as those for discovering services running
|
343
|
-
in the cluster and CUDA/nvidia environment variables. We need to
|
344
|
-
make sure these env vars are available in every task and ssh session.
|
345
|
-
This is needed for GPU support and service discovery.
|
346
|
-
See https://github.com/skypilot-org/skypilot/issues/2287 for
|
347
|
-
more details.
|
348
|
-
|
349
|
-
To do so, we capture env vars from the pod's runtime and write them to
|
350
|
-
/etc/profile.d/, making them available for all users in future
|
351
|
-
shell sessions.
|
352
|
-
"""
|
353
|
-
set_k8s_env_var_cmd = docker_utils.SETUP_ENV_VARS_CMD
|
336
|
+
def pre_init(namespace: str, context: Optional[str], new_nodes: List) -> None:
|
337
|
+
"""Pre-initialization step for SkyPilot pods.
|
354
338
|
|
355
|
-
|
356
|
-
|
357
|
-
logger.info(f'{"-"*20}Start: Set up env vars in pod {pod_name!r} '
|
358
|
-
f'{"-"*20}')
|
359
|
-
runner = command_runner.KubernetesCommandRunner(
|
360
|
-
((namespace, context), pod_name))
|
339
|
+
This step is run in the pod right after it is created and before the
|
340
|
+
SkyPilot runtime is setup.
|
361
341
|
|
362
|
-
|
363
|
-
rc, stdout, _ = runner.run(set_k8s_env_var_cmd,
|
364
|
-
require_outputs=True,
|
365
|
-
stream_logs=False)
|
366
|
-
_raise_command_running_error('set env vars', set_k8s_env_var_cmd,
|
367
|
-
pod_name, rc, stdout)
|
342
|
+
This step includes three key steps:
|
368
343
|
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
344
|
+
1. Privilege check: Checks if the default user has sufficient privilege
|
345
|
+
to set up the kubernetes instance pod.
|
346
|
+
2. SSH setup: Sets up SSH for the pod instance.
|
347
|
+
3. Environment variable setup to populate k8s env vars in the pod.
|
373
348
|
|
374
|
-
|
375
|
-
|
349
|
+
Make sure commands used in these methods are generic and work
|
350
|
+
on most base images. E.g., do not use Python, since that may not
|
351
|
+
be installed by default.
|
376
352
|
|
353
|
+
If you run any apt commands, be sure to check if the lock is available.
|
354
|
+
It is possible the `apt update` run in the pod container args may still
|
355
|
+
be running.
|
356
|
+
|
357
|
+
Args:
|
358
|
+
namespace (str): Kubernetes namespace.
|
359
|
+
context (Optional[str]): Kubernetes context.
|
360
|
+
new_nodes (List): List of new pod instances.
|
361
|
+
|
362
|
+
Raises:
|
363
|
+
config_lib.KubernetesError: If user privileges are insufficient or
|
364
|
+
setup fails.
|
365
|
+
"""
|
377
366
|
|
378
|
-
def _check_user_privilege(namespace: str, context: Optional[str],
|
379
|
-
new_nodes: List) -> None:
|
380
|
-
# Checks if the default user has sufficient privilege to set up
|
381
|
-
# the kubernetes instance pod.
|
382
367
|
check_k8s_user_sudo_cmd = (
|
383
368
|
'if [ $(id -u) -eq 0 ]; then'
|
384
369
|
# If user is root, create an alias for sudo used in skypilot setup
|
@@ -386,56 +371,67 @@ def _check_user_privilege(namespace: str, context: Optional[str],
|
|
386
371
|
'else '
|
387
372
|
' if command -v sudo >/dev/null 2>&1; then '
|
388
373
|
' timeout 2 sudo -l >/dev/null 2>&1 && echo succeed || '
|
389
|
-
f' ( echo {exceptions.INSUFFICIENT_PRIVILEGES_CODE!r};
|
374
|
+
f' ( echo {exceptions.INSUFFICIENT_PRIVILEGES_CODE!r}; '
|
375
|
+
f' exit {exceptions.INSUFFICIENT_PRIVILEGES_CODE}; ); '
|
390
376
|
' else '
|
391
|
-
f' ( echo {exceptions.INSUFFICIENT_PRIVILEGES_CODE!r};
|
377
|
+
f' ( echo {exceptions.INSUFFICIENT_PRIVILEGES_CODE!r}; '
|
378
|
+
f' exit {exceptions.INSUFFICIENT_PRIVILEGES_CODE}; ); '
|
392
379
|
' fi; '
|
393
|
-
'fi')
|
380
|
+
'fi;')
|
381
|
+
|
382
|
+
# Kubernetes automatically populates containers with critical
|
383
|
+
# environment variables, such as those for discovering services running
|
384
|
+
# in the cluster and CUDA/nvidia environment variables. We need to
|
385
|
+
# make sure these env vars are available in every task and ssh session.
|
386
|
+
# This is needed for GPU support and service discovery.
|
387
|
+
# See https://github.com/skypilot-org/skypilot/issues/2287 for more details.
|
388
|
+
# To do so, we capture env vars from the pod's runtime and write them to
|
389
|
+
# /etc/profile.d/, making them available for all users in future
|
390
|
+
# shell sessions.
|
391
|
+
set_k8s_env_var_cmd = docker_utils.SETUP_ENV_VARS_CMD
|
394
392
|
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
393
|
+
check_apt_update_complete_cmd = (
|
394
|
+
'echo "Checking if apt update from container init is complete..."; '
|
395
|
+
'timeout_secs=600; '
|
396
|
+
'start_time=$(date +%s); '
|
397
|
+
'while ! grep -q "Fetched" /tmp/apt-update.log 2>/dev/null; do '
|
398
|
+
' echo "apt update still running. Logs:"; '
|
399
|
+
' cat /tmp/apt-update.log; '
|
400
|
+
' current_time=$(date +%s); '
|
401
|
+
' elapsed=$((current_time - start_time)); '
|
402
|
+
' if [ $elapsed -ge $timeout_secs ]; then '
|
403
|
+
' echo "Timed out waiting for apt update"; '
|
404
|
+
' exit 1; '
|
405
|
+
' fi; '
|
406
|
+
' sleep 5; '
|
407
|
+
'done; '
|
408
|
+
'echo "apt update complete."; ')
|
399
409
|
|
400
|
-
|
401
|
-
((namespace, context), pod_name))
|
402
|
-
logger.info(f'{"-"*20}Start: Check user privilege in pod {pod_name!r} '
|
403
|
-
f'{"-"*20}')
|
404
|
-
|
405
|
-
def _run_privilege_check():
|
406
|
-
rc, stdout, stderr = runner.run(check_k8s_user_sudo_cmd,
|
407
|
-
require_outputs=True,
|
408
|
-
separate_stderr=True,
|
409
|
-
stream_logs=False)
|
410
|
-
_raise_command_running_error('check user privilege',
|
411
|
-
check_k8s_user_sudo_cmd, pod_name, rc,
|
412
|
-
stdout + stderr)
|
413
|
-
return stdout
|
414
|
-
|
415
|
-
stdout = _run_function_with_retries(
|
416
|
-
_run_privilege_check, f'check user privilege in pod {pod_name!r}')
|
417
|
-
|
418
|
-
if stdout == str(exceptions.INSUFFICIENT_PRIVILEGES_CODE):
|
419
|
-
raise config_lib.KubernetesError(
|
420
|
-
'Insufficient system privileges detected. '
|
421
|
-
'Ensure the default user has root access or '
|
422
|
-
'"sudo" is installed and the user is added to the sudoers '
|
423
|
-
'from the image.')
|
424
|
-
logger.info(f'{"-"*20}End: Check user privilege in pod {pod_name!r} '
|
425
|
-
f'{"-"*20}')
|
426
|
-
|
427
|
-
|
428
|
-
def _setup_ssh_in_pods(namespace: str, context: Optional[str],
|
429
|
-
new_nodes: List) -> None:
|
430
|
-
# Setting up ssh for the pod instance. This is already setup for
|
431
|
-
# the jump pod so it does not need to be run for it.
|
432
|
-
set_k8s_ssh_cmd = (
|
433
|
-
'set -ex; '
|
410
|
+
install_ssh_k8s_cmd = (
|
434
411
|
'prefix_cmd() '
|
435
412
|
'{ if [ $(id -u) -ne 0 ]; then echo "sudo"; else echo ""; fi; }; '
|
436
413
|
'export DEBIAN_FRONTEND=noninteractive;'
|
437
|
-
'
|
438
|
-
'
|
414
|
+
'echo "Installing missing packages..."; '
|
415
|
+
'for i in {1..5}; do '
|
416
|
+
' output=$($(prefix_cmd) apt install openssh-server rsync -y 2>&1); '
|
417
|
+
' rc=$?; '
|
418
|
+
' if [ $rc -eq 0 ]; then '
|
419
|
+
' break; '
|
420
|
+
' fi; '
|
421
|
+
' echo "$output" | grep -qi "could not get lock" || '
|
422
|
+
' grep -qi "Unable to acquire the dpkg frontend lock"; '
|
423
|
+
' if [ $? -eq 0 ]; then '
|
424
|
+
' echo "apt install failed due to lock, retrying. (Attempt $i/5)"; '
|
425
|
+
' sleep 5; '
|
426
|
+
' else '
|
427
|
+
' echo "apt install failed for a non-lock reason: $output"; '
|
428
|
+
' exit $rc; '
|
429
|
+
' fi; '
|
430
|
+
'done; '
|
431
|
+
'if [ $rc -ne 0 ]; then '
|
432
|
+
' echo "apt install failed after 5 attempts due to lock errors."; '
|
433
|
+
' exit $rc; '
|
434
|
+
'fi; '
|
439
435
|
'$(prefix_cmd) mkdir -p /var/run/sshd; '
|
440
436
|
'$(prefix_cmd) '
|
441
437
|
'sed -i "s/PermitRootLogin prohibit-password/PermitRootLogin yes/" '
|
@@ -456,24 +452,35 @@ def _setup_ssh_in_pods(namespace: str, context: Optional[str],
|
|
456
452
|
# See https://www.educative.io/answers/error-mesg-ttyname-failed-inappropriate-ioctl-for-device # pylint: disable=line-too-long
|
457
453
|
'$(prefix_cmd) sed -i "s/mesg n/tty -s \\&\\& mesg n/" ~/.profile;')
|
458
454
|
|
459
|
-
|
455
|
+
pre_init_cmd = ('set -ex; ' + check_k8s_user_sudo_cmd +
|
456
|
+
set_k8s_env_var_cmd + check_apt_update_complete_cmd +
|
457
|
+
install_ssh_k8s_cmd)
|
458
|
+
|
459
|
+
def _pre_init_thread(new_node):
|
460
460
|
pod_name = new_node.metadata.name
|
461
|
+
logger.info(f'{"-"*20}Start: Pre-init in pod {pod_name!r} {"-"*20}')
|
461
462
|
runner = command_runner.KubernetesCommandRunner(
|
462
463
|
((namespace, context), pod_name))
|
463
|
-
logger.info(f'{"-"*20}Start: Set up SSH in pod {pod_name!r} {"-"*20}')
|
464
464
|
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
465
|
+
# Run the combined pre-init command
|
466
|
+
rc, stdout, _ = runner.run(pre_init_cmd,
|
467
|
+
require_outputs=True,
|
468
|
+
stream_logs=False)
|
469
|
+
if rc == exceptions.INSUFFICIENT_PRIVILEGES_CODE:
|
470
|
+
raise config_lib.KubernetesError(
|
471
|
+
'Insufficient system privileges detected. '
|
472
|
+
'Ensure the default user has root access or '
|
473
|
+
'"sudo" is installed and the user is added to the sudoers '
|
474
|
+
'from the image.')
|
475
|
+
|
476
|
+
op_name = 'pre-init'
|
477
|
+
_raise_command_running_error(op_name, pre_init_cmd, pod_name, rc,
|
478
|
+
stdout)
|
471
479
|
|
472
|
-
|
473
|
-
f'setup ssh in pod {pod_name!r}')
|
474
|
-
logger.info(f'{"-"*20}End: Set up SSH in pod {pod_name!r} {"-"*20}')
|
480
|
+
logger.info(f'{"-"*20}End: Pre-init in pod {pod_name!r} {"-"*20}')
|
475
481
|
|
476
|
-
|
482
|
+
# Run pre_init in parallel across all new_nodes
|
483
|
+
subprocess_utils.run_in_parallel(_pre_init_thread, new_nodes, NUM_THREADS)
|
477
484
|
|
478
485
|
|
479
486
|
def _label_pod(namespace: str, context: Optional[str], pod_name: str,
|
@@ -724,13 +731,8 @@ def _create_pods(region: str, cluster_name_on_cloud: str,
|
|
724
731
|
f'pods: {list(uninitialized_pods.keys())}')
|
725
732
|
uninitialized_pods_list = list(uninitialized_pods.values())
|
726
733
|
|
727
|
-
#
|
728
|
-
|
729
|
-
# on most base images. E.g., do not use Python, since that may not
|
730
|
-
# be installed by default.
|
731
|
-
_check_user_privilege(namespace, context, uninitialized_pods_list)
|
732
|
-
_setup_ssh_in_pods(namespace, context, uninitialized_pods_list)
|
733
|
-
_set_env_vars_in_pods(namespace, context, uninitialized_pods_list)
|
734
|
+
# Run pre-init steps in the pod.
|
735
|
+
pre_init(namespace, context, uninitialized_pods_list)
|
734
736
|
|
735
737
|
for pod in uninitialized_pods.values():
|
736
738
|
_label_pod(namespace,
|
@@ -1801,13 +1801,22 @@ def get_kubernetes_node_info(
|
|
1801
1801
|
number of GPUs available on the node and the number of free GPUs on the
|
1802
1802
|
node.
|
1803
1803
|
|
1804
|
+
If the user does not have sufficient permissions to list pods in all
|
1805
|
+
namespaces, the function will return free GPUs as -1.
|
1806
|
+
|
1804
1807
|
Returns:
|
1805
1808
|
Dict[str, KubernetesNodeInfo]: Dictionary containing the node name as
|
1806
1809
|
key and the KubernetesNodeInfo object as value
|
1807
1810
|
"""
|
1808
1811
|
nodes = get_kubernetes_nodes(context)
|
1809
1812
|
# Get the pods to get the real-time resource usage
|
1810
|
-
|
1813
|
+
try:
|
1814
|
+
pods = get_all_pods_in_kubernetes_cluster(context)
|
1815
|
+
except kubernetes.api_exception() as e:
|
1816
|
+
if e.status == 403:
|
1817
|
+
pods = None
|
1818
|
+
else:
|
1819
|
+
raise
|
1811
1820
|
|
1812
1821
|
label_formatter, _ = detect_gpu_label_formatter(context)
|
1813
1822
|
if not label_formatter:
|
@@ -1828,19 +1837,22 @@ def get_kubernetes_node_info(
|
|
1828
1837
|
accelerator_count = int(node.status.allocatable.get(
|
1829
1838
|
'nvidia.com/gpu', 0))
|
1830
1839
|
|
1831
|
-
|
1832
|
-
|
1833
|
-
|
1834
|
-
|
1835
|
-
|
1836
|
-
#
|
1837
|
-
|
1838
|
-
|
1839
|
-
|
1840
|
-
|
1841
|
-
|
1842
|
-
|
1843
|
-
|
1840
|
+
if pods is None:
|
1841
|
+
accelerators_available = -1
|
1842
|
+
|
1843
|
+
else:
|
1844
|
+
for pod in pods:
|
1845
|
+
# Get all the pods running on the node
|
1846
|
+
if (pod.spec.node_name == node.metadata.name and
|
1847
|
+
pod.status.phase in ['Running', 'Pending']):
|
1848
|
+
# Iterate over all the containers in the pod and sum the
|
1849
|
+
# GPU requests
|
1850
|
+
for container in pod.spec.containers:
|
1851
|
+
if container.resources.requests:
|
1852
|
+
allocated_qty += int(
|
1853
|
+
container.resources.requests.get(
|
1854
|
+
'nvidia.com/gpu', 0))
|
1855
|
+
accelerators_available = accelerator_count - allocated_qty
|
1844
1856
|
|
1845
1857
|
node_info_dict[node.metadata.name] = KubernetesNodeInfo(
|
1846
1858
|
name=node.metadata.name,
|
{skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/serve/__init__.py
RENAMED
@@ -11,6 +11,7 @@ from sky.serve.core import tail_logs
|
|
11
11
|
from sky.serve.core import terminate_replica
|
12
12
|
from sky.serve.core import up
|
13
13
|
from sky.serve.core import update
|
14
|
+
from sky.serve.load_balancing_policies import LB_POLICIES
|
14
15
|
from sky.serve.serve_state import ReplicaStatus
|
15
16
|
from sky.serve.serve_state import ServiceStatus
|
16
17
|
from sky.serve.serve_utils import DEFAULT_UPDATE_MODE
|
@@ -35,6 +36,7 @@ __all__ = [
|
|
35
36
|
'get_endpoint',
|
36
37
|
'INITIAL_VERSION',
|
37
38
|
'LB_CONTROLLER_SYNC_INTERVAL_SECONDS',
|
39
|
+
'LB_POLICIES',
|
38
40
|
'ReplicaStatus',
|
39
41
|
'ServiceComponent',
|
40
42
|
'ServiceStatus',
|
{skypilot_nightly-1.0.0.dev20241110 → skypilot_nightly-1.0.0.dev20241112}/sky/serve/load_balancer.py
RENAMED
@@ -2,7 +2,7 @@
|
|
2
2
|
import asyncio
|
3
3
|
import logging
|
4
4
|
import threading
|
5
|
-
from typing import Dict, Union
|
5
|
+
from typing import Dict, Optional, Union
|
6
6
|
|
7
7
|
import aiohttp
|
8
8
|
import fastapi
|
@@ -27,18 +27,24 @@ class SkyServeLoadBalancer:
|
|
27
27
|
policy.
|
28
28
|
"""
|
29
29
|
|
30
|
-
def __init__(self,
|
30
|
+
def __init__(self,
|
31
|
+
controller_url: str,
|
32
|
+
load_balancer_port: int,
|
33
|
+
load_balancing_policy_name: Optional[str] = None) -> None:
|
31
34
|
"""Initialize the load balancer.
|
32
35
|
|
33
36
|
Args:
|
34
37
|
controller_url: The URL of the controller.
|
35
38
|
load_balancer_port: The port where the load balancer listens to.
|
39
|
+
load_balancing_policy_name: The name of the load balancing policy
|
40
|
+
to use. Defaults to None.
|
36
41
|
"""
|
37
42
|
self._app = fastapi.FastAPI()
|
38
43
|
self._controller_url: str = controller_url
|
39
44
|
self._load_balancer_port: int = load_balancer_port
|
40
|
-
|
41
|
-
|
45
|
+
# Use the registry to create the load balancing policy
|
46
|
+
self._load_balancing_policy = lb_policies.LoadBalancingPolicy.make(
|
47
|
+
load_balancing_policy_name)
|
42
48
|
self._request_aggregator: serve_utils.RequestsAggregator = (
|
43
49
|
serve_utils.RequestTimestamp())
|
44
50
|
# TODO(tian): httpx.Client has a resource limit of 100 max connections
|
@@ -223,9 +229,21 @@ class SkyServeLoadBalancer:
|
|
223
229
|
uvicorn.run(self._app, host='0.0.0.0', port=self._load_balancer_port)
|
224
230
|
|
225
231
|
|
226
|
-
def run_load_balancer(controller_addr: str,
|
227
|
-
|
228
|
-
|
232
|
+
def run_load_balancer(controller_addr: str,
|
233
|
+
load_balancer_port: int,
|
234
|
+
load_balancing_policy_name: Optional[str] = None) -> None:
|
235
|
+
""" Run the load balancer.
|
236
|
+
|
237
|
+
Args:
|
238
|
+
controller_addr: The address of the controller.
|
239
|
+
load_balancer_port: The port where the load balancer listens to.
|
240
|
+
load_balancing_policy_name: The name of the load balancing policy to use. Defaults to
|
241
|
+
None.
|
242
|
+
"""
|
243
|
+
load_balancer = SkyServeLoadBalancer(
|
244
|
+
controller_url=controller_addr,
|
245
|
+
load_balancer_port=load_balancer_port,
|
246
|
+
load_balancing_policy_name=load_balancing_policy_name)
|
229
247
|
load_balancer.run()
|
230
248
|
|
231
249
|
|
@@ -241,5 +259,13 @@ if __name__ == '__main__':
|
|
241
259
|
required=True,
|
242
260
|
default=8890,
|
243
261
|
help='The port where the load balancer listens to.')
|
262
|
+
available_policies = list(lb_policies.LB_POLICIES.keys())
|
263
|
+
parser.add_argument(
|
264
|
+
'--load-balancing-policy',
|
265
|
+
choices=available_policies,
|
266
|
+
default='round_robin',
|
267
|
+
help=f'The load balancing policy to use. Available policies: '
|
268
|
+
f'{", ".join(available_policies)}.')
|
244
269
|
args = parser.parse_args()
|
245
|
-
run_load_balancer(args.controller_addr, args.load_balancer_port
|
270
|
+
run_load_balancer(args.controller_addr, args.load_balancer_port,
|
271
|
+
args.load_balancing_policy)
|
@@ -10,6 +10,10 @@ if typing.TYPE_CHECKING:
|
|
10
10
|
|
11
11
|
logger = sky_logging.init_logger(__name__)
|
12
12
|
|
13
|
+
# Define a registry for load balancing policies
|
14
|
+
LB_POLICIES = {}
|
15
|
+
DEFAULT_LB_POLICY = None
|
16
|
+
|
13
17
|
|
14
18
|
def _request_repr(request: 'fastapi.Request') -> str:
|
15
19
|
return ('<Request '
|
@@ -25,6 +29,24 @@ class LoadBalancingPolicy:
|
|
25
29
|
def __init__(self) -> None:
|
26
30
|
self.ready_replicas: List[str] = []
|
27
31
|
|
32
|
+
def __init_subclass__(cls, name: str, default: bool = False):
|
33
|
+
LB_POLICIES[name] = cls
|
34
|
+
if default:
|
35
|
+
global DEFAULT_LB_POLICY
|
36
|
+
assert DEFAULT_LB_POLICY is None, (
|
37
|
+
'Only one policy can be default.')
|
38
|
+
DEFAULT_LB_POLICY = name
|
39
|
+
|
40
|
+
@classmethod
|
41
|
+
def make(cls, policy_name: Optional[str] = None) -> 'LoadBalancingPolicy':
|
42
|
+
"""Create a load balancing policy from a name."""
|
43
|
+
if policy_name is None:
|
44
|
+
policy_name = DEFAULT_LB_POLICY
|
45
|
+
|
46
|
+
if policy_name not in LB_POLICIES:
|
47
|
+
raise ValueError(f'Unknown load balancing policy: {policy_name}')
|
48
|
+
return LB_POLICIES[policy_name]()
|
49
|
+
|
28
50
|
def set_ready_replicas(self, ready_replicas: List[str]) -> None:
|
29
51
|
raise NotImplementedError
|
30
52
|
|
@@ -44,7 +66,7 @@ class LoadBalancingPolicy:
|
|
44
66
|
raise NotImplementedError
|
45
67
|
|
46
68
|
|
47
|
-
class RoundRobinPolicy(LoadBalancingPolicy):
|
69
|
+
class RoundRobinPolicy(LoadBalancingPolicy, name='round_robin', default=True):
|
48
70
|
"""Round-robin load balancing policy."""
|
49
71
|
|
50
72
|
def __init__(self) -> None:
|