skypilot-nightly 1.0.0.dev20250310__tar.gz → 1.0.0.dev20250312__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (375) hide show
  1. {skypilot_nightly-1.0.0.dev20250310/skypilot_nightly.egg-info → skypilot_nightly-1.0.0.dev20250312}/PKG-INFO +1 -1
  2. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/__init__.py +2 -2
  3. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/nebius.py +11 -1
  4. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/backend_utils.py +38 -15
  5. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/cloud_vm_ray_backend.py +17 -52
  6. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/kubernetes.py +89 -9
  7. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/nebius.py +8 -6
  8. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/kubernetes_catalog.py +3 -2
  9. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/exceptions.py +20 -3
  10. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/network.py +7 -0
  11. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/network_utils.py +3 -2
  12. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/utils.py +22 -15
  13. skypilot_nightly-1.0.0.dev20250312/sky/server/requests/event_loop.py +31 -0
  14. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/executor.py +50 -22
  15. skypilot_nightly-1.0.0.dev20250312/sky/server/requests/preconditions.py +174 -0
  16. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/requests.py +42 -3
  17. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/server.py +29 -8
  18. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/stream_utils.py +9 -6
  19. skypilot_nightly-1.0.0.dev20250312/sky/server/uvicorn.py +81 -0
  20. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/accelerator_registry.py +1 -1
  21. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/subprocess_utils.py +56 -1
  22. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312/skypilot_nightly.egg-info}/PKG-INFO +1 -1
  23. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/skypilot_nightly.egg-info/SOURCES.txt +3 -0
  24. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/LICENSE +0 -0
  25. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/MANIFEST.in +0 -0
  26. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/README.md +0 -0
  27. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/pyproject.toml +0 -0
  28. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/setup.cfg +0 -0
  29. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/setup.py +0 -0
  30. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/__init__.py +0 -0
  31. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/aws.py +0 -0
  32. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/azure.py +0 -0
  33. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/cloudflare.py +0 -0
  34. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/common.py +0 -0
  35. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/cudo.py +0 -0
  36. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/do.py +0 -0
  37. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/docker.py +0 -0
  38. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/gcp.py +0 -0
  39. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/ibm.py +0 -0
  40. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/kubernetes.py +0 -0
  41. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/oci.py +0 -0
  42. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/runpod.py +0 -0
  43. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/vast.py +0 -0
  44. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/adaptors/vsphere.py +0 -0
  45. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/admin_policy.py +0 -0
  46. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/authentication.py +0 -0
  47. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/__init__.py +0 -0
  48. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/backend.py +0 -0
  49. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/docker_utils.py +0 -0
  50. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/local_docker_backend.py +0 -0
  51. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/monkey_patches/monkey_patch_ray_up.py +0 -0
  52. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/backends/wheel_utils.py +0 -0
  53. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/benchmark/__init__.py +0 -0
  54. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/benchmark/benchmark_state.py +0 -0
  55. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/benchmark/benchmark_utils.py +0 -0
  56. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/check.py +0 -0
  57. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/cli.py +0 -0
  58. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/client/__init__.py +0 -0
  59. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/client/cli.py +0 -0
  60. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/client/common.py +0 -0
  61. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/client/sdk.py +0 -0
  62. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/cloud_stores.py +0 -0
  63. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/__init__.py +0 -0
  64. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/aws.py +0 -0
  65. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/azure.py +0 -0
  66. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/cloud.py +0 -0
  67. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/cudo.py +0 -0
  68. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/do.py +0 -0
  69. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/fluidstack.py +0 -0
  70. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/gcp.py +0 -0
  71. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/ibm.py +0 -0
  72. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/lambda_cloud.py +0 -0
  73. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/oci.py +0 -0
  74. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/paperspace.py +0 -0
  75. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/runpod.py +0 -0
  76. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/scp.py +0 -0
  77. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/__init__.py +0 -0
  78. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/aws_catalog.py +0 -0
  79. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/azure_catalog.py +0 -0
  80. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/common.py +0 -0
  81. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/config.py +0 -0
  82. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/constants.py +0 -0
  83. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/cudo_catalog.py +0 -0
  84. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/__init__.py +0 -0
  85. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_aws.py +0 -0
  86. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_azure.py +0 -0
  87. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +0 -0
  88. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +0 -0
  89. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +0 -0
  90. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +0 -0
  91. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_vast.py +0 -0
  92. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +0 -0
  93. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/do_catalog.py +0 -0
  94. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/fluidstack_catalog.py +0 -0
  95. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/gcp_catalog.py +0 -0
  96. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/ibm_catalog.py +0 -0
  97. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/lambda_catalog.py +0 -0
  98. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/nebius_catalog.py +0 -0
  99. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/oci_catalog.py +0 -0
  100. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/paperspace_catalog.py +0 -0
  101. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/runpod_catalog.py +0 -0
  102. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/scp_catalog.py +0 -0
  103. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/vast_catalog.py +0 -0
  104. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/service_catalog/vsphere_catalog.py +0 -0
  105. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/utils/__init__.py +0 -0
  106. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/utils/aws_utils.py +0 -0
  107. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/utils/azure_utils.py +0 -0
  108. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/utils/gcp_utils.py +0 -0
  109. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/utils/oci_utils.py +0 -0
  110. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/utils/scp_utils.py +0 -0
  111. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/vast.py +0 -0
  112. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/clouds/vsphere.py +0 -0
  113. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/core.py +0 -0
  114. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/dag.py +0 -0
  115. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/data/__init__.py +0 -0
  116. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/data/data_transfer.py +0 -0
  117. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/data/data_utils.py +0 -0
  118. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/data/mounting_utils.py +0 -0
  119. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/data/storage.py +0 -0
  120. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/data/storage_utils.py +0 -0
  121. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/execution.py +0 -0
  122. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/global_user_state.py +0 -0
  123. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/__init__.py +0 -0
  124. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/client/__init__.py +0 -0
  125. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/client/sdk.py +0 -0
  126. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/constants.py +0 -0
  127. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/controller.py +0 -0
  128. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/dashboard/dashboard.py +0 -0
  129. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/dashboard/static/favicon.ico +0 -0
  130. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/dashboard/templates/index.html +0 -0
  131. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/recovery_strategy.py +0 -0
  132. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/scheduler.py +0 -0
  133. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/server/__init__.py +0 -0
  134. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/server/core.py +0 -0
  135. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/server/dashboard_utils.py +0 -0
  136. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/server/server.py +0 -0
  137. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/state.py +0 -0
  138. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/jobs/utils.py +0 -0
  139. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/models.py +0 -0
  140. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/optimizer.py +0 -0
  141. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/__init__.py +0 -0
  142. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/aws/__init__.py +0 -0
  143. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/aws/config.py +0 -0
  144. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/aws/instance.py +0 -0
  145. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/aws/utils.py +0 -0
  146. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/azure/__init__.py +0 -0
  147. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/azure/azure-config-template.json +0 -0
  148. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/azure/config.py +0 -0
  149. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/azure/instance.py +0 -0
  150. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/common.py +0 -0
  151. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/constants.py +0 -0
  152. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/cudo/__init__.py +0 -0
  153. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/cudo/config.py +0 -0
  154. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/cudo/cudo_machine_type.py +0 -0
  155. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/cudo/cudo_utils.py +0 -0
  156. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/cudo/cudo_wrapper.py +0 -0
  157. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/cudo/instance.py +0 -0
  158. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/do/__init__.py +0 -0
  159. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/do/config.py +0 -0
  160. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/do/constants.py +0 -0
  161. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/do/instance.py +0 -0
  162. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/do/utils.py +0 -0
  163. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/docker_utils.py +0 -0
  164. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/fluidstack/__init__.py +0 -0
  165. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/fluidstack/config.py +0 -0
  166. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/fluidstack/fluidstack_utils.py +0 -0
  167. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/fluidstack/instance.py +0 -0
  168. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/gcp/__init__.py +0 -0
  169. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/gcp/config.py +0 -0
  170. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/gcp/constants.py +0 -0
  171. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/gcp/instance.py +0 -0
  172. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/gcp/instance_utils.py +0 -0
  173. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/gcp/mig_utils.py +0 -0
  174. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/instance_setup.py +0 -0
  175. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/__init__.py +0 -0
  176. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/config.py +0 -0
  177. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/constants.py +0 -0
  178. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/instance.py +0 -0
  179. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/manifests/smarter-device-manager-configmap.yaml +0 -0
  180. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +0 -0
  181. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/lambda_cloud/__init__.py +0 -0
  182. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/lambda_cloud/config.py +0 -0
  183. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/lambda_cloud/instance.py +0 -0
  184. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/lambda_cloud/lambda_utils.py +0 -0
  185. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/logging.py +0 -0
  186. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/metadata_utils.py +0 -0
  187. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/nebius/__init__.py +0 -0
  188. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/nebius/config.py +0 -0
  189. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/nebius/instance.py +0 -0
  190. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/nebius/utils.py +0 -0
  191. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/oci/__init__.py +0 -0
  192. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/oci/config.py +0 -0
  193. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/oci/instance.py +0 -0
  194. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/oci/query_utils.py +0 -0
  195. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/paperspace/__init__.py +0 -0
  196. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/paperspace/config.py +0 -0
  197. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/paperspace/constants.py +0 -0
  198. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/paperspace/instance.py +0 -0
  199. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/paperspace/utils.py +0 -0
  200. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/provisioner.py +0 -0
  201. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/runpod/__init__.py +0 -0
  202. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/runpod/api/__init__.py +0 -0
  203. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/runpod/api/commands.py +0 -0
  204. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/runpod/api/pods.py +0 -0
  205. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/runpod/config.py +0 -0
  206. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/runpod/instance.py +0 -0
  207. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/runpod/utils.py +0 -0
  208. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vast/__init__.py +0 -0
  209. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vast/config.py +0 -0
  210. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vast/instance.py +0 -0
  211. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vast/utils.py +0 -0
  212. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/__init__.py +0 -0
  213. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/__init__.py +0 -0
  214. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/cls_api_client.py +0 -0
  215. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/cls_api_helper.py +0 -0
  216. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/custom_script.py +0 -0
  217. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/id_generator.py +0 -0
  218. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/metadata_utils.py +0 -0
  219. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/service_manager.py +0 -0
  220. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/service_manager_factory.py +0 -0
  221. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/ssl_helper.py +0 -0
  222. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/vapiconnect.py +0 -0
  223. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/common/vim_utils.py +0 -0
  224. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/config.py +0 -0
  225. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/instance.py +0 -0
  226. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/provision/vsphere/vsphere_utils.py +0 -0
  227. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/resources.py +0 -0
  228. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/__init__.py +0 -0
  229. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/autoscalers.py +0 -0
  230. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/client/__init__.py +0 -0
  231. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/client/sdk.py +0 -0
  232. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/constants.py +0 -0
  233. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/controller.py +0 -0
  234. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/load_balancer.py +0 -0
  235. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/load_balancing_policies.py +0 -0
  236. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/replica_managers.py +0 -0
  237. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/serve_state.py +0 -0
  238. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/serve_utils.py +0 -0
  239. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/server/__init__.py +0 -0
  240. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/server/core.py +0 -0
  241. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/server/server.py +0 -0
  242. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/service.py +0 -0
  243. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/serve/service_spec.py +0 -0
  244. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/__init__.py +0 -0
  245. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/common.py +0 -0
  246. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/constants.py +0 -0
  247. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/html/log.html +0 -0
  248. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/__init__.py +0 -0
  249. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/payloads.py +0 -0
  250. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/queues/__init__.py +0 -0
  251. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/queues/mp_queue.py +0 -0
  252. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/serializers/__init__.py +0 -0
  253. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/serializers/decoders.py +0 -0
  254. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/server/requests/serializers/encoders.py +0 -0
  255. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/setup_files/MANIFEST.in +0 -0
  256. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/setup_files/dependencies.py +0 -0
  257. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/setup_files/setup.py +0 -0
  258. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/sky_logging.py +0 -0
  259. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/LICENSE +0 -0
  260. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/__init__.py +0 -0
  261. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/attempt_skylet.py +0 -0
  262. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/autostop_lib.py +0 -0
  263. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/configs.py +0 -0
  264. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/constants.py +0 -0
  265. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/events.py +0 -0
  266. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/job_lib.py +0 -0
  267. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/log_lib.py +0 -0
  268. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/log_lib.pyi +0 -0
  269. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/__init__.py +0 -0
  270. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/command_runner.py +0 -0
  271. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/ibm/__init__.py +0 -0
  272. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/ibm/node_provider.py +0 -0
  273. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/ibm/utils.py +0 -0
  274. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/ibm/vpc_provider.py +0 -0
  275. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/scp/__init__.py +0 -0
  276. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/scp/config.py +0 -0
  277. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/providers/scp/node_provider.py +0 -0
  278. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/__init__.py +0 -0
  279. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/autoscaler.py.patch +0 -0
  280. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/cli.py.patch +0 -0
  281. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/command_runner.py.patch +0 -0
  282. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/log_monitor.py.patch +0 -0
  283. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/resource_demand_scheduler.py.patch +0 -0
  284. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/updater.py.patch +0 -0
  285. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/ray_patches/worker.py.patch +0 -0
  286. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/skylet.py +0 -0
  287. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skylet/subprocess_daemon.py +0 -0
  288. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/skypilot_config.py +0 -0
  289. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/task.py +0 -0
  290. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/aws-ray.yml.j2 +0 -0
  291. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/azure-ray.yml.j2 +0 -0
  292. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/cudo-ray.yml.j2 +0 -0
  293. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/do-ray.yml.j2 +0 -0
  294. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/fluidstack-ray.yml.j2 +0 -0
  295. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/gcp-ray.yml.j2 +0 -0
  296. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/ibm-ray.yml.j2 +0 -0
  297. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/jobs-controller.yaml.j2 +0 -0
  298. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/kubernetes-ingress.yml.j2 +0 -0
  299. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/kubernetes-loadbalancer.yml.j2 +0 -0
  300. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/kubernetes-port-forward-proxy-command.sh +0 -0
  301. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/kubernetes-ray.yml.j2 +0 -0
  302. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/kubernetes-ssh-jump.yml.j2 +0 -0
  303. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/lambda-ray.yml.j2 +0 -0
  304. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/local-ray.yml.j2 +0 -0
  305. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/nebius-ray.yml.j2 +0 -0
  306. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/oci-ray.yml.j2 +0 -0
  307. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/paperspace-ray.yml.j2 +0 -0
  308. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/runpod-ray.yml.j2 +0 -0
  309. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/scp-ray.yml.j2 +0 -0
  310. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/sky-serve-controller.yaml.j2 +0 -0
  311. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/skypilot-server-kubernetes-proxy.sh +0 -0
  312. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/vast-ray.yml.j2 +0 -0
  313. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/vsphere-ray.yml.j2 +0 -0
  314. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/templates/websocket_proxy.py +0 -0
  315. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/usage/__init__.py +0 -0
  316. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/usage/constants.py +0 -0
  317. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/usage/usage_lib.py +0 -0
  318. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/__init__.py +0 -0
  319. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/admin_policy_utils.py +0 -0
  320. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/annotations.py +0 -0
  321. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/cli_utils/__init__.py +0 -0
  322. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/cli_utils/status_utils.py +0 -0
  323. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/cluster_utils.py +0 -0
  324. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/command_runner.py +0 -0
  325. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/command_runner.pyi +0 -0
  326. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/common.py +0 -0
  327. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/common_utils.py +0 -0
  328. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/config_utils.py +0 -0
  329. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/control_master_utils.py +0 -0
  330. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/controller_utils.py +0 -0
  331. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/dag_utils.py +0 -0
  332. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/db_utils.py +0 -0
  333. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/env_options.py +0 -0
  334. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/__init__.py +0 -0
  335. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/create_cluster.sh +0 -0
  336. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/delete_cluster.sh +0 -0
  337. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/deploy_remote_cluster.sh +0 -0
  338. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/exec_kubeconfig_converter.py +0 -0
  339. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/generate_kind_config.py +0 -0
  340. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/generate_kubeconfig.sh +0 -0
  341. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/gpu_labeler.py +0 -0
  342. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +0 -0
  343. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +0 -0
  344. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/kubernetes_deploy_utils.py +0 -0
  345. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/rsync_helper.sh +0 -0
  346. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -0
  347. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/kubernetes_enums.py +0 -0
  348. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/log_utils.py +0 -0
  349. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/message_utils.py +0 -0
  350. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/registry.py +0 -0
  351. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/resources_utils.py +0 -0
  352. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/rich_utils.py +0 -0
  353. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/schemas.py +0 -0
  354. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/status_lib.py +0 -0
  355. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/timeline.py +0 -0
  356. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/ux_utils.py +0 -0
  357. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/sky/utils/validator.py +0 -0
  358. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/skypilot_nightly.egg-info/dependency_links.txt +0 -0
  359. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/skypilot_nightly.egg-info/entry_points.txt +0 -0
  360. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/skypilot_nightly.egg-info/requires.txt +0 -0
  361. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/skypilot_nightly.egg-info/top_level.txt +0 -0
  362. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_api.py +0 -0
  363. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_cli.py +0 -0
  364. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_config.py +0 -0
  365. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_global_user_state.py +0 -0
  366. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_jobs.py +0 -0
  367. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_jobs_and_serve.py +0 -0
  368. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_list_accelerators.py +0 -0
  369. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_optimizer_dryruns.py +0 -0
  370. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_optimizer_random_dag.py +0 -0
  371. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_serve_autoscaler.py +0 -0
  372. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_smoke.py +0 -0
  373. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_storage.py +0 -0
  374. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_wheels.py +0 -0
  375. {skypilot_nightly-1.0.0.dev20250310 → skypilot_nightly-1.0.0.dev20250312}/tests/test_yaml_parser.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250310
3
+ Version: 1.0.0.dev20250312
4
4
  Summary: SkyPilot: An intercloud broker for the clouds
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = 'f5170ee3a1a6711f9ed54941aae2dee4e6525acc'
8
+ _SKYPILOT_COMMIT_SHA = '78a42b6e733bbc29b68efe0e9c79191eaaca9fcd'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20250310'
38
+ __version__ = '1.0.0.dev20250312'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
@@ -6,9 +6,11 @@ from sky.adaptors import common
6
6
  NEBIUS_TENANT_ID_FILENAME = 'NEBIUS_TENANT_ID.txt'
7
7
  NEBIUS_IAM_TOKEN_FILENAME = 'NEBIUS_IAM_TOKEN.txt'
8
8
  NEBIUS_PROJECT_ID_FILENAME = 'NEBIUS_PROJECT_ID.txt'
9
+ NEBIUS_CREDENTIALS_FILENAME = 'credentials.json'
9
10
  NEBIUS_TENANT_ID_PATH = '~/.nebius/' + NEBIUS_TENANT_ID_FILENAME
10
11
  NEBIUS_IAM_TOKEN_PATH = '~/.nebius/' + NEBIUS_IAM_TOKEN_FILENAME
11
12
  NEBIUS_PROJECT_ID_PATH = '~/.nebius/' + NEBIUS_PROJECT_ID_FILENAME
13
+ NEBIUS_CREDENTIALS_PATH = '~/.nebius/' + NEBIUS_CREDENTIALS_FILENAME
12
14
 
13
15
  MAX_RETRIES_TO_DISK_CREATE = 120
14
16
  MAX_RETRIES_TO_INSTANCE_STOP = 120
@@ -72,6 +74,11 @@ def get_iam_token():
72
74
  return _iam_token
73
75
 
74
76
 
77
+ def is_token_or_cred_file_exist():
78
+ return (os.path.exists(os.path.expanduser(NEBIUS_IAM_TOKEN_PATH)) or
79
+ os.path.exists(os.path.expanduser(NEBIUS_CREDENTIALS_PATH)))
80
+
81
+
75
82
  def get_project_id():
76
83
  global _project_id
77
84
  if _project_id is None:
@@ -97,4 +104,7 @@ def get_tenant_id():
97
104
 
98
105
 
99
106
  def sdk():
100
- return nebius.sdk.SDK(credentials=get_iam_token())
107
+ if get_iam_token() is not None:
108
+ return nebius.sdk.SDK(credentials=get_iam_token())
109
+ return nebius.sdk.SDK(
110
+ credentials_file_name=os.path.expanduser(NEBIUS_CREDENTIALS_PATH))
@@ -1802,6 +1802,21 @@ def _update_cluster_status(cluster_name: str) -> Optional[Dict[str, Any]]:
1802
1802
  status == status_lib.ClusterStatus.UP for status in node_statuses) and
1803
1803
  len(node_statuses) == handle.launched_nodes)
1804
1804
 
1805
+ def get_node_counts_from_ray_status(
1806
+ runner: command_runner.CommandRunner) -> Tuple[int, int, str, str]:
1807
+ rc, output, stderr = runner.run(
1808
+ instance_setup.RAY_STATUS_WITH_SKY_RAY_PORT_COMMAND,
1809
+ stream_logs=False,
1810
+ require_outputs=True,
1811
+ separate_stderr=True)
1812
+ if rc:
1813
+ raise RuntimeError(
1814
+ f'Refreshing status ({cluster_name!r}): Failed to check '
1815
+ f'ray cluster\'s healthiness with '
1816
+ f'{instance_setup.RAY_STATUS_WITH_SKY_RAY_PORT_COMMAND}.\n'
1817
+ f'-- stdout --\n{output}\n-- stderr --\n{stderr}')
1818
+ return (*_count_healthy_nodes_from_ray(output), output, stderr)
1819
+
1805
1820
  def run_ray_status_to_check_ray_cluster_healthy() -> bool:
1806
1821
  try:
1807
1822
  # NOTE: fetching the IPs is very slow as it calls into
@@ -1822,26 +1837,34 @@ def _update_cluster_status(cluster_name: str) -> Optional[Dict[str, Any]]:
1822
1837
  raise exceptions.FetchClusterInfoError(
1823
1838
  reason=exceptions.FetchClusterInfoError.Reason.HEAD)
1824
1839
  head_runner = runners[0]
1825
- rc, output, stderr = head_runner.run(
1826
- instance_setup.RAY_STATUS_WITH_SKY_RAY_PORT_COMMAND,
1827
- stream_logs=False,
1828
- require_outputs=True,
1829
- separate_stderr=True)
1830
- if rc:
1831
- raise RuntimeError(
1832
- f'Refreshing status ({cluster_name!r}): Failed to check '
1833
- f'ray cluster\'s healthiness with '
1834
- f'{instance_setup.RAY_STATUS_WITH_SKY_RAY_PORT_COMMAND}.\n'
1835
- f'-- stdout --\n{output}\n-- stderr --\n{stderr}')
1836
1840
 
1837
- ready_head, ready_workers = _count_healthy_nodes_from_ray(output)
1838
1841
  total_nodes = handle.launched_nodes * handle.num_ips_per_node
1839
- if ready_head + ready_workers == total_nodes:
1840
- return True
1842
+
1843
+ for i in range(5):
1844
+ ready_head, ready_workers, output, stderr = (
1845
+ get_node_counts_from_ray_status(head_runner))
1846
+ if ready_head + ready_workers == total_nodes:
1847
+ return True
1848
+ logger.debug(f'Refreshing status ({cluster_name!r}) attempt '
1849
+ f'{i}: ray status not showing all nodes '
1850
+ f'({ready_head + ready_workers}/{total_nodes});\n'
1851
+ f'output:\n{output}\nstderr:\n{stderr}')
1852
+
1853
+ # If cluster JUST started, maybe not all the nodes have shown
1854
+ # up. Try again for a few seconds.
1855
+ # Note: We are okay with this performance hit because it's very
1856
+ # rare to normally hit this case. It requires:
1857
+ # - All the instances in the cluster are up on the cloud side
1858
+ # (not preempted), but
1859
+ # - The ray cluster is somehow degraded so not all instances are
1860
+ # showing up
1861
+ time.sleep(1)
1862
+
1841
1863
  raise RuntimeError(
1842
1864
  f'Refreshing status ({cluster_name!r}): ray status not showing '
1843
1865
  f'all nodes ({ready_head + ready_workers}/'
1844
- f'{total_nodes}); output: {output}; stderr: {stderr}')
1866
+ f'{total_nodes});\noutput:\n{output}\nstderr:\n{stderr}')
1867
+
1845
1868
  except exceptions.FetchClusterInfoError:
1846
1869
  logger.debug(
1847
1870
  f'Refreshing status ({cluster_name!r}) failed to get IPs.')
@@ -772,32 +772,6 @@ class FailoverCloudErrorHandlerV1:
772
772
  setattr(e, 'detailed_reason', detailed_reason)
773
773
  raise e
774
774
 
775
- @staticmethod
776
- def _lambda_handler(blocked_resources: Set['resources_lib.Resources'],
777
- launchable_resources: 'resources_lib.Resources',
778
- region: 'clouds.Region',
779
- zones: Optional[List['clouds.Zone']], stdout: str,
780
- stderr: str):
781
- del region, zones # Unused.
782
- errors = FailoverCloudErrorHandlerV1._handle_errors(
783
- stdout,
784
- stderr,
785
- is_error_str_known=lambda x: 'LambdaCloudError:' in x.strip())
786
- messages = '\n '.join(errors)
787
- style = colorama.Style
788
- logger.warning(f' {style.DIM}{messages}{style.RESET_ALL}')
789
- _add_to_blocked_resources(blocked_resources,
790
- launchable_resources.copy(zone=None))
791
-
792
- # Sometimes, LambdaCloudError will list available regions.
793
- for e in errors:
794
- if e.find('Regions with capacity available:') != -1:
795
- for r in service_catalog.regions('lambda'):
796
- if e.find(r.name) == -1:
797
- _add_to_blocked_resources(
798
- blocked_resources,
799
- launchable_resources.copy(region=r.name, zone=None))
800
-
801
775
  @staticmethod
802
776
  def _scp_handler(blocked_resources: Set['resources_lib.Resources'],
803
777
  launchable_resources: 'resources_lib.Resources',
@@ -846,32 +820,6 @@ class FailoverCloudErrorHandlerV1:
846
820
  _add_to_blocked_resources(blocked_resources,
847
821
  launchable_resources.copy(zone=zone.name))
848
822
 
849
- # Apr, 2023 by Hysun(hysun.he@oracle.com): Added support for OCI
850
- @staticmethod
851
- def _oci_handler(blocked_resources: Set['resources_lib.Resources'],
852
- launchable_resources: 'resources_lib.Resources',
853
- region: 'clouds.Region',
854
- zones: Optional[List['clouds.Zone']], stdout: str,
855
- stderr: str):
856
- known_service_errors = [
857
- 'NotAuthorizedOrNotFound', 'CannotParseRequest', 'InternalError',
858
- 'LimitExceeded', 'NotAuthenticated'
859
- ]
860
- errors = FailoverCloudErrorHandlerV1._handle_errors(
861
- stdout, stderr, lambda x: 'VcnSubnetNotFound' in x.strip() or
862
- ('oci.exceptions.ServiceError' in x.strip() and any(
863
- known_err in x.strip() for known_err in known_service_errors)))
864
- logger.warning(f'Got error(s) in {region.name}:')
865
- messages = '\n\t'.join(errors)
866
- style = colorama.Style
867
- logger.warning(f'{style.DIM}\t{messages}{style.RESET_ALL}')
868
-
869
- if zones is not None:
870
- for zone in zones:
871
- _add_to_blocked_resources(
872
- blocked_resources,
873
- launchable_resources.copy(zone=zone.name))
874
-
875
823
  @staticmethod
876
824
  def update_blocklist_on_error(
877
825
  blocked_resources: Set['resources_lib.Resources'],
@@ -1123,6 +1071,23 @@ class FailoverCloudErrorHandlerV2:
1123
1071
  blocked_resources,
1124
1072
  launchable_resources.copy(zone=zone.name))
1125
1073
 
1074
+ @staticmethod
1075
+ def _lambda_handler(blocked_resources: Set['resources_lib.Resources'],
1076
+ launchable_resources: 'resources_lib.Resources',
1077
+ region: 'clouds.Region',
1078
+ zones: Optional[List['clouds.Zone']], error: Exception):
1079
+ output = str(error)
1080
+ # Sometimes, lambda cloud error will list available regions.
1081
+ if output.find('Regions with capacity available:') != -1:
1082
+ for r in service_catalog.regions('lambda'):
1083
+ if output.find(r.name) == -1:
1084
+ _add_to_blocked_resources(
1085
+ blocked_resources,
1086
+ launchable_resources.copy(region=r.name, zone=None))
1087
+ else:
1088
+ FailoverCloudErrorHandlerV2._default_handler(
1089
+ blocked_resources, launchable_resources, region, zones, error)
1090
+
1126
1091
  @staticmethod
1127
1092
  def _default_handler(blocked_resources: Set['resources_lib.Resources'],
1128
1093
  launchable_resources: 'resources_lib.Resources',
@@ -2,9 +2,10 @@
2
2
  import os
3
3
  import re
4
4
  import typing
5
- from typing import Dict, Iterator, List, Optional, Tuple, Union
5
+ from typing import Dict, Iterator, List, Optional, Set, Tuple, Union
6
6
 
7
7
  from sky import clouds
8
+ from sky import exceptions
8
9
  from sky import sky_logging
9
10
  from sky import skypilot_config
10
11
  from sky.adaptors import kubernetes
@@ -78,6 +79,11 @@ class Kubernetes(clouds.Cloud):
78
79
  PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
79
80
  STATUS_VERSION = clouds.StatusVersion.SKYPILOT
80
81
 
82
+ _INDENT_PREFIX = ' ' * 4
83
+
84
+ # Set of contexts that has logged as temporarily unreachable
85
+ logged_unreachable_contexts: Set[str] = set()
86
+
81
87
  @property
82
88
  def ssh_key_secret_field_name(self):
83
89
  # Use a fresh user hash to avoid conflicts in the secret object naming.
@@ -90,6 +96,8 @@ class Kubernetes(clouds.Cloud):
90
96
  def _unsupported_features_for_resources(
91
97
  cls, resources: 'resources_lib.Resources'
92
98
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
99
+ # TODO(aylei): features need to be regional (per context) to make
100
+ # multi-kubernetes selection/failover work.
93
101
  unsupported_features = cls._CLOUD_UNSUPPORTED_FEATURES.copy()
94
102
  context = resources.region
95
103
  if context is None:
@@ -106,10 +114,13 @@ class Kubernetes(clouds.Cloud):
106
114
  unsupported_features[
107
115
  clouds.CloudImplementationFeatures.AUTO_TERMINATE] = message
108
116
  # Allow spot instances if supported by the cluster
109
- spot_label_key, _ = kubernetes_utils.get_spot_label(context)
110
- if spot_label_key is not None:
111
- unsupported_features.pop(
112
- clouds.CloudImplementationFeatures.SPOT_INSTANCE, None)
117
+ try:
118
+ spot_label_key, _ = kubernetes_utils.get_spot_label(context)
119
+ if spot_label_key is not None:
120
+ unsupported_features.pop(
121
+ clouds.CloudImplementationFeatures.SPOT_INSTANCE, None)
122
+ except exceptions.KubeAPIUnreachableError as e:
123
+ cls._log_unreachable_context(context, str(e))
113
124
  return unsupported_features
114
125
 
115
126
  @classmethod
@@ -170,6 +181,36 @@ class Kubernetes(clouds.Cloud):
170
181
  cls._log_skipped_contexts_once(tuple(skipped_contexts))
171
182
  return existing_contexts
172
183
 
184
+ @classmethod
185
+ def _log_unreachable_context(cls,
186
+ context: str,
187
+ reason: Optional[str] = None) -> None:
188
+ """Logs a Kubernetes context as unreachable.
189
+
190
+ Args:
191
+ context: The Kubernetes context to mark as unreachable.
192
+ reason: Optional reason for marking the context as unreachable.
193
+ silent: Whether to suppress the log message.
194
+ """
195
+ # Skip if this context has already been logged as unreachable
196
+ if context in cls.logged_unreachable_contexts:
197
+ return
198
+
199
+ cls.logged_unreachable_contexts.add(context)
200
+ msg = f'Excluding Kubernetes context {context}'
201
+ if reason is not None:
202
+ msg += f': {reason}'
203
+ logger.info(msg)
204
+
205
+ # Check if all existing allowed contexts are now unreachable
206
+ existing_contexts = cls.existing_allowed_contexts()
207
+ if existing_contexts and all(ctx in cls.logged_unreachable_contexts
208
+ for ctx in existing_contexts):
209
+ logger.warning(
210
+ 'All Kubernetes contexts are unreachable. '
211
+ 'Retry if it is a transient error, or run sky check to '
212
+ 'refresh Kubernetes availability if permanent.')
213
+
173
214
  @classmethod
174
215
  def regions_with_offering(cls, instance_type: Optional[str],
175
216
  accelerators: Optional[Dict[str, int]],
@@ -198,8 +239,12 @@ class Kubernetes(clouds.Cloud):
198
239
  # provision_timeout, after which failover will be triggered.
199
240
  for r in regions:
200
241
  context = r.name
201
- fits, reason = kubernetes_utils.check_instance_fits(
202
- context, instance_type)
242
+ try:
243
+ fits, reason = kubernetes_utils.check_instance_fits(
244
+ context, instance_type)
245
+ except exceptions.KubeAPIUnreachableError as e:
246
+ cls._log_unreachable_context(context, str(e))
247
+ continue
203
248
  if fits:
204
249
  regions_to_return.append(r)
205
250
  else:
@@ -609,18 +654,53 @@ class Kubernetes(clouds.Cloud):
609
654
  'Check if you have a valid kubeconfig file' +
610
655
  check_skypilot_config_msg)
611
656
  reasons = []
657
+ hints = []
658
+ success = False
612
659
  for context in existing_allowed_contexts:
613
660
  try:
614
661
  check_result = kubernetes_utils.check_credentials(context)
615
662
  if check_result[0]:
616
- return check_result
617
- reasons.append(f'{context}: {check_result[1]}')
663
+ success = True
664
+ if check_result[1] is not None:
665
+ hints.append(f'Context {context}: {check_result[1]}')
666
+ else:
667
+ reasons.append(f'Context {context}: {check_result[1]}')
618
668
  except Exception as e: # pylint: disable=broad-except
619
669
  return (False, f'Credential check failed for {context}: '
620
670
  f'{common_utils.format_exception(e)}')
671
+ if success:
672
+ return (True, cls._format_credential_check_results(hints, reasons))
621
673
  return (False, 'Failed to find available context with working '
622
674
  'credentials. Details:\n' + '\n'.join(reasons))
623
675
 
676
+ @classmethod
677
+ def _format_credential_check_results(cls, hints: List[str],
678
+ reasons: List[str]) -> str:
679
+ """Format credential check results with hints and reasons.
680
+
681
+ Args:
682
+ hints: List of successful context check messages.
683
+ reasons: List of failed context check reasons.
684
+
685
+ Returns:
686
+ A formatted string containing hints and by failure reasons.
687
+ """
688
+ message_parts = []
689
+ if len(hints) == 1 and not reasons:
690
+ return hints[0]
691
+ if hints:
692
+ message_parts.append(f'\n{cls._INDENT_PREFIX} ' +
693
+ f'\n{cls._INDENT_PREFIX} '.join(hints))
694
+ if reasons:
695
+ if hints:
696
+ message_parts.append('\n')
697
+ message_parts.append(
698
+ f'\n{cls._INDENT_PREFIX}Unavailable contexts (remove from '
699
+ '"allowed_contexts" config if permanently unavailable): '
700
+ f'\n{cls._INDENT_PREFIX} ' +
701
+ f'\n{cls._INDENT_PREFIX} '.join(reasons))
702
+ return ''.join(message_parts)
703
+
624
704
  def get_credential_file_mounts(self) -> Dict[str, str]:
625
705
  if os.path.exists(os.path.expanduser(CREDENTIAL_PATH)):
626
706
  # Upload kubeconfig to the default path to avoid having to set
@@ -17,6 +17,7 @@ _CREDENTIAL_FILES = [
17
17
  nebius.NEBIUS_TENANT_ID_FILENAME,
18
18
  nebius.NEBIUS_IAM_TOKEN_FILENAME,
19
19
  nebius.NEBIUS_PROJECT_ID_FILENAME,
20
+ nebius.NEBIUS_CREDENTIALS_FILENAME
20
21
  ]
21
22
 
22
23
 
@@ -252,15 +253,16 @@ class Nebius(clouds.Cloud):
252
253
  def check_credentials(cls) -> Tuple[bool, Optional[str]]:
253
254
  """ Verify that the user has valid credentials for Nebius. """
254
255
  logging.debug('Nebius cloud check credentials')
255
- token = nebius.get_iam_token()
256
- token_msg = (' Credentials can be set up by running: \n'\
257
- f' $ nebius iam get-access-token > {nebius.NEBIUS_IAM_TOKEN_PATH} \n') # pylint: disable=line-too-long
256
+ token_cred_msg = (' Credentials can be set up by running: \n'\
257
+ f' $ nebius iam get-access-token > {nebius.NEBIUS_IAM_TOKEN_PATH} \n'\
258
+ ' or generate ~/.nebius/credentials.json') # pylint: disable=line-too-long
259
+
258
260
  tenant_msg = (' Copy your tenat ID from the web console and save it to file \n' # pylint: disable=line-too-long
259
261
  f' $ echo $NEBIUS_TENANT_ID_PATH > {nebius.NEBIUS_TENANT_ID_PATH} \n' # pylint: disable=line-too-long
260
262
  ' Or if you have 1 tenant you can run:\n' # pylint: disable=line-too-long
261
263
  f' $ nebius --format json iam whoami|jq -r \'.user_profile.tenants[0].tenant_id\' > {nebius.NEBIUS_TENANT_ID_PATH} \n') # pylint: disable=line-too-long
262
- if token is None:
263
- return False, f'{token_msg}'
264
+ if not nebius.is_token_or_cred_file_exist():
265
+ return False, f'{token_cred_msg}'
264
266
  sdk = nebius.sdk()
265
267
  tenant_id = nebius.get_tenant_id()
266
268
  if tenant_id is None:
@@ -272,7 +274,7 @@ class Nebius(clouds.Cloud):
272
274
  except nebius.request_error() as e:
273
275
  return False, (
274
276
  f'{e.status} \n' # First line is indented by 4 spaces
275
- f'{token_msg}'
277
+ f'{token_cred_msg}'
276
278
  f'{tenant_msg}')
277
279
  return True, None
278
280
 
@@ -164,12 +164,13 @@ def _list_accelerators(
164
164
 
165
165
  accelerators_qtys: Set[Tuple[str, int]] = set()
166
166
  keys = lf.get_label_keys()
167
- nodes = kubernetes_utils.get_kubernetes_nodes(context)
167
+ nodes = kubernetes_utils.get_kubernetes_nodes(context=context)
168
168
  pods = None
169
169
  if realtime:
170
170
  # Get the pods to get the real-time GPU usage
171
171
  try:
172
- pods = kubernetes_utils.get_all_pods_in_kubernetes_cluster(context)
172
+ pods = kubernetes_utils.get_all_pods_in_kubernetes_cluster(
173
+ context=context)
173
174
  except kubernetes.api_exception() as e:
174
175
  if e.status == 403:
175
176
  logger.warning(
@@ -28,12 +28,19 @@ GIT_FATAL_EXIT_CODE = 128
28
28
  ARCH_NOT_SUPPORTED_EXIT_CODE = 133
29
29
 
30
30
 
31
- def is_safe_exception(exc: Exception) -> bool:
31
+ def is_safe_exception(exc: BaseException) -> bool:
32
32
  """Returns True if the exception is safe to send to clients.
33
33
 
34
34
  Safe exceptions are:
35
35
  1. Built-in exceptions
36
36
  2. SkyPilot's own exceptions
37
+
38
+ Args:
39
+ exc: The exception to check, accept BaseException to handle SystemExit
40
+ and KeyboardInterrupt.
41
+
42
+ Returns:
43
+ True if the exception is safe to send to clients, False otherwise.
37
44
  """
38
45
  module = type(exc).__module__
39
46
 
@@ -48,7 +55,7 @@ def is_safe_exception(exc: Exception) -> bool:
48
55
  return False
49
56
 
50
57
 
51
- def wrap_exception(exc: Exception) -> Exception:
58
+ def wrap_exception(exc: BaseException) -> BaseException:
52
59
  """Wraps non-safe exceptions into SkyPilot exceptions
53
60
 
54
61
  This is used to wrap exceptions that are not safe to deserialize at clients.
@@ -64,7 +71,8 @@ def wrap_exception(exc: Exception) -> Exception:
64
71
  error_type=type(exc).__name__)
65
72
 
66
73
 
67
- def serialize_exception(e: Exception) -> Dict[str, Any]:
74
+ # Accept BaseException to handle SystemExit and KeyboardInterrupt
75
+ def serialize_exception(e: BaseException) -> Dict[str, Any]:
68
76
  """Serialize the exception.
69
77
 
70
78
  This function also wraps any unsafe exceptions (e.g., cloud exceptions)
@@ -156,6 +164,15 @@ class ResourcesUnavailableError(Exception):
156
164
  return self
157
165
 
158
166
 
167
+ class KubeAPIUnreachableError(ResourcesUnavailableError):
168
+ """Raised when the Kubernetes API is currently unreachable.
169
+
170
+ This is a subclass of ResourcesUnavailableError to trigger the same failover
171
+ behavior as other ResourcesUnavailableError.
172
+ """
173
+ pass
174
+
175
+
159
176
  class InvalidCloudConfigs(Exception):
160
177
  """Raised when invalid configurations are provided for a given cloud."""
161
178
  pass
@@ -157,7 +157,11 @@ def _cleanup_ports_for_loadbalancer(
157
157
  ) -> None:
158
158
  service_name = _LOADBALANCER_SERVICE_NAME.format(
159
159
  cluster_name_on_cloud=cluster_name_on_cloud)
160
+ # TODO(aylei): test coverage
161
+ context = provider_config.get(
162
+ 'context', kubernetes_utils.get_current_kube_config_context_name())
160
163
  network_utils.delete_namespaced_service(
164
+ context=context,
161
165
  namespace=provider_config.get('namespace', 'default'),
162
166
  service_name=service_name,
163
167
  )
@@ -169,9 +173,12 @@ def _cleanup_ports_for_ingress(
169
173
  provider_config: Dict[str, Any],
170
174
  ) -> None:
171
175
  # Delete services for each port
176
+ context = provider_config.get(
177
+ 'context', kubernetes_utils.get_current_kube_config_context_name())
172
178
  for port in ports:
173
179
  service_name = f'{cluster_name_on_cloud}--skypilot-svc--{port}'
174
180
  network_utils.delete_namespaced_service(
181
+ context=context,
175
182
  namespace=provider_config.get('namespace',
176
183
  kubernetes_utils.DEFAULT_NAMESPACE),
177
184
  service_name=service_name,
@@ -194,9 +194,10 @@ def create_or_replace_namespaced_service(
194
194
  _request_timeout=kubernetes.API_TIMEOUT)
195
195
 
196
196
 
197
- def delete_namespaced_service(namespace: str, service_name: str) -> None:
197
+ def delete_namespaced_service(context: Optional[str], namespace: str,
198
+ service_name: str) -> None:
198
199
  """Deletes a service resource."""
199
- core_api = kubernetes.core_api()
200
+ core_api = kubernetes.core_api(context)
200
201
 
201
202
  try:
202
203
  core_api.delete_namespaced_service(
@@ -125,6 +125,10 @@ def _retry_on_error(max_retries=DEFAULT_MAX_RETRIES,
125
125
  retry_interval: Initial seconds to wait between retries
126
126
  resource_type: Type of resource being accessed (e.g. 'node', 'pod').
127
127
  Used to provide more specific error messages.
128
+
129
+ Raises:
130
+ KubeAPIUnreachableError: If the API server of the given context is
131
+ unreachable.
128
132
  """
129
133
 
130
134
  def decorator(func):
@@ -135,6 +139,9 @@ def _retry_on_error(max_retries=DEFAULT_MAX_RETRIES,
135
139
  backoff = common_utils.Backoff(initial_backoff=retry_interval,
136
140
  max_backoff_factor=3)
137
141
 
142
+ assert 'context' in kwargs, 'context is required'
143
+ context = kwargs.get('context')
144
+
138
145
  for attempt in range(max_retries):
139
146
  try:
140
147
  return func(*args, **kwargs)
@@ -160,6 +167,8 @@ def _retry_on_error(max_retries=DEFAULT_MAX_RETRIES,
160
167
  if resource_type else ''
161
168
  debug_cmd = f' To debug, run: kubectl get {resource_type}s' \
162
169
  if resource_type else ''
170
+ if context:
171
+ debug_cmd += f' --context {context}'
163
172
 
164
173
  if isinstance(last_exception, kubernetes.max_retry_error()):
165
174
  error_msg = f'Timed out{resource_msg} from Kubernetes cluster.'
@@ -170,7 +179,7 @@ def _retry_on_error(max_retries=DEFAULT_MAX_RETRIES,
170
179
  error_msg = (f'Kubernetes configuration error{resource_msg}: '
171
180
  f'{str(last_exception)}')
172
181
 
173
- raise exceptions.ResourcesUnavailableError(
182
+ raise exceptions.KubeAPIUnreachableError(
174
183
  f'{error_msg}'
175
184
  f' Please check if the cluster is healthy and retry.'
176
185
  f'{debug_cmd}') from last_exception
@@ -529,7 +538,7 @@ def detect_gpu_label_formatter(
529
538
  """
530
539
  # Get all labels across all nodes
531
540
  node_labels: Dict[str, List[Tuple[str, str]]] = {}
532
- nodes = get_kubernetes_nodes(context)
541
+ nodes = get_kubernetes_nodes(context=context)
533
542
  for node in nodes:
534
543
  node_labels[node.metadata.name] = []
535
544
  for label, value in node.metadata.labels.items():
@@ -564,7 +573,7 @@ def detect_accelerator_resource(
564
573
  """
565
574
  # Get the set of resources across all nodes
566
575
  cluster_resources: Set[str] = set()
567
- nodes = get_kubernetes_nodes(context)
576
+ nodes = get_kubernetes_nodes(context=context)
568
577
  for node in nodes:
569
578
  cluster_resources.update(node.status.allocatable.keys())
570
579
  has_accelerator = (get_gpu_resource_key() in cluster_resources or
@@ -575,7 +584,7 @@ def detect_accelerator_resource(
575
584
 
576
585
  @annotations.lru_cache(scope='request', maxsize=10)
577
586
  @_retry_on_error(resource_type='node')
578
- def get_kubernetes_nodes(context: Optional[str] = None) -> List[Any]:
587
+ def get_kubernetes_nodes(*, context: Optional[str] = None) -> List[Any]:
579
588
  """Gets the kubernetes nodes in the context.
580
589
 
581
590
  If context is None, gets the nodes in the current context.
@@ -589,8 +598,9 @@ def get_kubernetes_nodes(context: Optional[str] = None) -> List[Any]:
589
598
 
590
599
 
591
600
  @_retry_on_error(resource_type='pod')
592
- def get_all_pods_in_kubernetes_cluster(
593
- context: Optional[str] = None) -> List[Any]:
601
+ def get_all_pods_in_kubernetes_cluster(*,
602
+ context: Optional[str] = None
603
+ ) -> List[Any]:
594
604
  """Gets pods in all namespaces in kubernetes cluster indicated by context.
595
605
 
596
606
  Used for computing cluster resource usage.
@@ -619,9 +629,6 @@ def check_instance_fits(context: Optional[str],
619
629
  Optional[str]: Error message if the instance does not fit.
620
630
  """
621
631
 
622
- # TODO(zhwu): this should check the node for specific context, instead
623
- # of the default context to make failover fully functional.
624
-
625
632
  def check_cpu_mem_fits(candidate_instance_type: 'KubernetesInstanceType',
626
633
  node_list: List[Any]) -> Tuple[bool, Optional[str]]:
627
634
  """Checks if the instance fits on the cluster based on CPU and memory.
@@ -682,7 +689,7 @@ def check_instance_fits(context: Optional[str],
682
689
  f'{tpu_list_in_cluster_str}. Note that multi-host TPU '
683
690
  'podslices are currently not unsupported.')
684
691
 
685
- nodes = get_kubernetes_nodes(context)
692
+ nodes = get_kubernetes_nodes(context=context)
686
693
  k8s_instance_type = KubernetesInstanceType.\
687
694
  from_instance_type(instance)
688
695
  acc_type = k8s_instance_type.accelerator_type
@@ -846,7 +853,7 @@ def get_accelerator_label_key_value(
846
853
  for label, value in label_list:
847
854
  if (label_formatter.match_label_key(label) and
848
855
  label_formatter.get_accelerator_from_label_value(
849
- value) == acc_type):
856
+ value).lower() == acc_type.lower()):
850
857
  if is_tpu_on_gke(acc_type):
851
858
  assert isinstance(label_formatter,
852
859
  GKELabelFormatter)
@@ -2083,7 +2090,7 @@ def get_spot_label(
2083
2090
  """
2084
2091
  # Check if the cluster supports spot instances by checking nodes for known
2085
2092
  # spot label keys and values
2086
- for node in get_kubernetes_nodes(context):
2093
+ for node in get_kubernetes_nodes(context=context):
2087
2094
  for _, (key, value) in SPOT_LABEL_MAP.items():
2088
2095
  if key in node.metadata.labels and node.metadata.labels[
2089
2096
  key] == value:
@@ -2133,10 +2140,10 @@ def get_kubernetes_node_info(
2133
2140
  Dict[str, KubernetesNodeInfo]: Dictionary containing the node name as
2134
2141
  key and the KubernetesNodeInfo object as value
2135
2142
  """
2136
- nodes = get_kubernetes_nodes(context)
2143
+ nodes = get_kubernetes_nodes(context=context)
2137
2144
  # Get the pods to get the real-time resource usage
2138
2145
  try:
2139
- pods = get_all_pods_in_kubernetes_cluster(context)
2146
+ pods = get_all_pods_in_kubernetes_cluster(context=context)
2140
2147
  except kubernetes.api_exception() as e:
2141
2148
  if e.status == 403:
2142
2149
  pods = None
@@ -2443,7 +2450,7 @@ def is_multi_host_tpu(node_metadata_labels: dict) -> bool:
2443
2450
 
2444
2451
  def multi_host_tpu_exists_in_cluster(context: Optional[str] = None) -> bool:
2445
2452
  """Checks if there exists a multi-host TPU within the cluster."""
2446
- nodes = get_kubernetes_nodes(context)
2453
+ nodes = get_kubernetes_nodes(context=context)
2447
2454
  for node in nodes:
2448
2455
  if is_multi_host_tpu(node.metadata.labels):
2449
2456
  return True