skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +64 -32
- sky/adaptors/aws.py +23 -6
- sky/adaptors/azure.py +432 -15
- sky/adaptors/cloudflare.py +5 -5
- sky/adaptors/common.py +19 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/gcp.py +3 -2
- sky/adaptors/kubernetes.py +122 -88
- sky/adaptors/nebius.py +100 -0
- sky/adaptors/oci.py +39 -1
- sky/adaptors/vast.py +29 -0
- sky/admin_policy.py +101 -0
- sky/authentication.py +117 -98
- sky/backends/backend.py +52 -20
- sky/backends/backend_utils.py +669 -557
- sky/backends/cloud_vm_ray_backend.py +1099 -808
- sky/backends/local_docker_backend.py +14 -8
- sky/backends/wheel_utils.py +38 -20
- sky/benchmark/benchmark_utils.py +22 -23
- sky/check.py +76 -27
- sky/cli.py +1586 -1139
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5683 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1765 -0
- sky/cloud_stores.py +283 -19
- sky/clouds/__init__.py +7 -2
- sky/clouds/aws.py +303 -112
- sky/clouds/azure.py +185 -179
- sky/clouds/cloud.py +115 -37
- sky/clouds/cudo.py +29 -22
- sky/clouds/do.py +313 -0
- sky/clouds/fluidstack.py +44 -54
- sky/clouds/gcp.py +206 -65
- sky/clouds/ibm.py +26 -21
- sky/clouds/kubernetes.py +345 -91
- sky/clouds/lambda_cloud.py +40 -29
- sky/clouds/nebius.py +297 -0
- sky/clouds/oci.py +129 -90
- sky/clouds/paperspace.py +22 -18
- sky/clouds/runpod.py +53 -34
- sky/clouds/scp.py +28 -24
- sky/clouds/service_catalog/__init__.py +19 -13
- sky/clouds/service_catalog/aws_catalog.py +29 -12
- sky/clouds/service_catalog/azure_catalog.py +33 -6
- sky/clouds/service_catalog/common.py +95 -75
- sky/clouds/service_catalog/constants.py +3 -3
- sky/clouds/service_catalog/cudo_catalog.py +13 -3
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
- sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
- sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
- sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/gcp_catalog.py +16 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
- sky/clouds/service_catalog/lambda_catalog.py +8 -3
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/clouds/service_catalog/oci_catalog.py +31 -4
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vast_catalog.py +104 -0
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/utils/aws_utils.py +65 -0
- sky/clouds/utils/azure_utils.py +91 -0
- sky/clouds/utils/gcp_utils.py +5 -9
- sky/clouds/utils/oci_utils.py +47 -5
- sky/clouds/utils/scp_utils.py +4 -3
- sky/clouds/vast.py +280 -0
- sky/clouds/vsphere.py +22 -18
- sky/core.py +361 -107
- sky/dag.py +41 -28
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +211 -32
- sky/data/mounting_utils.py +182 -30
- sky/data/storage.py +2118 -270
- sky/data/storage_utils.py +126 -5
- sky/exceptions.py +179 -8
- sky/execution.py +158 -85
- sky/global_user_state.py +150 -34
- sky/jobs/__init__.py +12 -10
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +302 -0
- sky/jobs/constants.py +49 -11
- sky/jobs/controller.py +161 -99
- sky/jobs/dashboard/dashboard.py +171 -25
- sky/jobs/dashboard/templates/index.html +572 -60
- sky/jobs/recovery_strategy.py +157 -156
- sky/jobs/scheduler.py +307 -0
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/server/core.py +598 -0
- sky/jobs/server/dashboard_utils.py +69 -0
- sky/jobs/server/server.py +190 -0
- sky/jobs/state.py +627 -122
- sky/jobs/utils.py +615 -206
- sky/models.py +27 -0
- sky/optimizer.py +142 -83
- sky/provision/__init__.py +20 -5
- sky/provision/aws/config.py +124 -42
- sky/provision/aws/instance.py +130 -53
- sky/provision/azure/__init__.py +7 -0
- sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
- sky/provision/azure/config.py +220 -0
- sky/provision/azure/instance.py +1012 -37
- sky/provision/common.py +31 -3
- sky/provision/constants.py +25 -0
- sky/provision/cudo/__init__.py +2 -1
- sky/provision/cudo/cudo_utils.py +112 -0
- sky/provision/cudo/cudo_wrapper.py +37 -16
- sky/provision/cudo/instance.py +28 -12
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +301 -0
- sky/provision/docker_utils.py +82 -46
- sky/provision/fluidstack/fluidstack_utils.py +57 -125
- sky/provision/fluidstack/instance.py +15 -43
- sky/provision/gcp/config.py +19 -9
- sky/provision/gcp/constants.py +7 -1
- sky/provision/gcp/instance.py +55 -34
- sky/provision/gcp/instance_utils.py +339 -80
- sky/provision/gcp/mig_utils.py +210 -0
- sky/provision/instance_setup.py +172 -133
- sky/provision/kubernetes/__init__.py +1 -0
- sky/provision/kubernetes/config.py +104 -90
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +680 -325
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
- sky/provision/kubernetes/network.py +54 -20
- sky/provision/kubernetes/network_utils.py +70 -21
- sky/provision/kubernetes/utils.py +1370 -251
- sky/provision/lambda_cloud/__init__.py +11 -0
- sky/provision/lambda_cloud/config.py +10 -0
- sky/provision/lambda_cloud/instance.py +265 -0
- sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
- sky/provision/logging.py +1 -1
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +318 -0
- sky/provision/oci/__init__.py +15 -0
- sky/provision/oci/config.py +51 -0
- sky/provision/oci/instance.py +436 -0
- sky/provision/oci/query_utils.py +681 -0
- sky/provision/paperspace/constants.py +6 -0
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/paperspace/utils.py +2 -0
- sky/provision/provisioner.py +207 -130
- sky/provision/runpod/__init__.py +1 -0
- sky/provision/runpod/api/__init__.py +3 -0
- sky/provision/runpod/api/commands.py +119 -0
- sky/provision/runpod/api/pods.py +142 -0
- sky/provision/runpod/instance.py +64 -8
- sky/provision/runpod/utils.py +239 -23
- sky/provision/vast/__init__.py +10 -0
- sky/provision/vast/config.py +11 -0
- sky/provision/vast/instance.py +247 -0
- sky/provision/vast/utils.py +162 -0
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +8 -18
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +247 -102
- sky/serve/__init__.py +9 -9
- sky/serve/autoscalers.py +361 -299
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +12 -3
- sky/serve/controller.py +106 -36
- sky/serve/load_balancer.py +63 -12
- sky/serve/load_balancing_policies.py +84 -2
- sky/serve/replica_managers.py +42 -34
- sky/serve/serve_state.py +62 -32
- sky/serve/serve_utils.py +271 -160
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +271 -90
- sky/serve/server/server.py +112 -0
- sky/serve/service.py +52 -16
- sky/serve/service_spec.py +95 -32
- sky/server/__init__.py +1 -0
- sky/server/common.py +430 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +472 -0
- sky/server/requests/payloads.py +487 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1106 -0
- sky/server/stream_utils.py +141 -0
- sky/setup_files/MANIFEST.in +2 -5
- sky/setup_files/dependencies.py +159 -0
- sky/setup_files/setup.py +14 -125
- sky/sky_logging.py +59 -14
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +183 -50
- sky/skylet/events.py +22 -10
- sky/skylet/job_lib.py +403 -258
- sky/skylet/log_lib.py +111 -71
- sky/skylet/log_lib.pyi +6 -0
- sky/skylet/providers/command_runner.py +6 -8
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +11 -3
- sky/skylet/providers/scp/node_provider.py +8 -8
- sky/skylet/skylet.py +3 -1
- sky/skylet/subprocess_daemon.py +69 -17
- sky/skypilot_config.py +119 -57
- sky/task.py +205 -64
- sky/templates/aws-ray.yml.j2 +37 -7
- sky/templates/azure-ray.yml.j2 +27 -82
- sky/templates/cudo-ray.yml.j2 +7 -3
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/fluidstack-ray.yml.j2 +7 -4
- sky/templates/gcp-ray.yml.j2 +26 -6
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +46 -11
- sky/templates/kubernetes-ingress.yml.j2 +7 -0
- sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
- sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
- sky/templates/kubernetes-ray.yml.j2 +292 -25
- sky/templates/lambda-ray.yml.j2 +30 -40
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/templates/oci-ray.yml.j2 +18 -57
- sky/templates/paperspace-ray.yml.j2 +10 -6
- sky/templates/runpod-ray.yml.j2 +26 -4
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/sky-serve-controller.yaml.j2 +12 -1
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vast-ray.yml.j2 +70 -0
- sky/templates/vsphere-ray.yml.j2 +8 -3
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +10 -1
- sky/usage/usage_lib.py +130 -37
- sky/utils/accelerator_registry.py +35 -51
- sky/utils/admin_policy_utils.py +147 -0
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +81 -23
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +452 -89
- sky/utils/command_runner.pyi +77 -3
- sky/utils/common.py +54 -0
- sky/utils/common_utils.py +319 -108
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +48 -0
- sky/utils/controller_utils.py +548 -266
- sky/utils/dag_utils.py +93 -32
- sky/utils/db_utils.py +18 -4
- sky/utils/env_options.py +29 -7
- sky/utils/kubernetes/create_cluster.sh +8 -60
- sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
- sky/utils/kubernetes/gpu_labeler.py +4 -4
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/kubernetes/rsync_helper.sh +24 -0
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- sky/utils/log_utils.py +240 -33
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +94 -22
- sky/utils/rich_utils.py +247 -18
- sky/utils/schemas.py +284 -64
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +212 -46
- sky/utils/timeline.py +12 -7
- sky/utils/ux_utils.py +168 -15
- skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
- skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
- sky/clouds/cloud_registry.py +0 -31
- sky/jobs/core.py +0 -330
- sky/skylet/providers/azure/__init__.py +0 -2
- sky/skylet/providers/azure/azure-vm-template.json +0 -301
- sky/skylet/providers/azure/config.py +0 -170
- sky/skylet/providers/azure/node_provider.py +0 -466
- sky/skylet/providers/lambda_cloud/__init__.py +0 -2
- sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
- sky/skylet/providers/oci/__init__.py +0 -2
- sky/skylet/providers/oci/node_provider.py +0 -488
- sky/skylet/providers/oci/query_helper.py +0 -383
- sky/skylet/providers/oci/utils.py +0 -21
- sky/utils/cluster_yaml_utils.py +0 -24
- sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
- skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
- skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
sky/clouds/kubernetes.py
CHANGED
@@ -1,18 +1,21 @@
|
|
1
1
|
"""Kubernetes."""
|
2
|
-
import json
|
3
2
|
import os
|
4
3
|
import re
|
5
4
|
import typing
|
6
|
-
from typing import Dict, Iterator, List, Optional, Tuple
|
5
|
+
from typing import Dict, Iterator, List, Optional, Tuple, Union
|
7
6
|
|
8
7
|
from sky import clouds
|
9
8
|
from sky import sky_logging
|
10
9
|
from sky import skypilot_config
|
11
10
|
from sky.adaptors import kubernetes
|
12
11
|
from sky.clouds import service_catalog
|
12
|
+
from sky.provision import instance_setup
|
13
13
|
from sky.provision.kubernetes import network_utils
|
14
14
|
from sky.provision.kubernetes import utils as kubernetes_utils
|
15
|
+
from sky.skylet import constants
|
16
|
+
from sky.utils import annotations
|
15
17
|
from sky.utils import common_utils
|
18
|
+
from sky.utils import registry
|
16
19
|
from sky.utils import resources_utils
|
17
20
|
from sky.utils import schemas
|
18
21
|
|
@@ -32,24 +35,21 @@ CREDENTIAL_PATH = os.environ.get('KUBECONFIG', DEFAULT_KUBECONFIG_PATH)
|
|
32
35
|
_SKYPILOT_SYSTEM_NAMESPACE = 'skypilot-system'
|
33
36
|
|
34
37
|
|
35
|
-
@
|
38
|
+
@registry.CLOUD_REGISTRY.register(aliases=['k8s'])
|
36
39
|
class Kubernetes(clouds.Cloud):
|
37
40
|
"""Kubernetes."""
|
38
41
|
|
39
42
|
SKY_SSH_KEY_SECRET_NAME = 'sky-ssh-keys'
|
40
43
|
SKY_SSH_JUMP_NAME = 'sky-ssh-jump-pod'
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
|
50
|
-
# For non-autoscaling clusters, we conservatively set this to 10s.
|
51
|
-
timeout = skypilot_config.get_nested(['kubernetes', 'provision_timeout'],
|
52
|
-
10)
|
44
|
+
|
45
|
+
LEGACY_SINGLETON_REGION = 'kubernetes'
|
46
|
+
|
47
|
+
# Limit the length of the cluster name to avoid exceeding the limit of 63
|
48
|
+
# characters for Kubernetes resources. We limit to 42 characters (63-21) to
|
49
|
+
# allow additional characters for creating ingress services to expose ports.
|
50
|
+
# These services are named as {cluster_name_on_cloud}--skypilot-svc--{port},
|
51
|
+
# where the suffix is 21 characters long.
|
52
|
+
_MAX_CLUSTER_NAME_LEN_LIMIT = 42
|
53
53
|
|
54
54
|
_SUPPORTS_SERVICE_ACCOUNT_ON_REMOTE = True
|
55
55
|
|
@@ -57,8 +57,6 @@ class Kubernetes(clouds.Cloud):
|
|
57
57
|
_DEFAULT_MEMORY_CPU_RATIO = 1
|
58
58
|
_DEFAULT_MEMORY_CPU_RATIO_WITH_GPU = 4 # Allocate more memory for GPU tasks
|
59
59
|
_REPR = 'Kubernetes'
|
60
|
-
_SINGLETON_REGION = 'kubernetes'
|
61
|
-
_regions: List[clouds.Region] = [clouds.Region(_SINGLETON_REGION)]
|
62
60
|
_CLOUD_UNSUPPORTED_FEATURES = {
|
63
61
|
# TODO(romilb): Stopping might be possible to implement with
|
64
62
|
# container checkpointing introduced in Kubernetes v1.25. See:
|
@@ -74,8 +72,8 @@ class Kubernetes(clouds.Cloud):
|
|
74
72
|
'Kubernetes.',
|
75
73
|
}
|
76
74
|
|
77
|
-
IMAGE_CPU = 'skypilot:cpu-ubuntu-2004'
|
78
|
-
IMAGE_GPU = 'skypilot:gpu-ubuntu-2004'
|
75
|
+
IMAGE_CPU = 'skypilot:custom-cpu-ubuntu-2004'
|
76
|
+
IMAGE_GPU = 'skypilot:custom-gpu-ubuntu-2004'
|
79
77
|
|
80
78
|
PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
|
81
79
|
STATUS_VERSION = clouds.StatusVersion.SKYPILOT
|
@@ -85,15 +83,20 @@ class Kubernetes(clouds.Cloud):
|
|
85
83
|
# Use a fresh user hash to avoid conflicts in the secret object naming.
|
86
84
|
# This can happen when the controller is reusing the same user hash
|
87
85
|
# through USER_ID_ENV_VAR but has a different SSH key.
|
88
|
-
fresh_user_hash = common_utils.
|
86
|
+
fresh_user_hash = common_utils.generate_user_hash()
|
89
87
|
return f'ssh-publickey-{fresh_user_hash}'
|
90
88
|
|
91
89
|
@classmethod
|
92
90
|
def _unsupported_features_for_resources(
|
93
91
|
cls, resources: 'resources_lib.Resources'
|
94
92
|
) -> Dict[clouds.CloudImplementationFeatures, str]:
|
95
|
-
unsupported_features = cls._CLOUD_UNSUPPORTED_FEATURES
|
96
|
-
|
93
|
+
unsupported_features = cls._CLOUD_UNSUPPORTED_FEATURES.copy()
|
94
|
+
context = resources.region
|
95
|
+
if context is None:
|
96
|
+
context = kubernetes_utils.get_current_kube_config_context_name()
|
97
|
+
# Features to be disabled for exec auth
|
98
|
+
is_exec_auth, message = kubernetes_utils.is_kubeconfig_exec_auth(
|
99
|
+
context)
|
97
100
|
if is_exec_auth:
|
98
101
|
assert isinstance(message, str), message
|
99
102
|
# Controllers cannot spin up new pods with exec auth.
|
@@ -102,19 +105,112 @@ class Kubernetes(clouds.Cloud):
|
|
102
105
|
# Pod does not have permissions to terminate itself with exec auth.
|
103
106
|
unsupported_features[
|
104
107
|
clouds.CloudImplementationFeatures.AUTO_TERMINATE] = message
|
108
|
+
# Allow spot instances if supported by the cluster
|
109
|
+
spot_label_key, _ = kubernetes_utils.get_spot_label(context)
|
110
|
+
if spot_label_key is not None:
|
111
|
+
unsupported_features.pop(
|
112
|
+
clouds.CloudImplementationFeatures.SPOT_INSTANCE, None)
|
105
113
|
return unsupported_features
|
106
114
|
|
107
115
|
@classmethod
|
108
|
-
def
|
109
|
-
return cls.
|
116
|
+
def max_cluster_name_length(cls) -> Optional[int]:
|
117
|
+
return cls._MAX_CLUSTER_NAME_LEN_LIMIT
|
118
|
+
|
119
|
+
@classmethod
|
120
|
+
@annotations.lru_cache(scope='global', maxsize=1)
|
121
|
+
def _log_skipped_contexts_once(cls, skipped_contexts: Tuple[str,
|
122
|
+
...]) -> None:
|
123
|
+
"""Log skipped contexts only once.
|
124
|
+
|
125
|
+
We don't directly cache the result of _filter_existing_allowed_contexts
|
126
|
+
as the admin policy may update the allowed contexts.
|
127
|
+
"""
|
128
|
+
if skipped_contexts:
|
129
|
+
logger.warning(
|
130
|
+
f'Kubernetes contexts {set(skipped_contexts)!r} specified in '
|
131
|
+
'"allowed_contexts" not found in kubeconfig. '
|
132
|
+
'Ignoring these contexts.')
|
133
|
+
|
134
|
+
@classmethod
|
135
|
+
def existing_allowed_contexts(cls) -> List[str]:
|
136
|
+
"""Get existing allowed contexts.
|
137
|
+
|
138
|
+
If None is returned in the list, it means that we are running in a pod
|
139
|
+
with in-cluster auth. In this case, we specify None context, which will
|
140
|
+
use the service account mounted in the pod.
|
141
|
+
"""
|
142
|
+
all_contexts = kubernetes_utils.get_all_kube_context_names()
|
143
|
+
if not all_contexts:
|
144
|
+
return []
|
145
|
+
|
146
|
+
all_contexts = set(all_contexts)
|
147
|
+
|
148
|
+
allowed_contexts = skypilot_config.get_nested(
|
149
|
+
('kubernetes', 'allowed_contexts'), None)
|
150
|
+
|
151
|
+
if allowed_contexts is None:
|
152
|
+
# Try kubeconfig if present
|
153
|
+
current_context = (
|
154
|
+
kubernetes_utils.get_current_kube_config_context_name())
|
155
|
+
if (current_context is None and
|
156
|
+
kubernetes_utils.is_incluster_config_available()):
|
157
|
+
# If no kubeconfig contexts found, use in-cluster if available
|
158
|
+
current_context = kubernetes.in_cluster_context_name()
|
159
|
+
allowed_contexts = []
|
160
|
+
if current_context is not None:
|
161
|
+
allowed_contexts = [current_context]
|
162
|
+
|
163
|
+
existing_contexts = []
|
164
|
+
skipped_contexts = []
|
165
|
+
for context in allowed_contexts:
|
166
|
+
if context in all_contexts:
|
167
|
+
existing_contexts.append(context)
|
168
|
+
else:
|
169
|
+
skipped_contexts.append(context)
|
170
|
+
cls._log_skipped_contexts_once(tuple(skipped_contexts))
|
171
|
+
return existing_contexts
|
110
172
|
|
111
173
|
@classmethod
|
112
174
|
def regions_with_offering(cls, instance_type: Optional[str],
|
113
175
|
accelerators: Optional[Dict[str, int]],
|
114
176
|
use_spot: bool, region: Optional[str],
|
115
177
|
zone: Optional[str]) -> List[clouds.Region]:
|
116
|
-
|
117
|
-
|
178
|
+
del accelerators, zone, use_spot # unused
|
179
|
+
existing_contexts = cls.existing_allowed_contexts()
|
180
|
+
|
181
|
+
regions = []
|
182
|
+
for context in existing_contexts:
|
183
|
+
regions.append(clouds.Region(context))
|
184
|
+
|
185
|
+
if region is not None:
|
186
|
+
regions = [r for r in regions if r.name == region]
|
187
|
+
|
188
|
+
# Check if requested instance type will fit in the cluster.
|
189
|
+
# TODO(zhwu,romilb): autoscaler type needs to be regional (per
|
190
|
+
# kubernetes cluster/context).
|
191
|
+
regions_to_return = []
|
192
|
+
autoscaler_type = kubernetes_utils.get_autoscaler_type()
|
193
|
+
if autoscaler_type is None and instance_type is not None:
|
194
|
+
# If autoscaler is not set, check if the instance type fits in the
|
195
|
+
# cluster. Else, rely on the autoscaler to provision the right
|
196
|
+
# instance type without running checks. Worst case, if autoscaling
|
197
|
+
# fails, the pod will be stuck in pending state until
|
198
|
+
# provision_timeout, after which failover will be triggered.
|
199
|
+
for r in regions:
|
200
|
+
context = r.name
|
201
|
+
fits, reason = kubernetes_utils.check_instance_fits(
|
202
|
+
context, instance_type)
|
203
|
+
if fits:
|
204
|
+
regions_to_return.append(r)
|
205
|
+
else:
|
206
|
+
logger.debug(
|
207
|
+
f'Instance type {instance_type} does '
|
208
|
+
'not fit in the Kubernetes cluster with context: '
|
209
|
+
f'{context}. Reason: {reason}')
|
210
|
+
else:
|
211
|
+
regions_to_return = regions
|
212
|
+
|
213
|
+
return regions_to_return
|
118
214
|
|
119
215
|
def instance_type_to_hourly_cost(self,
|
120
216
|
instance_type: str,
|
@@ -140,17 +236,12 @@ class Kubernetes(clouds.Cloud):
|
|
140
236
|
def __repr__(self):
|
141
237
|
return self._REPR
|
142
238
|
|
143
|
-
@classmethod
|
144
|
-
def get_port(cls, svc_name) -> int:
|
145
|
-
ns = kubernetes_utils.get_current_kube_config_context_namespace()
|
146
|
-
return kubernetes_utils.get_port(svc_name, ns)
|
147
|
-
|
148
239
|
@classmethod
|
149
240
|
def get_default_instance_type(
|
150
241
|
cls,
|
151
242
|
cpus: Optional[str] = None,
|
152
243
|
memory: Optional[str] = None,
|
153
|
-
disk_tier: Optional[resources_utils.DiskTier] = None) -> str:
|
244
|
+
disk_tier: Optional['resources_utils.DiskTier'] = None) -> str:
|
154
245
|
# TODO(romilb): In the future, we may want to move the instance type
|
155
246
|
# selection + availability checking to a kubernetes_catalog module.
|
156
247
|
del disk_tier # Unused.
|
@@ -175,7 +266,7 @@ class Kubernetes(clouds.Cloud):
|
|
175
266
|
def get_accelerators_from_instance_type(
|
176
267
|
cls,
|
177
268
|
instance_type: str,
|
178
|
-
) -> Optional[Dict[str, int]]:
|
269
|
+
) -> Optional[Dict[str, Union[int, float]]]:
|
179
270
|
inst = kubernetes_utils.KubernetesInstanceType.from_instance_type(
|
180
271
|
instance_type)
|
181
272
|
return {
|
@@ -201,9 +292,9 @@ class Kubernetes(clouds.Cloud):
|
|
201
292
|
accelerators: Optional[Dict[str, int]] = None,
|
202
293
|
use_spot: bool = False,
|
203
294
|
) -> Iterator[Optional[List[clouds.Zone]]]:
|
204
|
-
|
205
|
-
|
206
|
-
|
295
|
+
# Always yield None for zones, since Kubernetes does not have zones, and
|
296
|
+
# we should allow any region to get to this point.
|
297
|
+
yield None
|
207
298
|
|
208
299
|
@classmethod
|
209
300
|
def get_zone_shell_cmd(cls) -> Optional[str]:
|
@@ -216,23 +307,46 @@ class Kubernetes(clouds.Cloud):
|
|
216
307
|
# we don't have a notion of disk size in Kubernetes.
|
217
308
|
return 0
|
218
309
|
|
310
|
+
@staticmethod
|
311
|
+
def _calculate_provision_timeout(num_nodes: int) -> int:
|
312
|
+
"""Calculate provision timeout based on number of nodes.
|
313
|
+
|
314
|
+
The timeout scales linearly with the number of nodes to account for
|
315
|
+
scheduling overhead, but is capped to avoid excessive waiting.
|
316
|
+
|
317
|
+
Args:
|
318
|
+
num_nodes: Number of nodes being provisioned
|
319
|
+
|
320
|
+
Returns:
|
321
|
+
Timeout in seconds
|
322
|
+
"""
|
323
|
+
base_timeout = 10 # Base timeout for single node
|
324
|
+
per_node_timeout = 0.2 # Additional seconds per node
|
325
|
+
max_timeout = 60 # Cap at 1 minute
|
326
|
+
|
327
|
+
return int(
|
328
|
+
min(base_timeout + (per_node_timeout * (num_nodes - 1)),
|
329
|
+
max_timeout))
|
330
|
+
|
219
331
|
def make_deploy_resources_variables(
|
220
332
|
self,
|
221
333
|
resources: 'resources_lib.Resources',
|
222
|
-
|
334
|
+
cluster_name: 'resources_utils.ClusterName',
|
223
335
|
region: Optional['clouds.Region'],
|
224
336
|
zones: Optional[List['clouds.Zone']],
|
337
|
+
num_nodes: int,
|
225
338
|
dryrun: bool = False) -> Dict[str, Optional[str]]:
|
226
|
-
del
|
339
|
+
del cluster_name, zones, dryrun # Unused.
|
227
340
|
if region is None:
|
228
|
-
|
341
|
+
context = kubernetes_utils.get_current_kube_config_context_name()
|
342
|
+
else:
|
343
|
+
context = region.name
|
344
|
+
assert context is not None, 'No context found in kubeconfig'
|
229
345
|
|
230
346
|
r = resources
|
231
347
|
acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
|
232
|
-
|
233
|
-
|
234
|
-
else:
|
235
|
-
custom_resources = None
|
348
|
+
custom_resources = resources_utils.make_ray_custom_resources_str(
|
349
|
+
acc_dict)
|
236
350
|
|
237
351
|
# resources.memory and cpus are None if they are not explicitly set.
|
238
352
|
# We fetch the default values for the instance type in that case.
|
@@ -244,9 +358,14 @@ class Kubernetes(clouds.Cloud):
|
|
244
358
|
acc_count = k.accelerator_count if k.accelerator_count else 0
|
245
359
|
acc_type = k.accelerator_type if k.accelerator_type else None
|
246
360
|
|
247
|
-
|
361
|
+
image_id_dict = resources.image_id
|
362
|
+
if image_id_dict is not None:
|
248
363
|
# Use custom image specified in resources
|
249
|
-
|
364
|
+
if None in image_id_dict:
|
365
|
+
image_id = image_id_dict[None]
|
366
|
+
else:
|
367
|
+
assert resources.region in image_id_dict, image_id_dict
|
368
|
+
image_id = image_id_dict[resources.region]
|
250
369
|
if image_id.startswith('docker:'):
|
251
370
|
image_id = image_id[len('docker:'):]
|
252
371
|
else:
|
@@ -261,23 +380,48 @@ class Kubernetes(clouds.Cloud):
|
|
261
380
|
|
262
381
|
k8s_acc_label_key = None
|
263
382
|
k8s_acc_label_value = None
|
383
|
+
k8s_topology_label_key = None
|
384
|
+
k8s_topology_label_value = None
|
385
|
+
k8s_resource_key = None
|
386
|
+
tpu_requested = False
|
264
387
|
|
265
|
-
# If
|
388
|
+
# If GPU/TPUs are requested, set node label to match the GPU/TPU type.
|
266
389
|
if acc_count > 0 and acc_type is not None:
|
267
|
-
k8s_acc_label_key, k8s_acc_label_value
|
268
|
-
|
390
|
+
(k8s_acc_label_key, k8s_acc_label_value, k8s_topology_label_key,
|
391
|
+
k8s_topology_label_value) = (
|
392
|
+
kubernetes_utils.get_accelerator_label_key_value(
|
393
|
+
context, acc_type, acc_count))
|
394
|
+
if (k8s_acc_label_key ==
|
395
|
+
kubernetes_utils.GKELabelFormatter.TPU_LABEL_KEY):
|
396
|
+
tpu_requested = True
|
397
|
+
k8s_resource_key = kubernetes_utils.TPU_RESOURCE_KEY
|
398
|
+
else:
|
399
|
+
k8s_resource_key = kubernetes_utils.get_gpu_resource_key()
|
269
400
|
|
270
401
|
port_mode = network_utils.get_port_mode(None)
|
271
402
|
|
272
403
|
remote_identity = skypilot_config.get_nested(
|
273
404
|
('kubernetes', 'remote_identity'),
|
274
405
|
schemas.get_default_remote_identity('kubernetes'))
|
275
|
-
|
406
|
+
|
407
|
+
if isinstance(remote_identity, dict):
|
408
|
+
# If remote_identity is a dict, use the service account for the
|
409
|
+
# current context
|
410
|
+
k8s_service_account_name = remote_identity.get(context, None)
|
411
|
+
if k8s_service_account_name is None:
|
412
|
+
err_msg = (f'Context {context!r} not found in '
|
413
|
+
'remote identities from config.yaml')
|
414
|
+
raise ValueError(err_msg)
|
415
|
+
else:
|
416
|
+
# If remote_identity is not a dict, use it as-is.
|
417
|
+
k8s_service_account_name = remote_identity
|
418
|
+
|
419
|
+
if (k8s_service_account_name ==
|
276
420
|
schemas.RemoteIdentityOptions.LOCAL_CREDENTIALS.value):
|
277
421
|
# SA name doesn't matter since automounting credentials is disabled
|
278
422
|
k8s_service_account_name = 'default'
|
279
423
|
k8s_automount_sa_token = 'false'
|
280
|
-
elif (
|
424
|
+
elif (k8s_service_account_name ==
|
281
425
|
schemas.RemoteIdentityOptions.SERVICE_ACCOUNT.value):
|
282
426
|
# Use the default service account
|
283
427
|
k8s_service_account_name = (
|
@@ -285,22 +429,55 @@ class Kubernetes(clouds.Cloud):
|
|
285
429
|
k8s_automount_sa_token = 'true'
|
286
430
|
else:
|
287
431
|
# User specified a custom service account
|
288
|
-
k8s_service_account_name = remote_identity
|
289
432
|
k8s_automount_sa_token = 'true'
|
290
433
|
|
291
434
|
fuse_device_required = bool(resources.requires_fuse)
|
292
435
|
|
436
|
+
# Configure spot labels, if requested and supported
|
437
|
+
spot_label_key, spot_label_value = None, None
|
438
|
+
if resources.use_spot:
|
439
|
+
spot_label_key, spot_label_value = kubernetes_utils.get_spot_label()
|
440
|
+
|
441
|
+
# Timeout for resource provisioning. This timeout determines how long to
|
442
|
+
# wait for pod to be in pending status before giving up.
|
443
|
+
# Larger timeout may be required for autoscaling clusters, since
|
444
|
+
# autoscaler may take some time to provision new nodes.
|
445
|
+
# Note that this timeout includes time taken by the Kubernetes scheduler
|
446
|
+
# itself, which can be up to 2-3 seconds, and up to 10-15 seconds when
|
447
|
+
# scheduling 100s of pods.
|
448
|
+
# We use a linear scaling formula to determine the timeout based on the
|
449
|
+
# number of nodes.
|
450
|
+
|
451
|
+
timeout = self._calculate_provision_timeout(num_nodes)
|
452
|
+
timeout = skypilot_config.get_nested(
|
453
|
+
('kubernetes', 'provision_timeout'),
|
454
|
+
timeout,
|
455
|
+
override_configs=resources.cluster_config_overrides)
|
456
|
+
|
457
|
+
# Set environment variables for the pod. Note that SkyPilot env vars
|
458
|
+
# are set separately when the task is run. These env vars are
|
459
|
+
# independent of the SkyPilot task to be run.
|
460
|
+
k8s_env_vars = {kubernetes.IN_CLUSTER_CONTEXT_NAME_ENV_VAR: context}
|
461
|
+
|
462
|
+
# We specify object-store-memory to be 500MB to avoid taking up too
|
463
|
+
# much memory on the head node. 'num-cpus' should be set to limit
|
464
|
+
# the CPU usage on the head pod, otherwise the ray cluster will use the
|
465
|
+
# CPU resources on the node instead of within the pod.
|
466
|
+
custom_ray_options = {
|
467
|
+
'object-store-memory': 500000000,
|
468
|
+
# 'num-cpus' must be an integer, but we should not set it to 0 if
|
469
|
+
# cpus is <1.
|
470
|
+
'num-cpus': str(max(int(cpus), 1)),
|
471
|
+
}
|
293
472
|
deploy_vars = {
|
294
473
|
'instance_type': resources.instance_type,
|
295
474
|
'custom_resources': custom_resources,
|
296
|
-
'region': region.name,
|
297
475
|
'cpus': str(cpus),
|
298
476
|
'memory': str(mem),
|
299
477
|
'accelerator_count': str(acc_count),
|
300
|
-
'timeout': str(
|
301
|
-
'k8s_namespace':
|
302
|
-
kubernetes_utils.get_current_kube_config_context_namespace(),
|
478
|
+
'timeout': str(timeout),
|
303
479
|
'k8s_port_mode': port_mode.value,
|
480
|
+
'k8s_networking_mode': network_utils.get_networking_mode().value,
|
304
481
|
'k8s_ssh_key_secret_name': self.SKY_SSH_KEY_SECRET_NAME,
|
305
482
|
'k8s_acc_label_key': k8s_acc_label_key,
|
306
483
|
'k8s_acc_label_value': k8s_acc_label_value,
|
@@ -311,19 +488,51 @@ class Kubernetes(clouds.Cloud):
|
|
311
488
|
'k8s_fuse_device_required': fuse_device_required,
|
312
489
|
# Namespace to run the FUSE device manager in
|
313
490
|
'k8s_skypilot_system_namespace': _SKYPILOT_SYSTEM_NAMESPACE,
|
491
|
+
'k8s_spot_label_key': spot_label_key,
|
492
|
+
'k8s_spot_label_value': spot_label_value,
|
493
|
+
'tpu_requested': tpu_requested,
|
494
|
+
'k8s_topology_label_key': k8s_topology_label_key,
|
495
|
+
'k8s_topology_label_value': k8s_topology_label_value,
|
496
|
+
'k8s_resource_key': k8s_resource_key,
|
497
|
+
'k8s_env_vars': k8s_env_vars,
|
314
498
|
'image_id': image_id,
|
499
|
+
'ray_installation_commands': constants.RAY_INSTALLATION_COMMANDS,
|
500
|
+
'ray_head_start_command': instance_setup.ray_head_start_command(
|
501
|
+
custom_resources, custom_ray_options),
|
502
|
+
'skypilot_ray_port': constants.SKY_REMOTE_RAY_PORT,
|
503
|
+
'ray_worker_start_command': instance_setup.ray_worker_start_command(
|
504
|
+
custom_resources, custom_ray_options, no_restart=False),
|
315
505
|
}
|
316
506
|
|
507
|
+
# Add kubecontext if it is set. It may be None if SkyPilot is running
|
508
|
+
# inside a pod with in-cluster auth.
|
509
|
+
if context is not None:
|
510
|
+
deploy_vars['k8s_context'] = context
|
511
|
+
|
512
|
+
namespace = kubernetes_utils.get_kube_config_context_namespace(context)
|
513
|
+
deploy_vars['k8s_namespace'] = namespace
|
514
|
+
|
317
515
|
return deploy_vars
|
318
516
|
|
319
517
|
def _get_feasible_launchable_resources(
|
320
518
|
self, resources: 'resources_lib.Resources'
|
321
|
-
) ->
|
519
|
+
) -> 'resources_utils.FeasibleResources':
|
520
|
+
# TODO(zhwu): This needs to be updated to return the correct region
|
521
|
+
# (context) that has enough resources.
|
322
522
|
fuzzy_candidate_list: List[str] = []
|
323
523
|
if resources.instance_type is not None:
|
324
524
|
assert resources.is_launchable(), resources
|
525
|
+
regions = self.regions_with_offering(
|
526
|
+
resources.instance_type,
|
527
|
+
accelerators=resources.accelerators,
|
528
|
+
use_spot=resources.use_spot,
|
529
|
+
region=resources.region,
|
530
|
+
zone=resources.zone)
|
531
|
+
if not regions:
|
532
|
+
return resources_utils.FeasibleResources([], [], None)
|
325
533
|
resources = resources.copy(accelerators=None)
|
326
|
-
return ([resources],
|
534
|
+
return resources_utils.FeasibleResources([resources],
|
535
|
+
fuzzy_candidate_list, None)
|
327
536
|
|
328
537
|
def _make(instance_list):
|
329
538
|
resource_list = []
|
@@ -365,33 +574,52 @@ class Kubernetes(clouds.Cloud):
|
|
365
574
|
kubernetes_utils.KubernetesInstanceType.from_resources(
|
366
575
|
gpu_task_cpus, gpu_task_memory, acc_count, acc_type).name)
|
367
576
|
|
368
|
-
# Check
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
chosen_instance_type)
|
378
|
-
if not fits:
|
379
|
-
logger.debug(f'Instance type {chosen_instance_type} does '
|
380
|
-
'not fit in the Kubernetes cluster. '
|
381
|
-
f'Reason: {reason}')
|
382
|
-
return [], []
|
383
|
-
|
577
|
+
# Check the availability of the specified instance type in all contexts.
|
578
|
+
available_regions = self.regions_with_offering(
|
579
|
+
chosen_instance_type,
|
580
|
+
accelerators=None,
|
581
|
+
use_spot=resources.use_spot,
|
582
|
+
region=resources.region,
|
583
|
+
zone=resources.zone)
|
584
|
+
if not available_regions:
|
585
|
+
return resources_utils.FeasibleResources([], [], None)
|
384
586
|
# No fuzzy lists for Kubernetes
|
385
|
-
|
587
|
+
# We don't set the resources returned with regions, because the
|
588
|
+
# optimizer will further find the valid region (context) for the
|
589
|
+
# resources.
|
590
|
+
return resources_utils.FeasibleResources(_make([chosen_instance_type]),
|
591
|
+
[], None)
|
386
592
|
|
387
593
|
@classmethod
|
388
594
|
def check_credentials(cls) -> Tuple[bool, Optional[str]]:
|
389
595
|
# Test using python API
|
390
596
|
try:
|
391
|
-
|
392
|
-
except
|
393
|
-
return (False,
|
394
|
-
f'{common_utils.format_exception(e)}')
|
597
|
+
existing_allowed_contexts = cls.existing_allowed_contexts()
|
598
|
+
except ImportError as e:
|
599
|
+
return (False,
|
600
|
+
f'{common_utils.format_exception(e, use_bracket=True)}')
|
601
|
+
if not existing_allowed_contexts:
|
602
|
+
if skypilot_config.loaded_config_path() is None:
|
603
|
+
check_skypilot_config_msg = ''
|
604
|
+
else:
|
605
|
+
check_skypilot_config_msg = (
|
606
|
+
' and check "allowed_contexts" in your '
|
607
|
+
f'{skypilot_config.loaded_config_path()} file.')
|
608
|
+
return (False, 'No available context found in kubeconfig. '
|
609
|
+
'Check if you have a valid kubeconfig file' +
|
610
|
+
check_skypilot_config_msg)
|
611
|
+
reasons = []
|
612
|
+
for context in existing_allowed_contexts:
|
613
|
+
try:
|
614
|
+
check_result = kubernetes_utils.check_credentials(context)
|
615
|
+
if check_result[0]:
|
616
|
+
return check_result
|
617
|
+
reasons.append(f'{context}: {check_result[1]}')
|
618
|
+
except Exception as e: # pylint: disable=broad-except
|
619
|
+
return (False, f'Credential check failed for {context}: '
|
620
|
+
f'{common_utils.format_exception(e)}')
|
621
|
+
return (False, 'Failed to find available context with working '
|
622
|
+
'credentials. Details:\n' + '\n'.join(reasons))
|
395
623
|
|
396
624
|
def get_credential_file_mounts(self) -> Dict[str, str]:
|
397
625
|
if os.path.exists(os.path.expanduser(CREDENTIAL_PATH)):
|
@@ -406,30 +634,56 @@ class Kubernetes(clouds.Cloud):
|
|
406
634
|
instance_type)
|
407
635
|
|
408
636
|
def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
|
409
|
-
if region
|
637
|
+
if region == self.LEGACY_SINGLETON_REGION:
|
638
|
+
# For backward compatibility, we allow the region to be set to the
|
639
|
+
# legacy singleton region.
|
640
|
+
# TODO: Remove this after 0.9.0.
|
641
|
+
return region, zone
|
642
|
+
|
643
|
+
if region == kubernetes.in_cluster_context_name():
|
644
|
+
# If running in-cluster, we set region to IN_CLUSTER_REGION
|
645
|
+
# since there is no context name available.
|
646
|
+
return region, zone
|
647
|
+
|
648
|
+
all_contexts = kubernetes_utils.get_all_kube_context_names()
|
649
|
+
|
650
|
+
if region not in all_contexts:
|
410
651
|
raise ValueError(
|
411
|
-
'
|
412
|
-
'
|
652
|
+
f'Context {region} not found in kubeconfig. Kubernetes only '
|
653
|
+
'supports context names as regions. Available '
|
654
|
+
f'contexts: {all_contexts}')
|
413
655
|
if zone is not None:
|
414
656
|
raise ValueError('Kubernetes support does not support setting zone.'
|
415
657
|
' Cluster used is determined by the kubeconfig.')
|
416
658
|
return region, zone
|
417
659
|
|
660
|
+
@staticmethod
|
661
|
+
def get_identity_from_context(context):
|
662
|
+
if 'namespace' in context['context']:
|
663
|
+
namespace = context['context']['namespace']
|
664
|
+
else:
|
665
|
+
namespace = kubernetes_utils.DEFAULT_NAMESPACE
|
666
|
+
user = context['context']['user']
|
667
|
+
cluster = context['context']['cluster']
|
668
|
+
identity_str = f'{cluster}_{user}_{namespace}'
|
669
|
+
return identity_str
|
670
|
+
|
418
671
|
@classmethod
|
419
|
-
def
|
672
|
+
def get_user_identities(cls) -> Optional[List[List[str]]]:
|
420
673
|
k8s = kubernetes.kubernetes
|
674
|
+
identities = []
|
421
675
|
try:
|
422
|
-
|
423
|
-
|
424
|
-
namespace = current_context['context']['namespace']
|
425
|
-
else:
|
426
|
-
namespace = kubernetes_utils.DEFAULT_NAMESPACE
|
427
|
-
|
428
|
-
user = current_context['context']['user']
|
429
|
-
cluster = current_context['context']['cluster']
|
430
|
-
return [f'{cluster}_{user}_{namespace}']
|
676
|
+
all_contexts, current_context = (
|
677
|
+
k8s.config.list_kube_config_contexts())
|
431
678
|
except k8s.config.config_exception.ConfigException:
|
432
679
|
return None
|
680
|
+
# Add current context at the head of the list
|
681
|
+
current_identity = [cls.get_identity_from_context(current_context)]
|
682
|
+
identities.append(current_identity)
|
683
|
+
for context in all_contexts:
|
684
|
+
identity = [cls.get_identity_from_context(context)]
|
685
|
+
identities.append(identity)
|
686
|
+
return identities
|
433
687
|
|
434
688
|
@classmethod
|
435
689
|
def is_label_valid(cls, label_key: str,
|