skypilot-nightly 1.0.0.dev20240910__py3-none-any.whl → 1.0.0.dev20240911__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/kubernetes.py +32 -67
- sky/authentication.py +12 -7
- sky/backends/backend_utils.py +40 -33
- sky/backends/cloud_vm_ray_backend.py +1 -1
- sky/check.py +1 -1
- sky/clouds/aws.py +8 -6
- sky/clouds/azure.py +7 -5
- sky/clouds/cloud.py +43 -14
- sky/clouds/cudo.py +1 -1
- sky/clouds/fluidstack.py +2 -2
- sky/clouds/gcp.py +12 -7
- sky/clouds/kubernetes.py +28 -15
- sky/clouds/lambda_cloud.py +2 -2
- sky/clouds/oci.py +1 -1
- sky/clouds/paperspace.py +1 -1
- sky/clouds/runpod.py +1 -1
- sky/clouds/scp.py +2 -2
- sky/clouds/service_catalog/aws_catalog.py +1 -1
- sky/clouds/vsphere.py +1 -1
- sky/provision/kubernetes/config.py +52 -34
- sky/provision/kubernetes/instance.py +73 -61
- sky/provision/kubernetes/network.py +11 -5
- sky/provision/kubernetes/network_utils.py +10 -8
- sky/provision/kubernetes/utils.py +72 -45
- sky/skylet/log_lib.py +4 -1
- sky/skylet/subprocess_daemon.py +47 -15
- sky/templates/kubernetes-port-forward-proxy-command.sh +29 -4
- sky/templates/kubernetes-ray.yml.j2 +5 -0
- sky/templates/lambda-ray.yml.j2 +2 -2
- sky/utils/command_runner.py +12 -6
- sky/utils/command_runner.pyi +1 -1
- sky/utils/kubernetes/rsync_helper.sh +12 -3
- {skypilot_nightly-1.0.0.dev20240910.dist-info → skypilot_nightly-1.0.0.dev20240911.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20240910.dist-info → skypilot_nightly-1.0.0.dev20240911.dist-info}/RECORD +39 -39
- {skypilot_nightly-1.0.0.dev20240910.dist-info → skypilot_nightly-1.0.0.dev20240911.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20240910.dist-info → skypilot_nightly-1.0.0.dev20240911.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20240910.dist-info → skypilot_nightly-1.0.0.dev20240911.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20240910.dist-info → skypilot_nightly-1.0.0.dev20240911.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'bad7dabf372f3512679098f63e4cfca8bc9b0870'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.
|
38
|
+
__version__ = '1.0.0.dev20240911'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/adaptors/kubernetes.py
CHANGED
@@ -1,10 +1,8 @@
|
|
1
1
|
"""Kubernetes adaptors"""
|
2
|
-
|
3
|
-
# pylint: disable=import-outside-toplevel
|
4
|
-
|
2
|
+
import functools
|
5
3
|
import logging
|
6
4
|
import os
|
7
|
-
from typing import Any, Callable, Set
|
5
|
+
from typing import Any, Callable, Optional, Set
|
8
6
|
|
9
7
|
from sky.adaptors import common
|
10
8
|
from sky.sky_logging import set_logging_level
|
@@ -18,15 +16,6 @@ kubernetes = common.LazyImport('kubernetes',
|
|
18
16
|
urllib3 = common.LazyImport('urllib3',
|
19
17
|
import_error_message=_IMPORT_ERROR_MESSAGE)
|
20
18
|
|
21
|
-
_configured = False
|
22
|
-
_core_api = None
|
23
|
-
_auth_api = None
|
24
|
-
_networking_api = None
|
25
|
-
_custom_objects_api = None
|
26
|
-
_node_api = None
|
27
|
-
_apps_api = None
|
28
|
-
_api_client = None
|
29
|
-
|
30
19
|
# Timeout to use for API calls
|
31
20
|
API_TIMEOUT = 5
|
32
21
|
|
@@ -66,10 +55,7 @@ def _api_logging_decorator(logger: str, level: int):
|
|
66
55
|
return decorated_api
|
67
56
|
|
68
57
|
|
69
|
-
def _load_config():
|
70
|
-
global _configured
|
71
|
-
if _configured:
|
72
|
-
return
|
58
|
+
def _load_config(context: Optional[str] = None):
|
73
59
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
74
60
|
try:
|
75
61
|
# Load in-cluster config if running in a pod
|
@@ -82,7 +68,7 @@ def _load_config():
|
|
82
68
|
kubernetes.config.load_incluster_config()
|
83
69
|
except kubernetes.config.config_exception.ConfigException:
|
84
70
|
try:
|
85
|
-
kubernetes.config.load_kube_config()
|
71
|
+
kubernetes.config.load_kube_config(context=context)
|
86
72
|
except kubernetes.config.config_exception.ConfigException as e:
|
87
73
|
suffix = ''
|
88
74
|
if env_options.Options.SHOW_DEBUG_INFO.get():
|
@@ -101,76 +87,55 @@ def _load_config():
|
|
101
87
|
err_str += '\nTo disable Kubernetes for SkyPilot: run `sky check`.'
|
102
88
|
with ux_utils.print_exception_no_traceback():
|
103
89
|
raise ValueError(err_str) from None
|
104
|
-
_configured = True
|
105
90
|
|
106
91
|
|
107
92
|
@_api_logging_decorator('urllib3', logging.ERROR)
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
_core_api = kubernetes.client.CoreV1Api()
|
113
|
-
return _core_api
|
93
|
+
@functools.lru_cache()
|
94
|
+
def core_api(context: Optional[str] = None):
|
95
|
+
_load_config(context)
|
96
|
+
return kubernetes.client.CoreV1Api()
|
114
97
|
|
115
98
|
|
116
99
|
@_api_logging_decorator('urllib3', logging.ERROR)
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
_auth_api = kubernetes.client.RbacAuthorizationV1Api()
|
122
|
-
|
123
|
-
return _auth_api
|
100
|
+
@functools.lru_cache()
|
101
|
+
def auth_api(context: Optional[str] = None):
|
102
|
+
_load_config(context)
|
103
|
+
return kubernetes.client.RbacAuthorizationV1Api()
|
124
104
|
|
125
105
|
|
126
106
|
@_api_logging_decorator('urllib3', logging.ERROR)
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
_networking_api = kubernetes.client.NetworkingV1Api()
|
132
|
-
|
133
|
-
return _networking_api
|
107
|
+
@functools.lru_cache()
|
108
|
+
def networking_api(context: Optional[str] = None):
|
109
|
+
_load_config(context)
|
110
|
+
return kubernetes.client.NetworkingV1Api()
|
134
111
|
|
135
112
|
|
136
113
|
@_api_logging_decorator('urllib3', logging.ERROR)
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
_custom_objects_api = kubernetes.client.CustomObjectsApi()
|
142
|
-
|
143
|
-
return _custom_objects_api
|
114
|
+
@functools.lru_cache()
|
115
|
+
def custom_objects_api(context: Optional[str] = None):
|
116
|
+
_load_config(context)
|
117
|
+
return kubernetes.client.CustomObjectsApi()
|
144
118
|
|
145
119
|
|
146
120
|
@_api_logging_decorator('urllib3', logging.ERROR)
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
_node_api = kubernetes.client.NodeV1Api()
|
152
|
-
|
153
|
-
return _node_api
|
121
|
+
@functools.lru_cache()
|
122
|
+
def node_api(context: Optional[str] = None):
|
123
|
+
_load_config(context)
|
124
|
+
return kubernetes.client.NodeV1Api()
|
154
125
|
|
155
126
|
|
156
127
|
@_api_logging_decorator('urllib3', logging.ERROR)
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
_apps_api = kubernetes.client.AppsV1Api()
|
162
|
-
|
163
|
-
return _apps_api
|
128
|
+
@functools.lru_cache()
|
129
|
+
def apps_api(context: Optional[str] = None):
|
130
|
+
_load_config(context)
|
131
|
+
return kubernetes.client.AppsV1Api()
|
164
132
|
|
165
133
|
|
166
134
|
@_api_logging_decorator('urllib3', logging.ERROR)
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
_api_client = kubernetes.client.ApiClient()
|
172
|
-
|
173
|
-
return _api_client
|
135
|
+
@functools.lru_cache()
|
136
|
+
def api_client(context: Optional[str] = None):
|
137
|
+
_load_config(context)
|
138
|
+
return kubernetes.client.ApiClient()
|
174
139
|
|
175
140
|
|
176
141
|
def api_exception():
|
sky/authentication.py
CHANGED
@@ -378,7 +378,11 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
378
378
|
public_key_path = os.path.expanduser(PUBLIC_SSH_KEY_PATH)
|
379
379
|
secret_name = clouds.Kubernetes.SKY_SSH_KEY_SECRET_NAME
|
380
380
|
secret_field_name = clouds.Kubernetes().ssh_key_secret_field_name
|
381
|
-
namespace =
|
381
|
+
namespace = config['provider'].get(
|
382
|
+
'namespace',
|
383
|
+
kubernetes_utils.get_current_kube_config_context_namespace())
|
384
|
+
context = config['provider'].get(
|
385
|
+
'context', kubernetes_utils.get_current_kube_config_context_name())
|
382
386
|
k8s = kubernetes.kubernetes
|
383
387
|
with open(public_key_path, 'r', encoding='utf-8') as f:
|
384
388
|
public_key = f.read()
|
@@ -399,14 +403,14 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
399
403
|
secret = k8s.client.V1Secret(
|
400
404
|
metadata=k8s.client.V1ObjectMeta(**secret_metadata),
|
401
405
|
string_data={secret_field_name: public_key})
|
402
|
-
if kubernetes_utils.check_secret_exists(secret_name, namespace):
|
406
|
+
if kubernetes_utils.check_secret_exists(secret_name, namespace, context):
|
403
407
|
logger.debug(f'Key {secret_name} exists in the cluster, patching it...')
|
404
|
-
kubernetes.core_api().patch_namespaced_secret(
|
405
|
-
|
408
|
+
kubernetes.core_api(context).patch_namespaced_secret(
|
409
|
+
secret_name, namespace, secret)
|
406
410
|
else:
|
407
411
|
logger.debug(
|
408
412
|
f'Key {secret_name} does not exist in the cluster, creating it...')
|
409
|
-
kubernetes.core_api().create_namespaced_secret(namespace, secret)
|
413
|
+
kubernetes.core_api(context).create_namespaced_secret(namespace, secret)
|
410
414
|
|
411
415
|
private_key_path, _ = get_or_generate_keys()
|
412
416
|
if network_mode == nodeport_mode:
|
@@ -415,13 +419,14 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
415
419
|
# Setup service for SSH jump pod. We create the SSH jump service here
|
416
420
|
# because we need to know the service IP address and port to set the
|
417
421
|
# ssh_proxy_command in the autoscaler config.
|
418
|
-
kubernetes_utils.setup_ssh_jump_svc(ssh_jump_name, namespace,
|
422
|
+
kubernetes_utils.setup_ssh_jump_svc(ssh_jump_name, namespace, context,
|
419
423
|
service_type)
|
420
424
|
ssh_proxy_cmd = kubernetes_utils.get_ssh_proxy_command(
|
421
425
|
ssh_jump_name,
|
422
426
|
nodeport_mode,
|
423
427
|
private_key_path=private_key_path,
|
424
|
-
namespace=namespace
|
428
|
+
namespace=namespace,
|
429
|
+
context=context)
|
425
430
|
elif network_mode == port_forward_mode:
|
426
431
|
# Using `kubectl port-forward` creates a direct tunnel to the pod and
|
427
432
|
# does not require a ssh jump pod.
|
sky/backends/backend_utils.py
CHANGED
@@ -1558,58 +1558,65 @@ def check_owner_identity(cluster_name: str) -> None:
|
|
1558
1558
|
return
|
1559
1559
|
|
1560
1560
|
cloud = handle.launched_resources.cloud
|
1561
|
-
|
1561
|
+
user_identities = cloud.get_user_identities()
|
1562
1562
|
owner_identity = record['owner']
|
1563
|
-
if
|
1563
|
+
if user_identities is None:
|
1564
1564
|
# Skip the check if the cloud does not support user identity.
|
1565
1565
|
return
|
1566
1566
|
# The user identity can be None, if the cluster is created by an older
|
1567
1567
|
# version of SkyPilot. In that case, we set the user identity to the
|
1568
|
-
# current one.
|
1568
|
+
# current active one.
|
1569
1569
|
# NOTE: a user who upgrades SkyPilot and switches to a new cloud identity
|
1570
1570
|
# immediately without `sky status --refresh` first, will cause a leakage
|
1571
1571
|
# of the existing cluster. We deem this an acceptable tradeoff mainly
|
1572
1572
|
# because multi-identity is not common (at least at the moment).
|
1573
1573
|
if owner_identity is None:
|
1574
1574
|
global_user_state.set_owner_identity_for_cluster(
|
1575
|
-
cluster_name,
|
1575
|
+
cluster_name, user_identities[0])
|
1576
1576
|
else:
|
1577
1577
|
assert isinstance(owner_identity, list)
|
1578
1578
|
# It is OK if the owner identity is shorter, which will happen when
|
1579
1579
|
# the cluster is launched before #1808. In that case, we only check
|
1580
1580
|
# the same length (zip will stop at the shorter one).
|
1581
|
-
for
|
1582
|
-
|
1583
|
-
|
1584
|
-
|
1585
|
-
|
1586
|
-
|
1587
|
-
|
1588
|
-
|
1589
|
-
|
1590
|
-
|
1591
|
-
|
1592
|
-
|
1593
|
-
|
1594
|
-
|
1595
|
-
|
1596
|
-
|
1597
|
-
|
1598
|
-
|
1599
|
-
|
1600
|
-
|
1601
|
-
|
1602
|
-
|
1603
|
-
|
1604
|
-
|
1605
|
-
|
1606
|
-
|
1607
|
-
|
1581
|
+
for identity in user_identities:
|
1582
|
+
for i, (owner, current) in enumerate(zip(owner_identity, identity)):
|
1583
|
+
# Clean up the owner identity for the backslash and newlines, caused
|
1584
|
+
# by the cloud CLI output, e.g. gcloud.
|
1585
|
+
owner = owner.replace('\n', '').replace('\\', '')
|
1586
|
+
if owner == current:
|
1587
|
+
if i != 0:
|
1588
|
+
logger.warning(
|
1589
|
+
f'The cluster was owned by {owner_identity}, but '
|
1590
|
+
f'a new identity {identity} is activated. We still '
|
1591
|
+
'allow the operation as the two identities are '
|
1592
|
+
'likely to have the same access to the cluster. '
|
1593
|
+
'Please be aware that this can cause unexpected '
|
1594
|
+
'cluster leakage if the two identities are not '
|
1595
|
+
'actually equivalent (e.g., belong to the same '
|
1596
|
+
'person).')
|
1597
|
+
if i != 0 or len(owner_identity) != len(identity):
|
1598
|
+
# We update the owner of a cluster, when:
|
1599
|
+
# 1. The strictest identty (i.e. the first one) does not
|
1600
|
+
# match, but the latter ones match.
|
1601
|
+
# 2. The length of the two identities are different,
|
1602
|
+
# which will only happen when the cluster is launched
|
1603
|
+
# before #1808. Update the user identity to avoid
|
1604
|
+
# showing the warning above again.
|
1605
|
+
global_user_state.set_owner_identity_for_cluster(
|
1606
|
+
cluster_name, identity)
|
1607
|
+
return # The user identity matches.
|
1608
|
+
# Generate error message if no match found
|
1609
|
+
if len(user_identities) == 1:
|
1610
|
+
err_msg = f'the activated identity is {user_identities[0]!r}.'
|
1611
|
+
else:
|
1612
|
+
err_msg = (f'available identities are {user_identities!r}.')
|
1613
|
+
if cloud.is_same_cloud(clouds.Kubernetes()):
|
1614
|
+
err_msg += (' Check your kubeconfig file and make sure the '
|
1615
|
+
'correct context is available.')
|
1608
1616
|
with ux_utils.print_exception_no_traceback():
|
1609
1617
|
raise exceptions.ClusterOwnerIdentityMismatchError(
|
1610
1618
|
f'{cluster_name!r} ({cloud}) is owned by account '
|
1611
|
-
f'{owner_identity!r}, but
|
1612
|
-
f'is {current_user_identity!r}.')
|
1619
|
+
f'{owner_identity!r}, but ' + err_msg)
|
1613
1620
|
|
1614
1621
|
|
1615
1622
|
def tag_filter_for_cluster(cluster_name: str) -> Dict[str, str]:
|
@@ -1945,7 +1945,7 @@ class RetryingVmProvisioner(object):
|
|
1945
1945
|
if dryrun:
|
1946
1946
|
cloud_user = None
|
1947
1947
|
else:
|
1948
|
-
cloud_user = to_provision.cloud.
|
1948
|
+
cloud_user = to_provision.cloud.get_active_user_identity()
|
1949
1949
|
|
1950
1950
|
requested_features = self._requested_features.copy()
|
1951
1951
|
# Skip stop feature for Kubernetes and RunPod controllers.
|
sky/check.py
CHANGED
@@ -44,7 +44,7 @@ def check(
|
|
44
44
|
if ok:
|
45
45
|
enabled_clouds.append(cloud_repr)
|
46
46
|
if verbose and cloud is not cloudflare:
|
47
|
-
activated_account = cloud.
|
47
|
+
activated_account = cloud.get_active_user_identity_str()
|
48
48
|
if activated_account is not None:
|
49
49
|
echo(f' Activated account: {activated_account}')
|
50
50
|
if reason is not None:
|
sky/clouds/aws.py
CHANGED
@@ -547,7 +547,7 @@ class AWS(clouds.Cloud):
|
|
547
547
|
# Checks if AWS credentials 1) exist and 2) are valid.
|
548
548
|
# https://stackoverflow.com/questions/53548737/verify-aws-credentials-with-boto3
|
549
549
|
try:
|
550
|
-
identity_str = cls.
|
550
|
+
identity_str = cls.get_active_user_identity_str()
|
551
551
|
except exceptions.CloudUserIdentityError as e:
|
552
552
|
return False, str(e)
|
553
553
|
|
@@ -584,7 +584,7 @@ class AWS(clouds.Cloud):
|
|
584
584
|
else:
|
585
585
|
# This file is required because it is required by the VMs launched on
|
586
586
|
# other clouds to access private s3 buckets and resources like EC2.
|
587
|
-
# `
|
587
|
+
# `get_active_user_identity` does not guarantee this file exists.
|
588
588
|
if not static_credential_exists:
|
589
589
|
return (False, '~/.aws/credentials does not exist. ' +
|
590
590
|
cls._STATIC_CREDENTIAL_HELP_STR)
|
@@ -648,7 +648,7 @@ class AWS(clouds.Cloud):
|
|
648
648
|
return AWSIdentityType.SHARED_CREDENTIALS_FILE
|
649
649
|
|
650
650
|
@classmethod
|
651
|
-
def
|
651
|
+
def get_user_identities(cls) -> Optional[List[List[str]]]:
|
652
652
|
"""Returns a [UserId, Account] list that uniquely identifies the user.
|
653
653
|
|
654
654
|
These fields come from `aws sts get-caller-identity`. We permit the same
|
@@ -752,11 +752,13 @@ class AWS(clouds.Cloud):
|
|
752
752
|
f'Failed to get AWS user.\n'
|
753
753
|
f' Reason: {common_utils.format_exception(e, use_bracket=True)}.'
|
754
754
|
) from None
|
755
|
-
|
755
|
+
# TODO: Return a list of identities in the profile when we support
|
756
|
+
# automatic switching for AWS. Currently we only support one identity.
|
757
|
+
return [user_ids]
|
756
758
|
|
757
759
|
@classmethod
|
758
|
-
def
|
759
|
-
user_identity = cls.
|
760
|
+
def get_active_user_identity_str(cls) -> Optional[str]:
|
761
|
+
user_identity = cls.get_active_user_identity()
|
760
762
|
if user_identity is None:
|
761
763
|
return None
|
762
764
|
identity_str = f'{user_identity[0]} [account={user_identity[1]}]'
|
sky/clouds/azure.py
CHANGED
@@ -483,7 +483,7 @@ class Azure(clouds.Cloud):
|
|
483
483
|
# If Azure is properly logged in, this will return the account email
|
484
484
|
# address + subscription ID.
|
485
485
|
try:
|
486
|
-
cls.
|
486
|
+
cls.get_active_user_identity()
|
487
487
|
except exceptions.CloudUserIdentityError as e:
|
488
488
|
return False, (f'Getting user\'s Azure identity failed.{help_str}\n'
|
489
489
|
f'{cls._INDENT_PREFIX}Details: '
|
@@ -516,7 +516,7 @@ class Azure(clouds.Cloud):
|
|
516
516
|
|
517
517
|
@classmethod
|
518
518
|
@functools.lru_cache(maxsize=1) # Cache since getting identity is slow.
|
519
|
-
def
|
519
|
+
def get_user_identities(cls) -> Optional[List[List[str]]]:
|
520
520
|
"""Returns the cloud user identity."""
|
521
521
|
# This returns the user's email address + [subscription_id].
|
522
522
|
retry_cnt = 0
|
@@ -558,11 +558,13 @@ class Azure(clouds.Cloud):
|
|
558
558
|
with ux_utils.print_exception_no_traceback():
|
559
559
|
raise exceptions.CloudUserIdentityError(
|
560
560
|
'Failed to get Azure project ID.') from e
|
561
|
-
|
561
|
+
# TODO: Return a list of identities in the profile when we support
|
562
|
+
# automatic switching for Az. Currently we only support one identity.
|
563
|
+
return [[f'{account_email} [subscription_id={project_id}]']]
|
562
564
|
|
563
565
|
@classmethod
|
564
|
-
def
|
565
|
-
user_identity = cls.
|
566
|
+
def get_active_user_identity_str(cls) -> Optional[str]:
|
567
|
+
user_identity = cls.get_active_user_identity()
|
566
568
|
if user_identity is None:
|
567
569
|
return None
|
568
570
|
return user_identity[0]
|
sky/clouds/cloud.py
CHANGED
@@ -441,11 +441,11 @@ class Cloud:
|
|
441
441
|
|
442
442
|
# TODO(zhwu): Make the return type immutable.
|
443
443
|
@classmethod
|
444
|
-
def
|
445
|
-
"""(Advanced) Returns
|
444
|
+
def get_user_identities(cls) -> Optional[List[List[str]]]:
|
445
|
+
"""(Advanced) Returns all available user identities of this cloud.
|
446
446
|
|
447
447
|
The user "identity" is associated with each SkyPilot cluster they
|
448
|
-
|
448
|
+
create. This is used in protecting cluster operations, such as
|
449
449
|
provision, teardown and status refreshing, in a multi-identity
|
450
450
|
scenario, where the same user/device can switch between different
|
451
451
|
cloud identities. We check that the user identity matches before:
|
@@ -453,10 +453,16 @@ class Cloud:
|
|
453
453
|
- Stopping/tearing down a cluster
|
454
454
|
- Refreshing the status of a cluster
|
455
455
|
|
456
|
-
Design
|
457
|
-
|
458
|
-
|
459
|
-
|
456
|
+
Design choices:
|
457
|
+
1. We allow the operations that can correctly work with a different
|
458
|
+
user identity, as a user should have full control over all their
|
459
|
+
clusters (no matter which identity it belongs to), e.g.,
|
460
|
+
submitting jobs, viewing logs, auto-stopping, etc.
|
461
|
+
2. A cloud implementation can optionally switch between different
|
462
|
+
identities if required for cluster operations. In this case,
|
463
|
+
the cloud implementation should return multiple identities
|
464
|
+
as a list. E.g., our Kubernetes implementation can use multiple
|
465
|
+
kubeconfig contexts to switch between different identities.
|
460
466
|
|
461
467
|
The choice of what constitutes an identity is up to each cloud's
|
462
468
|
implementation. In general, to suffice for the above purposes,
|
@@ -464,24 +470,34 @@ class Cloud:
|
|
464
470
|
resources are used when the user invoked each cloud's default
|
465
471
|
CLI/API.
|
466
472
|
|
467
|
-
|
473
|
+
An identity is a list of strings. The list is in the order of
|
468
474
|
strictness, i.e., the first element is the most strict identity, and
|
469
475
|
the last element is the least strict identity.
|
470
476
|
When performing an identity check between the current active identity
|
471
477
|
and the owner identity associated with a cluster, we compare the two
|
472
478
|
lists in order: if a position does not match, we go to the next. To
|
473
|
-
see an example, see the docstring of the AWS.
|
474
|
-
|
479
|
+
see an example, see the docstring of the AWS.get_user_identities.
|
475
480
|
|
476
481
|
Example identities (see cloud implementations):
|
477
482
|
- AWS: [UserId, AccountId]
|
478
483
|
- GCP: [email address + project ID]
|
479
484
|
- Azure: [email address + subscription ID]
|
485
|
+
- Kubernetes: [context name]
|
486
|
+
|
487
|
+
Example return values:
|
488
|
+
- AWS: [[UserId, AccountId]]
|
489
|
+
- GCP: [[email address + project ID]]
|
490
|
+
- Azure: [[email address + subscription ID]]
|
491
|
+
- Kubernetes: [[current active context], [context 2], ...]
|
480
492
|
|
481
493
|
Returns:
|
482
494
|
None if the cloud does not have a concept of user identity
|
483
495
|
(access protection will be disabled for these clusters);
|
484
|
-
otherwise the
|
496
|
+
otherwise a list of available identities with the current active
|
497
|
+
identity being the first element. Most clouds have only one identity
|
498
|
+
available, so the returned list will only have one element: the
|
499
|
+
current active identity.
|
500
|
+
|
485
501
|
Raises:
|
486
502
|
exceptions.CloudUserIdentityError: If the user identity cannot be
|
487
503
|
retrieved.
|
@@ -489,13 +505,26 @@ class Cloud:
|
|
489
505
|
return None
|
490
506
|
|
491
507
|
@classmethod
|
492
|
-
def
|
493
|
-
"""Returns a user friendly representation of the
|
494
|
-
user_identity = cls.
|
508
|
+
def get_active_user_identity_str(cls) -> Optional[str]:
|
509
|
+
"""Returns a user friendly representation of the active identity."""
|
510
|
+
user_identity = cls.get_active_user_identity()
|
495
511
|
if user_identity is None:
|
496
512
|
return None
|
497
513
|
return ', '.join(user_identity)
|
498
514
|
|
515
|
+
@classmethod
|
516
|
+
def get_active_user_identity(cls) -> Optional[List[str]]:
|
517
|
+
"""Returns currently active user identity of this cloud
|
518
|
+
|
519
|
+
See get_user_identities for definition of user identity.
|
520
|
+
|
521
|
+
Returns:
|
522
|
+
None if the cloud does not have a concept of user identity;
|
523
|
+
otherwise the current active identity.
|
524
|
+
"""
|
525
|
+
identities = cls.get_user_identities()
|
526
|
+
return identities[0] if identities is not None else None
|
527
|
+
|
499
528
|
def get_credential_file_mounts(self) -> Dict[str, str]:
|
500
529
|
"""Returns the files necessary to access this cloud.
|
501
530
|
|
sky/clouds/cudo.py
CHANGED
@@ -328,7 +328,7 @@ class Cudo(clouds.Cloud):
|
|
328
328
|
}
|
329
329
|
|
330
330
|
@classmethod
|
331
|
-
def
|
331
|
+
def get_user_identities(cls) -> Optional[List[List[str]]]:
|
332
332
|
# NOTE: used for very advanced SkyPilot functionality
|
333
333
|
# Can implement later if desired
|
334
334
|
return None
|
sky/clouds/fluidstack.py
CHANGED
@@ -291,8 +291,8 @@ class Fluidstack(clouds.Cloud):
|
|
291
291
|
return {filename: filename for filename in _CREDENTIAL_FILES}
|
292
292
|
|
293
293
|
@classmethod
|
294
|
-
def
|
295
|
-
# TODO(mjibril): Implement
|
294
|
+
def get_user_identities(cls) -> Optional[List[List[str]]]:
|
295
|
+
# TODO(mjibril): Implement get_active_user_identity for Fluidstack
|
296
296
|
return None
|
297
297
|
|
298
298
|
def instance_type_exists(self, instance_type: str) -> bool:
|
sky/clouds/gcp.py
CHANGED
@@ -715,7 +715,7 @@ class GCP(clouds.Cloud):
|
|
715
715
|
project_id = cls.get_project_id()
|
716
716
|
|
717
717
|
# Check if the user is activated.
|
718
|
-
identity = cls.
|
718
|
+
identity = cls.get_active_user_identity()
|
719
719
|
except (auth.exceptions.DefaultCredentialsError,
|
720
720
|
exceptions.CloudUserIdentityError) as e:
|
721
721
|
# See also: https://stackoverflow.com/a/53307505/1165051
|
@@ -826,16 +826,19 @@ class GCP(clouds.Cloud):
|
|
826
826
|
@classmethod
|
827
827
|
def _get_identity_type(cls) -> Optional[GCPIdentityType]:
|
828
828
|
try:
|
829
|
-
account = cls.
|
829
|
+
account = cls.get_active_user_identity()
|
830
830
|
except exceptions.CloudUserIdentityError:
|
831
831
|
return None
|
832
|
-
if
|
832
|
+
if account is None:
|
833
|
+
return None
|
834
|
+
assert account is not None
|
835
|
+
if GCPIdentityType.SERVICE_ACCOUNT.value in account[0]:
|
833
836
|
return GCPIdentityType.SERVICE_ACCOUNT
|
834
837
|
return GCPIdentityType.SHARED_CREDENTIALS_FILE
|
835
838
|
|
836
839
|
@classmethod
|
837
840
|
@functools.lru_cache(maxsize=1) # Cache since getting identity is slow.
|
838
|
-
def
|
841
|
+
def get_user_identities(cls) -> List[List[str]]:
|
839
842
|
"""Returns the email address + project id of the active user."""
|
840
843
|
try:
|
841
844
|
account = _run_output('gcloud auth list --filter=status:ACTIVE '
|
@@ -866,11 +869,13 @@ class GCP(clouds.Cloud):
|
|
866
869
|
' Reason: '
|
867
870
|
f'{common_utils.format_exception(e, use_bracket=True)}'
|
868
871
|
) from e
|
869
|
-
|
872
|
+
# TODO: Return a list of identities in the profile when we support
|
873
|
+
# automatic switching for GCP. Currently we only support one identity.
|
874
|
+
return [[f'{account} [project_id={project_id}]']]
|
870
875
|
|
871
876
|
@classmethod
|
872
|
-
def
|
873
|
-
user_identity = cls.
|
877
|
+
def get_active_user_identity_str(cls) -> Optional[str]:
|
878
|
+
user_identity = cls.get_active_user_identity()
|
874
879
|
if user_identity is None:
|
875
880
|
return None
|
876
881
|
return user_identity[0].replace('\n', '')
|