skypilot-nightly 1.0.0.dev20240910__py3-none-any.whl → 1.0.0.dev20240912__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +32 -67
  3. sky/authentication.py +12 -7
  4. sky/backends/backend_utils.py +40 -33
  5. sky/backends/cloud_vm_ray_backend.py +1 -1
  6. sky/check.py +1 -1
  7. sky/clouds/aws.py +14 -6
  8. sky/clouds/azure.py +7 -5
  9. sky/clouds/cloud.py +43 -14
  10. sky/clouds/cudo.py +1 -1
  11. sky/clouds/fluidstack.py +2 -2
  12. sky/clouds/gcp.py +12 -7
  13. sky/clouds/kubernetes.py +28 -15
  14. sky/clouds/lambda_cloud.py +2 -2
  15. sky/clouds/oci.py +1 -1
  16. sky/clouds/paperspace.py +1 -1
  17. sky/clouds/runpod.py +1 -1
  18. sky/clouds/scp.py +2 -2
  19. sky/clouds/service_catalog/aws_catalog.py +1 -1
  20. sky/clouds/vsphere.py +1 -1
  21. sky/provision/kubernetes/config.py +52 -34
  22. sky/provision/kubernetes/instance.py +73 -61
  23. sky/provision/kubernetes/network.py +11 -5
  24. sky/provision/kubernetes/network_utils.py +10 -8
  25. sky/provision/kubernetes/utils.py +72 -45
  26. sky/skylet/log_lib.py +4 -1
  27. sky/skylet/subprocess_daemon.py +47 -15
  28. sky/templates/kubernetes-port-forward-proxy-command.sh +29 -4
  29. sky/templates/kubernetes-ray.yml.j2 +5 -0
  30. sky/templates/lambda-ray.yml.j2 +2 -2
  31. sky/utils/command_runner.py +12 -6
  32. sky/utils/command_runner.pyi +1 -1
  33. sky/utils/kubernetes/rsync_helper.sh +12 -3
  34. {skypilot_nightly-1.0.0.dev20240910.dist-info → skypilot_nightly-1.0.0.dev20240912.dist-info}/METADATA +3 -1
  35. {skypilot_nightly-1.0.0.dev20240910.dist-info → skypilot_nightly-1.0.0.dev20240912.dist-info}/RECORD +39 -39
  36. {skypilot_nightly-1.0.0.dev20240910.dist-info → skypilot_nightly-1.0.0.dev20240912.dist-info}/LICENSE +0 -0
  37. {skypilot_nightly-1.0.0.dev20240910.dist-info → skypilot_nightly-1.0.0.dev20240912.dist-info}/WHEEL +0 -0
  38. {skypilot_nightly-1.0.0.dev20240910.dist-info → skypilot_nightly-1.0.0.dev20240912.dist-info}/entry_points.txt +0 -0
  39. {skypilot_nightly-1.0.0.dev20240910.dist-info → skypilot_nightly-1.0.0.dev20240912.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = 'db9aeb029d07cf7452751b4658af46e187985a98'
8
+ _SKYPILOT_COMMIT_SHA = '49b27fc889efa20ca688909b624f01da214a7be8'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20240910'
38
+ __version__ = '1.0.0.dev20240912'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/adaptors/kubernetes.py CHANGED
@@ -1,10 +1,8 @@
1
1
  """Kubernetes adaptors"""
2
-
3
- # pylint: disable=import-outside-toplevel
4
-
2
+ import functools
5
3
  import logging
6
4
  import os
7
- from typing import Any, Callable, Set
5
+ from typing import Any, Callable, Optional, Set
8
6
 
9
7
  from sky.adaptors import common
10
8
  from sky.sky_logging import set_logging_level
@@ -18,15 +16,6 @@ kubernetes = common.LazyImport('kubernetes',
18
16
  urllib3 = common.LazyImport('urllib3',
19
17
  import_error_message=_IMPORT_ERROR_MESSAGE)
20
18
 
21
- _configured = False
22
- _core_api = None
23
- _auth_api = None
24
- _networking_api = None
25
- _custom_objects_api = None
26
- _node_api = None
27
- _apps_api = None
28
- _api_client = None
29
-
30
19
  # Timeout to use for API calls
31
20
  API_TIMEOUT = 5
32
21
 
@@ -66,10 +55,7 @@ def _api_logging_decorator(logger: str, level: int):
66
55
  return decorated_api
67
56
 
68
57
 
69
- def _load_config():
70
- global _configured
71
- if _configured:
72
- return
58
+ def _load_config(context: Optional[str] = None):
73
59
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
74
60
  try:
75
61
  # Load in-cluster config if running in a pod
@@ -82,7 +68,7 @@ def _load_config():
82
68
  kubernetes.config.load_incluster_config()
83
69
  except kubernetes.config.config_exception.ConfigException:
84
70
  try:
85
- kubernetes.config.load_kube_config()
71
+ kubernetes.config.load_kube_config(context=context)
86
72
  except kubernetes.config.config_exception.ConfigException as e:
87
73
  suffix = ''
88
74
  if env_options.Options.SHOW_DEBUG_INFO.get():
@@ -101,76 +87,55 @@ def _load_config():
101
87
  err_str += '\nTo disable Kubernetes for SkyPilot: run `sky check`.'
102
88
  with ux_utils.print_exception_no_traceback():
103
89
  raise ValueError(err_str) from None
104
- _configured = True
105
90
 
106
91
 
107
92
  @_api_logging_decorator('urllib3', logging.ERROR)
108
- def core_api():
109
- global _core_api
110
- if _core_api is None:
111
- _load_config()
112
- _core_api = kubernetes.client.CoreV1Api()
113
- return _core_api
93
+ @functools.lru_cache()
94
+ def core_api(context: Optional[str] = None):
95
+ _load_config(context)
96
+ return kubernetes.client.CoreV1Api()
114
97
 
115
98
 
116
99
  @_api_logging_decorator('urllib3', logging.ERROR)
117
- def auth_api():
118
- global _auth_api
119
- if _auth_api is None:
120
- _load_config()
121
- _auth_api = kubernetes.client.RbacAuthorizationV1Api()
122
-
123
- return _auth_api
100
+ @functools.lru_cache()
101
+ def auth_api(context: Optional[str] = None):
102
+ _load_config(context)
103
+ return kubernetes.client.RbacAuthorizationV1Api()
124
104
 
125
105
 
126
106
  @_api_logging_decorator('urllib3', logging.ERROR)
127
- def networking_api():
128
- global _networking_api
129
- if _networking_api is None:
130
- _load_config()
131
- _networking_api = kubernetes.client.NetworkingV1Api()
132
-
133
- return _networking_api
107
+ @functools.lru_cache()
108
+ def networking_api(context: Optional[str] = None):
109
+ _load_config(context)
110
+ return kubernetes.client.NetworkingV1Api()
134
111
 
135
112
 
136
113
  @_api_logging_decorator('urllib3', logging.ERROR)
137
- def custom_objects_api():
138
- global _custom_objects_api
139
- if _custom_objects_api is None:
140
- _load_config()
141
- _custom_objects_api = kubernetes.client.CustomObjectsApi()
142
-
143
- return _custom_objects_api
114
+ @functools.lru_cache()
115
+ def custom_objects_api(context: Optional[str] = None):
116
+ _load_config(context)
117
+ return kubernetes.client.CustomObjectsApi()
144
118
 
145
119
 
146
120
  @_api_logging_decorator('urllib3', logging.ERROR)
147
- def node_api():
148
- global _node_api
149
- if _node_api is None:
150
- _load_config()
151
- _node_api = kubernetes.client.NodeV1Api()
152
-
153
- return _node_api
121
+ @functools.lru_cache()
122
+ def node_api(context: Optional[str] = None):
123
+ _load_config(context)
124
+ return kubernetes.client.NodeV1Api()
154
125
 
155
126
 
156
127
  @_api_logging_decorator('urllib3', logging.ERROR)
157
- def apps_api():
158
- global _apps_api
159
- if _apps_api is None:
160
- _load_config()
161
- _apps_api = kubernetes.client.AppsV1Api()
162
-
163
- return _apps_api
128
+ @functools.lru_cache()
129
+ def apps_api(context: Optional[str] = None):
130
+ _load_config(context)
131
+ return kubernetes.client.AppsV1Api()
164
132
 
165
133
 
166
134
  @_api_logging_decorator('urllib3', logging.ERROR)
167
- def api_client():
168
- global _api_client
169
- if _api_client is None:
170
- _load_config()
171
- _api_client = kubernetes.client.ApiClient()
172
-
173
- return _api_client
135
+ @functools.lru_cache()
136
+ def api_client(context: Optional[str] = None):
137
+ _load_config(context)
138
+ return kubernetes.client.ApiClient()
174
139
 
175
140
 
176
141
  def api_exception():
sky/authentication.py CHANGED
@@ -378,7 +378,11 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
378
378
  public_key_path = os.path.expanduser(PUBLIC_SSH_KEY_PATH)
379
379
  secret_name = clouds.Kubernetes.SKY_SSH_KEY_SECRET_NAME
380
380
  secret_field_name = clouds.Kubernetes().ssh_key_secret_field_name
381
- namespace = kubernetes_utils.get_current_kube_config_context_namespace()
381
+ namespace = config['provider'].get(
382
+ 'namespace',
383
+ kubernetes_utils.get_current_kube_config_context_namespace())
384
+ context = config['provider'].get(
385
+ 'context', kubernetes_utils.get_current_kube_config_context_name())
382
386
  k8s = kubernetes.kubernetes
383
387
  with open(public_key_path, 'r', encoding='utf-8') as f:
384
388
  public_key = f.read()
@@ -399,14 +403,14 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
399
403
  secret = k8s.client.V1Secret(
400
404
  metadata=k8s.client.V1ObjectMeta(**secret_metadata),
401
405
  string_data={secret_field_name: public_key})
402
- if kubernetes_utils.check_secret_exists(secret_name, namespace):
406
+ if kubernetes_utils.check_secret_exists(secret_name, namespace, context):
403
407
  logger.debug(f'Key {secret_name} exists in the cluster, patching it...')
404
- kubernetes.core_api().patch_namespaced_secret(secret_name, namespace,
405
- secret)
408
+ kubernetes.core_api(context).patch_namespaced_secret(
409
+ secret_name, namespace, secret)
406
410
  else:
407
411
  logger.debug(
408
412
  f'Key {secret_name} does not exist in the cluster, creating it...')
409
- kubernetes.core_api().create_namespaced_secret(namespace, secret)
413
+ kubernetes.core_api(context).create_namespaced_secret(namespace, secret)
410
414
 
411
415
  private_key_path, _ = get_or_generate_keys()
412
416
  if network_mode == nodeport_mode:
@@ -415,13 +419,14 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
415
419
  # Setup service for SSH jump pod. We create the SSH jump service here
416
420
  # because we need to know the service IP address and port to set the
417
421
  # ssh_proxy_command in the autoscaler config.
418
- kubernetes_utils.setup_ssh_jump_svc(ssh_jump_name, namespace,
422
+ kubernetes_utils.setup_ssh_jump_svc(ssh_jump_name, namespace, context,
419
423
  service_type)
420
424
  ssh_proxy_cmd = kubernetes_utils.get_ssh_proxy_command(
421
425
  ssh_jump_name,
422
426
  nodeport_mode,
423
427
  private_key_path=private_key_path,
424
- namespace=namespace)
428
+ namespace=namespace,
429
+ context=context)
425
430
  elif network_mode == port_forward_mode:
426
431
  # Using `kubectl port-forward` creates a direct tunnel to the pod and
427
432
  # does not require a ssh jump pod.
sky/backends/backend_utils.py CHANGED
@@ -1558,58 +1558,65 @@ def check_owner_identity(cluster_name: str) -> None:
1558
1558
  return
1559
1559
 
1560
1560
  cloud = handle.launched_resources.cloud
1561
- current_user_identity = cloud.get_current_user_identity()
1561
+ user_identities = cloud.get_user_identities()
1562
1562
  owner_identity = record['owner']
1563
- if current_user_identity is None:
1563
+ if user_identities is None:
1564
1564
  # Skip the check if the cloud does not support user identity.
1565
1565
  return
1566
1566
  # The user identity can be None, if the cluster is created by an older
1567
1567
  # version of SkyPilot. In that case, we set the user identity to the
1568
- # current one.
1568
+ # current active one.
1569
1569
  # NOTE: a user who upgrades SkyPilot and switches to a new cloud identity
1570
1570
  # immediately without `sky status --refresh` first, will cause a leakage
1571
1571
  # of the existing cluster. We deem this an acceptable tradeoff mainly
1572
1572
  # because multi-identity is not common (at least at the moment).
1573
1573
  if owner_identity is None:
1574
1574
  global_user_state.set_owner_identity_for_cluster(
1575
- cluster_name, current_user_identity)
1575
+ cluster_name, user_identities[0])
1576
1576
  else:
1577
1577
  assert isinstance(owner_identity, list)
1578
1578
  # It is OK if the owner identity is shorter, which will happen when
1579
1579
  # the cluster is launched before #1808. In that case, we only check
1580
1580
  # the same length (zip will stop at the shorter one).
1581
- for i, (owner,
1582
- current) in enumerate(zip(owner_identity,
1583
- current_user_identity)):
1584
- # Clean up the owner identity for the backslash and newlines, caused
1585
- # by the cloud CLI output, e.g. gcloud.
1586
- owner = owner.replace('\n', '').replace('\\', '')
1587
- if owner == current:
1588
- if i != 0:
1589
- logger.warning(
1590
- f'The cluster was owned by {owner_identity}, but '
1591
- f'a new identity {current_user_identity} is activated. We still '
1592
- 'allow the operation as the two identities are likely to have '
1593
- 'the same access to the cluster. Please be aware that this can '
1594
- 'cause unexpected cluster leakage if the two identities are not '
1595
- 'actually equivalent (e.g., belong to the same person).'
1596
- )
1597
- if i != 0 or len(owner_identity) != len(current_user_identity):
1598
- # We update the owner of a cluster, when:
1599
- # 1. The strictest identty (i.e. the first one) does not
1600
- # match, but the latter ones match.
1601
- # 2. The length of the two identities are different, which
1602
- # will only happen when the cluster is launched before #1808.
1603
- # Update the user identity to avoid showing the warning above
1604
- # again.
1605
- global_user_state.set_owner_identity_for_cluster(
1606
- cluster_name, current_user_identity)
1607
- return # The user identity matches.
1581
+ for identity in user_identities:
1582
+ for i, (owner, current) in enumerate(zip(owner_identity, identity)):
1583
+ # Clean up the owner identity for the backslash and newlines, caused
1584
+ # by the cloud CLI output, e.g. gcloud.
1585
+ owner = owner.replace('\n', '').replace('\\', '')
1586
+ if owner == current:
1587
+ if i != 0:
1588
+ logger.warning(
1589
+ f'The cluster was owned by {owner_identity}, but '
1590
+ f'a new identity {identity} is activated. We still '
1591
+ 'allow the operation as the two identities are '
1592
+ 'likely to have the same access to the cluster. '
1593
+ 'Please be aware that this can cause unexpected '
1594
+ 'cluster leakage if the two identities are not '
1595
+ 'actually equivalent (e.g., belong to the same '
1596
+ 'person).')
1597
+ if i != 0 or len(owner_identity) != len(identity):
1598
+ # We update the owner of a cluster, when:
1599
+ # 1. The strictest identity (i.e. the first one) does not
1600
+ # match, but the latter ones match.
1601
+ # 2. The length of the two identities are different,
1602
+ # which will only happen when the cluster is launched
1603
+ # before #1808. Update the user identity to avoid
1604
+ # showing the warning above again.
1605
+ global_user_state.set_owner_identity_for_cluster(
1606
+ cluster_name, identity)
1607
+ return # The user identity matches.
1608
+ # Generate error message if no match found
1609
+ if len(user_identities) == 1:
1610
+ err_msg = f'the activated identity is {user_identities[0]!r}.'
1611
+ else:
1612
+ err_msg = (f'available identities are {user_identities!r}.')
1613
+ if cloud.is_same_cloud(clouds.Kubernetes()):
1614
+ err_msg += (' Check your kubeconfig file and make sure the '
1615
+ 'correct context is available.')
1608
1616
  with ux_utils.print_exception_no_traceback():
1609
1617
  raise exceptions.ClusterOwnerIdentityMismatchError(
1610
1618
  f'{cluster_name!r} ({cloud}) is owned by account '
1611
- f'{owner_identity!r}, but the activated account '
1612
- f'is {current_user_identity!r}.')
1619
+ f'{owner_identity!r}, but ' + err_msg)
1613
1620
 
1614
1621
 
1615
1622
  def tag_filter_for_cluster(cluster_name: str) -> Dict[str, str]:
sky/backends/cloud_vm_ray_backend.py CHANGED
@@ -1945,7 +1945,7 @@ class RetryingVmProvisioner(object):
1945
1945
  if dryrun:
1946
1946
  cloud_user = None
1947
1947
  else:
1948
- cloud_user = to_provision.cloud.get_current_user_identity()
1948
+ cloud_user = to_provision.cloud.get_active_user_identity()
1949
1949
 
1950
1950
  requested_features = self._requested_features.copy()
1951
1951
  # Skip stop feature for Kubernetes and RunPod controllers.
sky/check.py CHANGED
@@ -44,7 +44,7 @@ def check(
44
44
  if ok:
45
45
  enabled_clouds.append(cloud_repr)
46
46
  if verbose and cloud is not cloudflare:
47
- activated_account = cloud.get_current_user_identity_str()
47
+ activated_account = cloud.get_active_user_identity_str()
48
48
  if activated_account is not None:
49
49
  echo(f' Activated account: {activated_account}')
50
50
  if reason is not None:
sky/clouds/aws.py CHANGED
@@ -547,7 +547,7 @@ class AWS(clouds.Cloud):
547
547
  # Checks if AWS credentials 1) exist and 2) are valid.
548
548
  # https://stackoverflow.com/questions/53548737/verify-aws-credentials-with-boto3
549
549
  try:
550
- identity_str = cls.get_current_user_identity_str()
550
+ identity_str = cls.get_active_user_identity_str()
551
551
  except exceptions.CloudUserIdentityError as e:
552
552
  return False, str(e)
553
553
 
@@ -584,7 +584,7 @@ class AWS(clouds.Cloud):
584
584
  else:
585
585
  # This file is required because it is required by the VMs launched on
586
586
  # other clouds to access private s3 buckets and resources like EC2.
587
- # `get_current_user_identity` does not guarantee this file exists.
587
+ # `get_active_user_identity` does not guarantee this file exists.
588
588
  if not static_credential_exists:
589
589
  return (False, '~/.aws/credentials does not exist. ' +
590
590
  cls._STATIC_CREDENTIAL_HELP_STR)
@@ -648,7 +648,7 @@ class AWS(clouds.Cloud):
648
648
  return AWSIdentityType.SHARED_CREDENTIALS_FILE
649
649
 
650
650
  @classmethod
651
- def get_current_user_identity(cls) -> Optional[List[str]]:
651
+ def get_user_identities(cls) -> Optional[List[List[str]]]:
652
652
  """Returns a [UserId, Account] list that uniquely identifies the user.
653
653
 
654
654
  These fields come from `aws sts get-caller-identity`. We permit the same
@@ -752,11 +752,13 @@ class AWS(clouds.Cloud):
752
752
  f'Failed to get AWS user.\n'
753
753
  f' Reason: {common_utils.format_exception(e, use_bracket=True)}.'
754
754
  ) from None
755
- return user_ids
755
+ # TODO: Return a list of identities in the profile when we support
756
+ # automatic switching for AWS. Currently we only support one identity.
757
+ return [user_ids]
756
758
 
757
759
  @classmethod
758
- def get_current_user_identity_str(cls) -> Optional[str]:
759
- user_identity = cls.get_current_user_identity()
760
+ def get_active_user_identity_str(cls) -> Optional[str]:
761
+ user_identity = cls.get_active_user_identity()
760
762
  if user_identity is None:
761
763
  return None
762
764
  identity_str = f'{user_identity[0]} [account={user_identity[1]}]'
@@ -856,6 +858,12 @@ class AWS(clouds.Cloud):
856
858
  # Quota code not found in the catalog for the chosen instance_type, try provisioning anyway
857
859
  return True
858
860
 
861
+ if aws_utils.use_reservations():
862
+ # When reservations are used, it is possible that a user has
863
+ # reservations for an instance type, but does not have the quota
864
+ # for that instance type. Skipping the quota check in this case.
865
+ return True
866
+
859
867
  client = aws.client('service-quotas', region_name=region)
860
868
  try:
861
869
  response = client.get_service_quota(ServiceCode='ec2',
sky/clouds/azure.py CHANGED
@@ -483,7 +483,7 @@ class Azure(clouds.Cloud):
483
483
  # If Azure is properly logged in, this will return the account email
484
484
  # address + subscription ID.
485
485
  try:
486
- cls.get_current_user_identity()
486
+ cls.get_active_user_identity()
487
487
  except exceptions.CloudUserIdentityError as e:
488
488
  return False, (f'Getting user\'s Azure identity failed.{help_str}\n'
489
489
  f'{cls._INDENT_PREFIX}Details: '
@@ -516,7 +516,7 @@ class Azure(clouds.Cloud):
516
516
 
517
517
  @classmethod
518
518
  @functools.lru_cache(maxsize=1) # Cache since getting identity is slow.
519
- def get_current_user_identity(cls) -> Optional[List[str]]:
519
+ def get_user_identities(cls) -> Optional[List[List[str]]]:
520
520
  """Returns the cloud user identity."""
521
521
  # This returns the user's email address + [subscription_id].
522
522
  retry_cnt = 0
@@ -558,11 +558,13 @@ class Azure(clouds.Cloud):
558
558
  with ux_utils.print_exception_no_traceback():
559
559
  raise exceptions.CloudUserIdentityError(
560
560
  'Failed to get Azure project ID.') from e
561
- return [f'{account_email} [subscription_id={project_id}]']
561
+ # TODO: Return a list of identities in the profile when we support
562
+ # automatic switching for Azure. Currently we only support one identity.
563
+ return [[f'{account_email} [subscription_id={project_id}]']]
562
564
 
563
565
  @classmethod
564
- def get_current_user_identity_str(cls) -> Optional[str]:
565
- user_identity = cls.get_current_user_identity()
566
+ def get_active_user_identity_str(cls) -> Optional[str]:
567
+ user_identity = cls.get_active_user_identity()
566
568
  if user_identity is None:
567
569
  return None
568
570
  return user_identity[0]
sky/clouds/cloud.py CHANGED
@@ -441,11 +441,11 @@ class Cloud:
441
441
 
442
442
  # TODO(zhwu): Make the return type immutable.
443
443
  @classmethod
444
- def get_current_user_identity(cls) -> Optional[List[str]]:
445
- """(Advanced) Returns currently active user identity of this cloud.
444
+ def get_user_identities(cls) -> Optional[List[List[str]]]:
445
+ """(Advanced) Returns all available user identities of this cloud.
446
446
 
447
447
  The user "identity" is associated with each SkyPilot cluster they
448
- creates. This is used in protecting cluster operations, such as
448
+ create. This is used in protecting cluster operations, such as
449
449
  provision, teardown and status refreshing, in a multi-identity
450
450
  scenario, where the same user/device can switch between different
451
451
  cloud identities. We check that the user identity matches before:
@@ -453,10 +453,16 @@ class Cloud:
453
453
  - Stopping/tearing down a cluster
454
454
  - Refreshing the status of a cluster
455
455
 
456
- Design choice: we allow the operations that can correctly work with
457
- a different user identity, as a user should have full control over
458
- all their clusters (no matter which identity it belongs to), e.g.,
459
- submitting jobs, viewing logs, auto-stopping, etc.
456
+ Design choices:
457
+ 1. We allow the operations that can correctly work with a different
458
+ user identity, as a user should have full control over all their
459
+ clusters (no matter which identity it belongs to), e.g.,
460
+ submitting jobs, viewing logs, auto-stopping, etc.
461
+ 2. A cloud implementation can optionally switch between different
462
+ identities if required for cluster operations. In this case,
463
+ the cloud implementation should return multiple identities
464
+ as a list. E.g., our Kubernetes implementation can use multiple
465
+ kubeconfig contexts to switch between different identities.
460
466
 
461
467
  The choice of what constitutes an identity is up to each cloud's
462
468
  implementation. In general, to suffice for the above purposes,
@@ -464,24 +470,34 @@ class Cloud:
464
470
  resources are used when the user invoked each cloud's default
465
471
  CLI/API.
466
472
 
467
- The returned identity is a list of strings. The list is in the order of
473
+ An identity is a list of strings. The list is in the order of
468
474
  strictness, i.e., the first element is the most strict identity, and
469
475
  the last element is the least strict identity.
470
476
  When performing an identity check between the current active identity
471
477
  and the owner identity associated with a cluster, we compare the two
472
478
  lists in order: if a position does not match, we go to the next. To
473
- see an example, see the docstring of the AWS.get_current_user_identity.
474
-
479
+ see an example, see the docstring of the AWS.get_user_identities.
475
480
 
476
481
  Example identities (see cloud implementations):
477
482
  - AWS: [UserId, AccountId]
478
483
  - GCP: [email address + project ID]
479
484
  - Azure: [email address + subscription ID]
485
+ - Kubernetes: [context name]
486
+
487
+ Example return values:
488
+ - AWS: [[UserId, AccountId]]
489
+ - GCP: [[email address + project ID]]
490
+ - Azure: [[email address + subscription ID]]
491
+ - Kubernetes: [[current active context], [context 2], ...]
480
492
 
481
493
  Returns:
482
494
  None if the cloud does not have a concept of user identity
483
495
  (access protection will be disabled for these clusters);
484
- otherwise the currently active user identity.
496
+ otherwise a list of available identities with the current active
497
+ identity being the first element. Most clouds have only one identity
498
+ available, so the returned list will only have one element: the
499
+ current active identity.
500
+
485
501
  Raises:
486
502
  exceptions.CloudUserIdentityError: If the user identity cannot be
487
503
  retrieved.
@@ -489,13 +505,26 @@ class Cloud:
489
505
  return None
490
506
 
491
507
  @classmethod
492
- def get_current_user_identity_str(cls) -> Optional[str]:
493
- """Returns a user friendly representation of the current identity."""
494
- user_identity = cls.get_current_user_identity()
508
+ def get_active_user_identity_str(cls) -> Optional[str]:
509
+ """Returns a user friendly representation of the active identity."""
510
+ user_identity = cls.get_active_user_identity()
495
511
  if user_identity is None:
496
512
  return None
497
513
  return ', '.join(user_identity)
498
514
 
515
+ @classmethod
516
+ def get_active_user_identity(cls) -> Optional[List[str]]:
517
+ """Returns currently active user identity of this cloud
518
+
519
+ See get_user_identities for definition of user identity.
520
+
521
+ Returns:
522
+ None if the cloud does not have a concept of user identity;
523
+ otherwise the current active identity.
524
+ """
525
+ identities = cls.get_user_identities()
526
+ return identities[0] if identities is not None else None
527
+
499
528
  def get_credential_file_mounts(self) -> Dict[str, str]:
500
529
  """Returns the files necessary to access this cloud.
501
530
 
sky/clouds/cudo.py CHANGED
@@ -328,7 +328,7 @@ class Cudo(clouds.Cloud):
328
328
  }
329
329
 
330
330
  @classmethod
331
- def get_current_user_identity(cls) -> Optional[List[str]]:
331
+ def get_user_identities(cls) -> Optional[List[List[str]]]:
332
332
  # NOTE: used for very advanced SkyPilot functionality
333
333
  # Can implement later if desired
334
334
  return None
sky/clouds/fluidstack.py CHANGED
@@ -291,8 +291,8 @@ class Fluidstack(clouds.Cloud):
291
291
  return {filename: filename for filename in _CREDENTIAL_FILES}
292
292
 
293
293
  @classmethod
294
- def get_current_user_identity(cls) -> Optional[List[str]]:
295
- # TODO(mjibril): Implement get_current_user_identity for Fluidstack
294
+ def get_user_identities(cls) -> Optional[List[List[str]]]:
295
+ # TODO(mjibril): Implement get_user_identities for Fluidstack
296
296
  return None
297
297
 
298
298
  def instance_type_exists(self, instance_type: str) -> bool:
sky/clouds/gcp.py CHANGED
@@ -715,7 +715,7 @@ class GCP(clouds.Cloud):
715
715
  project_id = cls.get_project_id()
716
716
 
717
717
  # Check if the user is activated.
718
- identity = cls.get_current_user_identity()
718
+ identity = cls.get_active_user_identity()
719
719
  except (auth.exceptions.DefaultCredentialsError,
720
720
  exceptions.CloudUserIdentityError) as e:
721
721
  # See also: https://stackoverflow.com/a/53307505/1165051
@@ -826,16 +826,19 @@ class GCP(clouds.Cloud):
826
826
  @classmethod
827
827
  def _get_identity_type(cls) -> Optional[GCPIdentityType]:
828
828
  try:
829
- account = cls.get_current_user_identity()[0]
829
+ account = cls.get_active_user_identity()
830
830
  except exceptions.CloudUserIdentityError:
831
831
  return None
832
- if GCPIdentityType.SERVICE_ACCOUNT.value in account:
832
+ if account is None:
833
+ return None
834
+ assert account is not None
835
+ if GCPIdentityType.SERVICE_ACCOUNT.value in account[0]:
833
836
  return GCPIdentityType.SERVICE_ACCOUNT
834
837
  return GCPIdentityType.SHARED_CREDENTIALS_FILE
835
838
 
836
839
  @classmethod
837
840
  @functools.lru_cache(maxsize=1) # Cache since getting identity is slow.
838
- def get_current_user_identity(cls) -> List[str]:
841
+ def get_user_identities(cls) -> List[List[str]]:
839
842
  """Returns the email address + project id of the active user."""
840
843
  try:
841
844
  account = _run_output('gcloud auth list --filter=status:ACTIVE '
@@ -866,11 +869,13 @@ class GCP(clouds.Cloud):
866
869
  ' Reason: '
867
870
  f'{common_utils.format_exception(e, use_bracket=True)}'
868
871
  ) from e
869
- return [f'{account} [project_id={project_id}]']
872
+ # TODO: Return a list of identities in the profile when we support
873
+ # automatic switching for GCP. Currently we only support one identity.
874
+ return [[f'{account} [project_id={project_id}]']]
870
875
 
871
876
  @classmethod
872
- def get_current_user_identity_str(cls) -> Optional[str]:
873
- user_identity = cls.get_current_user_identity()
877
+ def get_active_user_identity_str(cls) -> Optional[str]:
878
+ user_identity = cls.get_active_user_identity()
874
879
  if user_identity is None:
875
880
  return None
876
881
  return user_identity[0].replace('\n', '')