skypilot-nightly 1.0.0.dev20250318__py3-none-any.whl → 1.0.0.dev20250320__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/cloudflare.py +4 -0
- sky/check.py +156 -53
- sky/clouds/aws.py +5 -0
- sky/clouds/azure.py +5 -0
- sky/clouds/cloud.py +12 -0
- sky/clouds/gcp.py +55 -33
- sky/clouds/ibm.py +5 -0
- sky/clouds/oci.py +5 -0
- sky/clouds/utils/gcp_utils.py +11 -1
- sky/core.py +3 -1
- sky/data/storage.py +7 -9
- sky/execution.py +6 -1
- sky/global_user_state.py +30 -0
- sky/optimizer.py +10 -5
- sky/provision/gcp/config.py +3 -3
- sky/provision/gcp/constants.py +16 -2
- sky/provision/gcp/instance.py +4 -1
- sky/provision/kubernetes/utils.py +37 -24
- sky/serve/replica_managers.py +10 -1
- sky/server/requests/executor.py +33 -19
- sky/server/server.py +4 -1
- sky/utils/controller_utils.py +7 -1
- sky/utils/kubernetes/kubernetes_deploy_utils.py +3 -1
- sky/utils/subprocess_utils.py +47 -25
- {skypilot_nightly-1.0.0.dev20250318.dist-info → skypilot_nightly-1.0.0.dev20250320.dist-info}/METADATA +3 -2
- {skypilot_nightly-1.0.0.dev20250318.dist-info → skypilot_nightly-1.0.0.dev20250320.dist-info}/RECORD +31 -31
- {skypilot_nightly-1.0.0.dev20250318.dist-info → skypilot_nightly-1.0.0.dev20250320.dist-info}/WHEEL +1 -1
- {skypilot_nightly-1.0.0.dev20250318.dist-info → skypilot_nightly-1.0.0.dev20250320.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250318.dist-info → skypilot_nightly-1.0.0.dev20250320.dist-info/licenses}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250318.dist-info → skypilot_nightly-1.0.0.dev20250320.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'a480f342522afcd17a3b30a20086f28333ddb7b5'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.
|
38
|
+
__version__ = '1.0.0.dev20250320'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/adaptors/cloudflare.py
CHANGED
@@ -149,6 +149,10 @@ def create_endpoint():
|
|
149
149
|
|
150
150
|
|
151
151
|
def check_credentials() -> Tuple[bool, Optional[str]]:
|
152
|
+
return check_storage_credentials()
|
153
|
+
|
154
|
+
|
155
|
+
def check_storage_credentials() -> Tuple[bool, Optional[str]]:
|
152
156
|
"""Checks if the user has access credentials to Cloudflare R2.
|
153
157
|
|
154
158
|
Returns:
|
sky/check.py
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
"""Credential checks: check cloud credentials and enable clouds."""
|
2
|
+
import enum
|
2
3
|
import os
|
3
4
|
import traceback
|
4
5
|
from types import ModuleType
|
5
|
-
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
6
|
+
from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
|
6
7
|
|
7
8
|
import click
|
8
9
|
import colorama
|
@@ -20,44 +21,91 @@ CHECK_MARK_EMOJI = '\U00002714' # Heavy check mark unicode
|
|
20
21
|
PARTY_POPPER_EMOJI = '\U0001F389' # Party popper unicode
|
21
22
|
|
22
23
|
|
23
|
-
|
24
|
+
# Declaring CloudCapability as a subclass of str
|
25
|
+
# allows it to be JSON serializable.
|
26
|
+
class CloudCapability(str, enum.Enum):
|
27
|
+
# Compute capability.
|
28
|
+
COMPUTE = 'compute'
|
29
|
+
# Storage capability.
|
30
|
+
STORAGE = 'storage'
|
31
|
+
|
32
|
+
|
33
|
+
ALL_CAPABILITIES = [CloudCapability.COMPUTE, CloudCapability.STORAGE]
|
34
|
+
|
35
|
+
|
36
|
+
def check_capabilities(
|
24
37
|
quiet: bool = False,
|
25
38
|
verbose: bool = False,
|
26
39
|
clouds: Optional[Iterable[str]] = None,
|
27
|
-
|
40
|
+
capabilities: Optional[List[CloudCapability]] = None,
|
41
|
+
) -> Dict[str, List[CloudCapability]]:
|
28
42
|
echo = (lambda *_args, **_kwargs: None
|
29
43
|
) if quiet else lambda *args, **kwargs: click.echo(
|
30
44
|
*args, **kwargs, color=True)
|
31
45
|
echo('Checking credentials to enable clouds for SkyPilot.')
|
32
|
-
|
33
|
-
|
46
|
+
if capabilities is None:
|
47
|
+
capabilities = ALL_CAPABILITIES
|
48
|
+
assert capabilities is not None
|
49
|
+
enabled_clouds: Dict[str, List[CloudCapability]] = {}
|
50
|
+
disabled_clouds: Dict[str, List[CloudCapability]] = {}
|
51
|
+
|
52
|
+
def check_credentials(
|
53
|
+
cloud: Union[sky_clouds.Cloud, ModuleType],
|
54
|
+
capability: CloudCapability) -> Tuple[bool, Optional[str]]:
|
55
|
+
if capability == CloudCapability.COMPUTE:
|
56
|
+
return cloud.check_credentials()
|
57
|
+
elif capability == CloudCapability.STORAGE:
|
58
|
+
return cloud.check_storage_credentials()
|
59
|
+
else:
|
60
|
+
raise ValueError(f'Invalid capability: {capability}')
|
61
|
+
|
62
|
+
def get_cached_state(capability: CloudCapability) -> List[sky_clouds.Cloud]:
|
63
|
+
if capability == CloudCapability.COMPUTE:
|
64
|
+
return global_user_state.get_cached_enabled_clouds()
|
65
|
+
elif capability == CloudCapability.STORAGE:
|
66
|
+
return global_user_state.get_cached_enabled_storage_clouds()
|
67
|
+
else:
|
68
|
+
raise ValueError(f'Invalid capability: {capability}')
|
69
|
+
|
70
|
+
def set_cached_state(clouds: List[str],
|
71
|
+
capability: CloudCapability) -> None:
|
72
|
+
if capability == CloudCapability.COMPUTE:
|
73
|
+
global_user_state.set_enabled_clouds(clouds)
|
74
|
+
elif capability == CloudCapability.STORAGE:
|
75
|
+
global_user_state.set_enabled_storage_clouds(clouds)
|
76
|
+
else:
|
77
|
+
raise ValueError(f'Invalid capability: {capability}')
|
34
78
|
|
35
79
|
def check_one_cloud(
|
36
80
|
cloud_tuple: Tuple[str, Union[sky_clouds.Cloud,
|
37
81
|
ModuleType]]) -> None:
|
38
82
|
cloud_repr, cloud = cloud_tuple
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
83
|
+
assert capabilities is not None
|
84
|
+
for capability in capabilities:
|
85
|
+
with rich_utils.safe_status(f'Checking {cloud_repr}...'):
|
86
|
+
try:
|
87
|
+
ok, reason = check_credentials(cloud, capability)
|
88
|
+
except exceptions.NotSupportedError:
|
89
|
+
continue
|
90
|
+
except Exception: # pylint: disable=broad-except
|
91
|
+
# Catch all exceptions to prevent a single cloud
|
92
|
+
# from blocking the check for other clouds.
|
93
|
+
ok, reason = False, traceback.format_exc()
|
94
|
+
status_msg = ('enabled' if ok else 'disabled')
|
95
|
+
styles = {'fg': 'green', 'bold': False} if ok else {'dim': True}
|
96
|
+
echo(' ' + click.style(f'{cloud_repr}: {status_msg}', **styles) +
|
97
|
+
' ' * 30)
|
98
|
+
if ok:
|
99
|
+
enabled_clouds.setdefault(cloud_repr, []).append(capability)
|
100
|
+
if verbose and cloud is not cloudflare:
|
101
|
+
activated_account = cloud.get_active_user_identity_str()
|
102
|
+
if activated_account is not None:
|
103
|
+
echo(f' Activated account: {activated_account}')
|
104
|
+
if reason is not None:
|
105
|
+
echo(f' Hint: {reason}')
|
106
|
+
else:
|
107
|
+
disabled_clouds.setdefault(cloud_repr, []).append(capability)
|
108
|
+
echo(f' Reason: {reason}')
|
61
109
|
|
62
110
|
def get_cloud_tuple(
|
63
111
|
cloud_name: str) -> Tuple[str, Union[sky_clouds.Cloud, ModuleType]]:
|
@@ -99,33 +147,37 @@ def check(
|
|
99
147
|
for cloud_tuple in sorted(clouds_to_check):
|
100
148
|
check_one_cloud(cloud_tuple)
|
101
149
|
|
102
|
-
# Cloudflare is not a real cloud in registry.CLOUD_REGISTRY, and should
|
103
|
-
# not be inserted into the DB (otherwise `sky launch` and other code would
|
104
|
-
# error out when it's trying to look it up in the registry).
|
105
|
-
enabled_clouds_set = {
|
106
|
-
cloud for cloud in enabled_clouds if not cloud.startswith('Cloudflare')
|
107
|
-
}
|
108
|
-
disabled_clouds_set = {
|
109
|
-
cloud for cloud in disabled_clouds if not cloud.startswith('Cloudflare')
|
110
|
-
}
|
111
|
-
config_allowed_clouds_set = {
|
112
|
-
cloud for cloud in config_allowed_cloud_names
|
113
|
-
if not cloud.startswith('Cloudflare')
|
114
|
-
}
|
115
|
-
previously_enabled_clouds_set = {
|
116
|
-
repr(cloud) for cloud in global_user_state.get_cached_enabled_clouds()
|
117
|
-
}
|
118
|
-
|
119
150
|
# Determine the set of enabled clouds: (previously enabled clouds + newly
|
120
151
|
# enabled clouds - newly disabled clouds) intersected with
|
121
152
|
# config_allowed_clouds, if specified in config.yaml.
|
122
153
|
# This means that if a cloud is already enabled and is not included in
|
123
154
|
# allowed_clouds in config.yaml, it will be disabled.
|
124
|
-
all_enabled_clouds = (
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
155
|
+
all_enabled_clouds: Set[str] = set()
|
156
|
+
for capability in capabilities:
|
157
|
+
# Cloudflare is not a real cloud in registry.CLOUD_REGISTRY, and should
|
158
|
+
# not be inserted into the DB (otherwise `sky launch` and other code
|
159
|
+
# would error out when it's trying to look it up in the registry).
|
160
|
+
enabled_clouds_set = {
|
161
|
+
cloud for cloud, capabilities in enabled_clouds.items()
|
162
|
+
if capability in capabilities and not cloud.startswith('Cloudflare')
|
163
|
+
}
|
164
|
+
disabled_clouds_set = {
|
165
|
+
cloud for cloud, capabilities in disabled_clouds.items()
|
166
|
+
if capability in capabilities and not cloud.startswith('Cloudflare')
|
167
|
+
}
|
168
|
+
config_allowed_clouds_set = {
|
169
|
+
cloud for cloud in config_allowed_cloud_names
|
170
|
+
if not cloud.startswith('Cloudflare')
|
171
|
+
}
|
172
|
+
previously_enabled_clouds_set = {
|
173
|
+
repr(cloud) for cloud in get_cached_state(capability)
|
174
|
+
}
|
175
|
+
enabled_clouds_for_capability = (config_allowed_clouds_set & (
|
176
|
+
(previously_enabled_clouds_set | enabled_clouds_set) -
|
177
|
+
disabled_clouds_set))
|
178
|
+
set_cached_state(list(enabled_clouds_for_capability), capability)
|
179
|
+
all_enabled_clouds = all_enabled_clouds.union(
|
180
|
+
enabled_clouds_for_capability)
|
129
181
|
disallowed_clouds_hint = None
|
130
182
|
if disallowed_cloud_names:
|
131
183
|
disallowed_clouds_hint = (
|
@@ -160,8 +212,7 @@ def check(
|
|
160
212
|
# Pretty print for UX.
|
161
213
|
if not quiet:
|
162
214
|
enabled_clouds_str = '\n ' + '\n '.join([
|
163
|
-
_format_enabled_cloud(cloud)
|
164
|
-
for cloud in sorted(all_enabled_clouds)
|
215
|
+
_format_enabled_cloud(cloud) for cloud in sorted(enabled_clouds)
|
165
216
|
])
|
166
217
|
echo(f'\n{colorama.Fore.GREEN}{PARTY_POPPER_EMOJI} '
|
167
218
|
f'Enabled clouds {PARTY_POPPER_EMOJI}'
|
@@ -169,6 +220,23 @@ def check(
|
|
169
220
|
return enabled_clouds
|
170
221
|
|
171
222
|
|
223
|
+
# 'sky check' command and associated '/check' server endpoint
|
224
|
+
# only checks compute capability for backward compatibility.
|
225
|
+
# This necessitates setting default capability to CloudCapability.COMPUTE.
|
226
|
+
def check(
|
227
|
+
quiet: bool = False,
|
228
|
+
verbose: bool = False,
|
229
|
+
clouds: Optional[Iterable[str]] = None,
|
230
|
+
capability: CloudCapability = CloudCapability.COMPUTE,
|
231
|
+
) -> List[str]:
|
232
|
+
clouds_with_capability = []
|
233
|
+
enabled_clouds = check_capabilities(quiet, verbose, clouds, [capability])
|
234
|
+
for cloud, capabilities in enabled_clouds.items():
|
235
|
+
if capability in capabilities:
|
236
|
+
clouds_with_capability.append(cloud)
|
237
|
+
return clouds_with_capability
|
238
|
+
|
239
|
+
|
172
240
|
def get_cached_enabled_clouds_or_refresh(
|
173
241
|
raise_if_no_cloud_access: bool = False) -> List[sky_clouds.Cloud]:
|
174
242
|
"""Returns cached enabled clouds and if no cloud is enabled, refresh.
|
@@ -186,7 +254,7 @@ def get_cached_enabled_clouds_or_refresh(
|
|
186
254
|
cached_enabled_clouds = global_user_state.get_cached_enabled_clouds()
|
187
255
|
if not cached_enabled_clouds:
|
188
256
|
try:
|
189
|
-
check(quiet=True)
|
257
|
+
check(quiet=True, capability=CloudCapability.COMPUTE)
|
190
258
|
except SystemExit:
|
191
259
|
# If no cloud is enabled, check() will raise SystemExit.
|
192
260
|
# Here we catch it and raise the exception later only if
|
@@ -201,6 +269,41 @@ def get_cached_enabled_clouds_or_refresh(
|
|
201
269
|
return cached_enabled_clouds
|
202
270
|
|
203
271
|
|
272
|
+
def get_cached_enabled_storage_clouds_or_refresh(
|
273
|
+
raise_if_no_cloud_access: bool = False) -> List[sky_clouds.Cloud]:
|
274
|
+
"""Returns cached enabled storage clouds and if no cloud is enabled,
|
275
|
+
refresh.
|
276
|
+
|
277
|
+
This function will perform a refresh if no public cloud is enabled.
|
278
|
+
|
279
|
+
Args:
|
280
|
+
raise_if_no_cloud_access: if True, raise an exception if no public
|
281
|
+
cloud is enabled.
|
282
|
+
|
283
|
+
Raises:
|
284
|
+
exceptions.NoCloudAccessError: if no public cloud is enabled and
|
285
|
+
raise_if_no_cloud_access is set to True.
|
286
|
+
"""
|
287
|
+
cached_enabled_storage_clouds = (
|
288
|
+
global_user_state.get_cached_enabled_storage_clouds())
|
289
|
+
if not cached_enabled_storage_clouds:
|
290
|
+
try:
|
291
|
+
check(quiet=True, capability=CloudCapability.STORAGE)
|
292
|
+
except SystemExit:
|
293
|
+
# If no cloud is enabled, check() will raise SystemExit.
|
294
|
+
# Here we catch it and raise the exception later only if
|
295
|
+
# raise_if_no_cloud_access is set to True.
|
296
|
+
pass
|
297
|
+
cached_enabled_storage_clouds = (
|
298
|
+
global_user_state.get_cached_enabled_storage_clouds())
|
299
|
+
if raise_if_no_cloud_access and not cached_enabled_storage_clouds:
|
300
|
+
with ux_utils.print_exception_no_traceback():
|
301
|
+
raise exceptions.NoCloudAccessError(
|
302
|
+
'Cloud access is not set up. Run: '
|
303
|
+
f'{colorama.Style.BRIGHT}sky check{colorama.Style.RESET_ALL}')
|
304
|
+
return cached_enabled_storage_clouds
|
305
|
+
|
306
|
+
|
204
307
|
def get_cloud_credential_file_mounts(
|
205
308
|
excluded_clouds: Optional[Iterable[sky_clouds.Cloud]]
|
206
309
|
) -> Dict[str, str]:
|
@@ -226,7 +329,7 @@ def get_cloud_credential_file_mounts(
|
|
226
329
|
# Currently, get_cached_enabled_clouds_or_refresh() does not support r2 as
|
227
330
|
# only clouds with computing instances are marked as enabled by skypilot.
|
228
331
|
# This will be removed when cloudflare/r2 is added as a 'cloud'.
|
229
|
-
r2_is_enabled, _ = cloudflare.
|
332
|
+
r2_is_enabled, _ = cloudflare.check_storage_credentials()
|
230
333
|
if r2_is_enabled:
|
231
334
|
r2_credential_mounts = cloudflare.get_credential_file_mounts()
|
232
335
|
file_mounts.update(r2_credential_mounts)
|
sky/clouds/aws.py
CHANGED
@@ -666,6 +666,11 @@ class AWS(clouds.Cloud):
|
|
666
666
|
f'{common_utils.format_exception(e, use_bracket=True)}')
|
667
667
|
return True, hints
|
668
668
|
|
669
|
+
@classmethod
|
670
|
+
def check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
|
671
|
+
# TODO(seungjin): Check if the user has access to S3.
|
672
|
+
return cls.check_credentials()
|
673
|
+
|
669
674
|
@classmethod
|
670
675
|
def _current_identity_type(cls) -> Optional[AWSIdentityType]:
|
671
676
|
stdout = cls._aws_configure_list()
|
sky/clouds/azure.py
CHANGED
@@ -574,6 +574,11 @@ class Azure(clouds.Cloud):
|
|
574
574
|
return service_catalog.instance_type_exists(instance_type,
|
575
575
|
clouds='azure')
|
576
576
|
|
577
|
+
@classmethod
|
578
|
+
def check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
|
579
|
+
# TODO(seungjin): Check if the user has access to Azure Blob Storage.
|
580
|
+
return cls.check_credentials()
|
581
|
+
|
577
582
|
@classmethod
|
578
583
|
@annotations.lru_cache(scope='global',
|
579
584
|
maxsize=1) # Cache since getting identity is slow.
|
sky/clouds/cloud.py
CHANGED
@@ -443,6 +443,18 @@ class Cloud:
|
|
443
443
|
"""
|
444
444
|
raise NotImplementedError
|
445
445
|
|
446
|
+
@classmethod
|
447
|
+
def check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
|
448
|
+
"""Checks if the user has access credentials to this cloud's storage.
|
449
|
+
|
450
|
+
Returns a boolean of whether the user can access this cloud's storage,
|
451
|
+
and a string describing the reason if the user cannot access.
|
452
|
+
"""
|
453
|
+
# A given cloud does not support storage
|
454
|
+
# unless it overrides this method.
|
455
|
+
raise exceptions.NotSupportedError(
|
456
|
+
f'{cls._REPR} does not support storage.')
|
457
|
+
|
446
458
|
# TODO(zhwu): Make the return type immutable.
|
447
459
|
@classmethod
|
448
460
|
def get_user_identities(cls) -> Optional[List[List[str]]]:
|
sky/clouds/gcp.py
CHANGED
@@ -124,6 +124,7 @@ def _run_output(cmd):
|
|
124
124
|
|
125
125
|
|
126
126
|
def is_api_disabled(endpoint: str, project_id: str) -> bool:
|
127
|
+
# requires serviceusage.services.list
|
127
128
|
proc = subprocess.run((f'gcloud services list --project {project_id} '
|
128
129
|
f' | grep {endpoint}.googleapis.com'),
|
129
130
|
check=False,
|
@@ -719,6 +720,28 @@ class GCP(clouds.Cloud):
|
|
719
720
|
|
720
721
|
@classmethod
|
721
722
|
def check_credentials(cls) -> Tuple[bool, Optional[str]]:
|
723
|
+
"""Checks if the user has compute access credentials to this cloud."""
|
724
|
+
return cls._check_credentials( # Check APIs.
|
725
|
+
[
|
726
|
+
('compute', 'Compute Engine'),
|
727
|
+
('cloudresourcemanager', 'Cloud Resource Manager'),
|
728
|
+
('iam', 'Identity and Access Management (IAM)'),
|
729
|
+
('tpu', 'Cloud TPU'), # Keep as final element.
|
730
|
+
],
|
731
|
+
gcp_utils.get_minimal_compute_permissions())
|
732
|
+
|
733
|
+
@classmethod
|
734
|
+
def check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
|
735
|
+
"""Checks if the user has storage access credentials to this cloud."""
|
736
|
+
return cls._check_credentials( # Check APIs.
|
737
|
+
[
|
738
|
+
('storage', 'Cloud Storage'),
|
739
|
+
], gcp_utils.get_minimal_storage_permissions())
|
740
|
+
|
741
|
+
@classmethod
|
742
|
+
def _check_credentials(
|
743
|
+
cls, apis: List[Tuple[str, str]],
|
744
|
+
gcp_minimal_permissions: List[str]) -> Tuple[bool, Optional[str]]:
|
722
745
|
"""Checks if the user has access credentials to this cloud."""
|
723
746
|
try:
|
724
747
|
# pylint: disable=import-outside-toplevel,unused-import
|
@@ -783,13 +806,37 @@ class GCP(clouds.Cloud):
|
|
783
806
|
f'{cls._INDENT_PREFIX}Details: '
|
784
807
|
f'{common_utils.format_exception(e, use_bracket=True)}')
|
785
808
|
|
786
|
-
#
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
809
|
+
# pylint: disable=import-outside-toplevel,unused-import
|
810
|
+
import google.auth
|
811
|
+
|
812
|
+
# This takes user's credential info from "~/.config/gcloud/application_default_credentials.json". # pylint: disable=line-too-long
|
813
|
+
credentials, project = google.auth.default()
|
814
|
+
crm = gcp.build('cloudresourcemanager',
|
815
|
+
'v1',
|
816
|
+
credentials=credentials,
|
817
|
+
cache_discovery=False)
|
818
|
+
permissions = {'permissions': gcp_minimal_permissions}
|
819
|
+
request = crm.projects().testIamPermissions(resource=project,
|
820
|
+
body=permissions)
|
821
|
+
try:
|
822
|
+
ret_permissions = request.execute().get('permissions', [])
|
823
|
+
except gcp.gcp_auth_refresh_error_exception() as e:
|
824
|
+
return False, common_utils.format_exception(e, use_bracket=True)
|
825
|
+
|
826
|
+
diffs = set(gcp_minimal_permissions).difference(set(ret_permissions))
|
827
|
+
if diffs:
|
828
|
+
identity_str = identity[0] if identity else None
|
829
|
+
return False, (
|
830
|
+
'The following permissions are not enabled for the current '
|
831
|
+
f'GCP identity ({identity_str}):\n '
|
832
|
+
f'{diffs}\n '
|
833
|
+
'For more details, visit: https://docs.skypilot.co/en/latest/cloud-setup/cloud-permissions/gcp.html') # pylint: disable=line-too-long
|
834
|
+
|
835
|
+
# This code must be executed after the iam check above,
|
836
|
+
# as the check below for api enablement itself needs:
|
837
|
+
# - serviceusage.services.enable
|
838
|
+
# - serviceusage.services.list
|
839
|
+
# iam permissions.
|
793
840
|
enabled_api = False
|
794
841
|
for endpoint, display_name in apis:
|
795
842
|
if is_api_disabled(endpoint, project_id):
|
@@ -801,6 +848,7 @@ class GCP(clouds.Cloud):
|
|
801
848
|
suffix = ' (free of charge)'
|
802
849
|
print(f'\nEnabling {display_name} API{suffix}...')
|
803
850
|
t1 = time.time()
|
851
|
+
# requires serviceusage.services.enable
|
804
852
|
proc = subprocess.run(
|
805
853
|
f'gcloud services enable {endpoint}.googleapis.com '
|
806
854
|
f'--project {project_id}',
|
@@ -830,32 +878,6 @@ class GCP(clouds.Cloud):
|
|
830
878
|
'effect. If any SkyPilot commands/calls failed, retry after '
|
831
879
|
'some time.')
|
832
880
|
|
833
|
-
# pylint: disable=import-outside-toplevel,unused-import
|
834
|
-
import google.auth
|
835
|
-
|
836
|
-
# This takes user's credential info from "~/.config/gcloud/application_default_credentials.json". # pylint: disable=line-too-long
|
837
|
-
credentials, project = google.auth.default()
|
838
|
-
crm = gcp.build('cloudresourcemanager',
|
839
|
-
'v1',
|
840
|
-
credentials=credentials,
|
841
|
-
cache_discovery=False)
|
842
|
-
gcp_minimal_permissions = gcp_utils.get_minimal_permissions()
|
843
|
-
permissions = {'permissions': gcp_minimal_permissions}
|
844
|
-
request = crm.projects().testIamPermissions(resource=project,
|
845
|
-
body=permissions)
|
846
|
-
try:
|
847
|
-
ret_permissions = request.execute().get('permissions', [])
|
848
|
-
except gcp.gcp_auth_refresh_error_exception() as e:
|
849
|
-
return False, common_utils.format_exception(e, use_bracket=True)
|
850
|
-
|
851
|
-
diffs = set(gcp_minimal_permissions).difference(set(ret_permissions))
|
852
|
-
if diffs:
|
853
|
-
identity_str = identity[0] if identity else None
|
854
|
-
return False, (
|
855
|
-
'The following permissions are not enabled for the current '
|
856
|
-
f'GCP identity ({identity_str}):\n '
|
857
|
-
f'{diffs}\n '
|
858
|
-
'For more details, visit: https://docs.skypilot.co/en/latest/cloud-setup/cloud-permissions/gcp.html') # pylint: disable=line-too-long
|
859
881
|
return True, None
|
860
882
|
|
861
883
|
def get_credential_file_mounts(self) -> Dict[str, str]:
|
sky/clouds/ibm.py
CHANGED
@@ -433,6 +433,11 @@ class IBM(clouds.Cloud):
|
|
433
433
|
except Exception as e:
|
434
434
|
return (False, f'{str(e)}' + help_str)
|
435
435
|
|
436
|
+
@classmethod
|
437
|
+
def check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
|
438
|
+
# TODO(seungjin): Check if the user has access to IBM COS.
|
439
|
+
return cls.check_credentials()
|
440
|
+
|
436
441
|
def get_credential_file_mounts(self) -> Dict[str, str]:
|
437
442
|
"""Returns a {remote:local} credential path mapping
|
438
443
|
written to the cluster's file_mounts segment
|
sky/clouds/oci.py
CHANGED
@@ -456,6 +456,11 @@ class OCI(clouds.Cloud):
|
|
456
456
|
f'{cls._INDENT_PREFIX}Error details: '
|
457
457
|
f'{common_utils.format_exception(e, use_bracket=True)}')
|
458
458
|
|
459
|
+
@classmethod
|
460
|
+
def check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
|
461
|
+
# TODO(seungjin): Check if the user has access to OCI Object Storage.
|
462
|
+
return cls.check_credentials()
|
463
|
+
|
459
464
|
@classmethod
|
460
465
|
def check_disk_tier(
|
461
466
|
cls, instance_type: Optional[str],
|
sky/clouds/utils/gcp_utils.py
CHANGED
@@ -167,7 +167,7 @@ def _list_reservations_for_instance_type(
|
|
167
167
|
return [GCPReservation.from_dict(r) for r in json.loads(stdout)]
|
168
168
|
|
169
169
|
|
170
|
-
def
|
170
|
+
def get_minimal_compute_permissions() -> List[str]:
|
171
171
|
permissions = copy.copy(constants.VM_MINIMAL_PERMISSIONS)
|
172
172
|
if skypilot_config.get_nested(('gcp', 'vpc_name'), None) is None:
|
173
173
|
# If custom VPC is not specified, permissions to modify network are
|
@@ -179,4 +179,14 @@ def get_minimal_permissions() -> List[str]:
|
|
179
179
|
skypilot_config.get_nested(('gcp', 'specific_reservations'), [])):
|
180
180
|
permissions += constants.RESERVATION_PERMISSIONS
|
181
181
|
|
182
|
+
permissions += constants.GCP_MINIMAL_PERMISSIONS
|
183
|
+
|
184
|
+
return permissions
|
185
|
+
|
186
|
+
|
187
|
+
def get_minimal_storage_permissions() -> List[str]:
|
188
|
+
permissions = copy.copy(constants.STORAGE_MINIMAL_PERMISSIONS)
|
189
|
+
|
190
|
+
permissions += constants.GCP_MINIMAL_PERMISSIONS
|
191
|
+
|
182
192
|
return permissions
|
sky/core.py
CHANGED
@@ -1134,7 +1134,9 @@ def local_down() -> None:
|
|
1134
1134
|
# Run sky check
|
1135
1135
|
with rich_utils.safe_status(
|
1136
1136
|
ux_utils.spinner_message('Running sky check...')):
|
1137
|
-
sky_check.check(clouds=['kubernetes'],
|
1137
|
+
sky_check.check(clouds=['kubernetes'],
|
1138
|
+
quiet=True,
|
1139
|
+
capability=sky_check.CloudCapability.COMPUTE)
|
1138
1140
|
logger.info(
|
1139
1141
|
ux_utils.finishing_message('Local cluster removed.',
|
1140
1142
|
log_path=log_path,
|
sky/data/storage.py
CHANGED
@@ -82,20 +82,17 @@ def get_cached_enabled_storage_clouds_or_refresh(
|
|
82
82
|
raise_if_no_cloud_access: bool = False) -> List[str]:
|
83
83
|
# This is a temporary solution until https://github.com/skypilot-org/skypilot/issues/1943 # pylint: disable=line-too-long
|
84
84
|
# is resolved by implementing separate 'enabled_storage_clouds'
|
85
|
-
enabled_clouds = sky_check.
|
85
|
+
enabled_clouds = sky_check.get_cached_enabled_storage_clouds_or_refresh()
|
86
86
|
enabled_clouds = [str(cloud) for cloud in enabled_clouds]
|
87
87
|
|
88
|
-
|
89
|
-
cloud for cloud in enabled_clouds if cloud in STORE_ENABLED_CLOUDS
|
90
|
-
]
|
91
|
-
r2_is_enabled, _ = cloudflare.check_credentials()
|
88
|
+
r2_is_enabled, _ = cloudflare.check_storage_credentials()
|
92
89
|
if r2_is_enabled:
|
93
|
-
|
94
|
-
if raise_if_no_cloud_access and not
|
90
|
+
enabled_clouds.append(cloudflare.NAME)
|
91
|
+
if raise_if_no_cloud_access and not enabled_clouds:
|
95
92
|
raise exceptions.NoCloudAccessError(
|
96
93
|
'No cloud access available for storage. '
|
97
94
|
'Please check your cloud credentials.')
|
98
|
-
return
|
95
|
+
return enabled_clouds
|
99
96
|
|
100
97
|
|
101
98
|
def _is_storage_cloud_enabled(cloud_name: str,
|
@@ -105,7 +102,8 @@ def _is_storage_cloud_enabled(cloud_name: str,
|
|
105
102
|
return True
|
106
103
|
if try_fix_with_sky_check:
|
107
104
|
# TODO(zhwu): Only check the specified cloud to speed up.
|
108
|
-
sky_check.check(quiet=True
|
105
|
+
sky_check.check(quiet=True,
|
106
|
+
capability=sky_check.CloudCapability.STORAGE)
|
109
107
|
return _is_storage_cloud_enabled(cloud_name,
|
110
108
|
try_fix_with_sky_check=False)
|
111
109
|
return False
|
sky/execution.py
CHANGED
@@ -529,6 +529,11 @@ def launch(
|
|
529
529
|
]
|
530
530
|
skip_unnecessary_provisioning = True
|
531
531
|
|
532
|
+
# Attach to setup if the cluster is a controller, so that user can
|
533
|
+
# see the setup logs when inspecting the launch process to know
|
534
|
+
# exactly what the job is waiting for.
|
535
|
+
detach_setup = controller_utils.Controllers.from_name(cluster_name) is None
|
536
|
+
|
532
537
|
return _execute(
|
533
538
|
entrypoint=entrypoint,
|
534
539
|
dryrun=dryrun,
|
@@ -540,7 +545,7 @@ def launch(
|
|
540
545
|
optimize_target=optimize_target,
|
541
546
|
stages=stages,
|
542
547
|
cluster_name=cluster_name,
|
543
|
-
detach_setup=
|
548
|
+
detach_setup=detach_setup,
|
544
549
|
detach_run=True,
|
545
550
|
idle_minutes_to_autostop=idle_minutes_to_autostop,
|
546
551
|
no_setup=no_setup,
|
sky/global_user_state.py
CHANGED
@@ -31,6 +31,7 @@ if typing.TYPE_CHECKING:
|
|
31
31
|
logger = sky_logging.init_logger(__name__)
|
32
32
|
|
33
33
|
_ENABLED_CLOUDS_KEY = 'enabled_clouds'
|
34
|
+
_ENABLED_STORAGE_CLOUDS_KEY = 'enabled_storage_clouds'
|
34
35
|
|
35
36
|
_DB_PATH = os.path.expanduser('~/.sky/state.db')
|
36
37
|
pathlib.Path(_DB_PATH).parents[0].mkdir(parents=True, exist_ok=True)
|
@@ -817,12 +818,41 @@ def get_cached_enabled_clouds() -> List['clouds.Cloud']:
|
|
817
818
|
return enabled_clouds
|
818
819
|
|
819
820
|
|
821
|
+
def get_cached_enabled_storage_clouds() -> List['clouds.Cloud']:
|
822
|
+
rows = _DB.cursor.execute('SELECT value FROM config WHERE key = ?',
|
823
|
+
(_ENABLED_STORAGE_CLOUDS_KEY,))
|
824
|
+
ret = []
|
825
|
+
for (value,) in rows:
|
826
|
+
ret = json.loads(value)
|
827
|
+
break
|
828
|
+
enabled_clouds: List['clouds.Cloud'] = []
|
829
|
+
for c in ret:
|
830
|
+
try:
|
831
|
+
cloud = registry.CLOUD_REGISTRY.from_str(c)
|
832
|
+
except ValueError:
|
833
|
+
# Handle the case for the clouds whose support has been removed from
|
834
|
+
# SkyPilot, e.g., 'local' was a cloud in the past and may be stored
|
835
|
+
# in the database for users before #3037. We should ignore removed
|
836
|
+
# clouds and continue.
|
837
|
+
continue
|
838
|
+
if cloud is not None:
|
839
|
+
enabled_clouds.append(cloud)
|
840
|
+
return enabled_clouds
|
841
|
+
|
842
|
+
|
820
843
|
def set_enabled_clouds(enabled_clouds: List[str]) -> None:
|
821
844
|
_DB.cursor.execute('INSERT OR REPLACE INTO config VALUES (?, ?)',
|
822
845
|
(_ENABLED_CLOUDS_KEY, json.dumps(enabled_clouds)))
|
823
846
|
_DB.conn.commit()
|
824
847
|
|
825
848
|
|
849
|
+
def set_enabled_storage_clouds(enabled_storage_clouds: List[str]) -> None:
|
850
|
+
_DB.cursor.execute(
|
851
|
+
'INSERT OR REPLACE INTO config VALUES (?, ?)',
|
852
|
+
(_ENABLED_STORAGE_CLOUDS_KEY, json.dumps(enabled_storage_clouds)))
|
853
|
+
_DB.conn.commit()
|
854
|
+
|
855
|
+
|
826
856
|
def add_or_update_storage(storage_name: str,
|
827
857
|
storage_handle: 'Storage.StorageMetadata',
|
828
858
|
storage_status: status_lib.StorageStatus):
|