konduktor-nightly 0.1.0.dev20250209104336__py3-none-any.whl → 0.1.0.dev20250313070642__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- konduktor/__init__.py +16 -6
- konduktor/adaptors/__init__.py +0 -0
- konduktor/adaptors/common.py +88 -0
- konduktor/adaptors/gcp.py +112 -0
- konduktor/backends/__init__.py +8 -0
- konduktor/backends/backend.py +86 -0
- konduktor/backends/jobset.py +218 -0
- konduktor/backends/jobset_utils.py +447 -0
- konduktor/check.py +192 -0
- konduktor/cli.py +790 -0
- konduktor/cloud_stores.py +158 -0
- konduktor/config.py +420 -0
- konduktor/constants.py +36 -0
- konduktor/controller/constants.py +6 -6
- konduktor/controller/launch.py +3 -3
- konduktor/controller/node.py +5 -5
- konduktor/controller/parse.py +23 -23
- konduktor/dashboard/backend/main.py +57 -57
- konduktor/dashboard/backend/sockets.py +19 -19
- konduktor/data/__init__.py +9 -0
- konduktor/data/constants.py +12 -0
- konduktor/data/data_utils.py +223 -0
- konduktor/data/gcp/__init__.py +19 -0
- konduktor/data/gcp/constants.py +42 -0
- konduktor/data/gcp/gcs.py +906 -0
- konduktor/data/gcp/utils.py +9 -0
- konduktor/data/storage.py +799 -0
- konduktor/data/storage_utils.py +500 -0
- konduktor/execution.py +444 -0
- konduktor/kube_client.py +153 -48
- konduktor/logging.py +49 -5
- konduktor/manifests/dmesg_daemonset.yaml +8 -0
- konduktor/manifests/pod_cleanup_controller.yaml +129 -0
- konduktor/resource.py +478 -0
- konduktor/task.py +867 -0
- konduktor/templates/jobset.yaml.j2 +31 -0
- konduktor/templates/pod.yaml.j2 +185 -0
- konduktor/usage/__init__.py +0 -0
- konduktor/usage/constants.py +21 -0
- konduktor/utils/__init__.py +0 -0
- konduktor/utils/accelerator_registry.py +21 -0
- konduktor/utils/annotations.py +62 -0
- konduktor/utils/base64_utils.py +93 -0
- konduktor/utils/common_utils.py +393 -0
- konduktor/utils/constants.py +5 -0
- konduktor/utils/env_options.py +55 -0
- konduktor/utils/exceptions.py +226 -0
- konduktor/utils/kubernetes_enums.py +8 -0
- konduktor/utils/kubernetes_utils.py +652 -0
- konduktor/utils/log_utils.py +251 -0
- konduktor/utils/loki_utils.py +85 -0
- konduktor/utils/rich_utils.py +123 -0
- konduktor/utils/schemas.py +581 -0
- konduktor/utils/subprocess_utils.py +273 -0
- konduktor/utils/ux_utils.py +216 -0
- konduktor/utils/validator.py +20 -0
- {konduktor_nightly-0.1.0.dev20250209104336.dist-info → konduktor_nightly-0.1.0.dev20250313070642.dist-info}/LICENSE +0 -1
- {konduktor_nightly-0.1.0.dev20250209104336.dist-info → konduktor_nightly-0.1.0.dev20250313070642.dist-info}/METADATA +13 -2
- konduktor_nightly-0.1.0.dev20250313070642.dist-info/RECORD +94 -0
- konduktor_nightly-0.1.0.dev20250209104336.dist-info/RECORD +0 -48
- {konduktor_nightly-0.1.0.dev20250209104336.dist-info → konduktor_nightly-0.1.0.dev20250313070642.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20250209104336.dist-info → konduktor_nightly-0.1.0.dev20250313070642.dist-info}/entry_points.txt +0 -0
konduktor/data/gcp/gcs.py (new file)
@@ -0,0 +1,906 @@
# Proprietary Changes made for Trainy under the Trainy Software License
# Original source: skypilot: https://github.com/skypilot-org/skypilot
# which is Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Google Cloud Platform Storage."""

import enum
import os
import re
import shlex
import subprocess
import time
import typing
from typing import List, Optional, Tuple

import colorama

if typing.TYPE_CHECKING:
    from google.cloud import storage as gcs_storage

from konduktor import logging
from konduktor.adaptors import gcp
from konduktor.data import constants, data_utils, storage_utils
from konduktor.data.gcp import utils
from konduktor.utils import (
    base64_utils,
    common_utils,
    exceptions,
    kubernetes_utils,
    rich_utils,
    ux_utils,
)

logger = logging.get_logger(__name__)

# Maximum number of concurrent rsync upload processes
_MAX_CONCURRENT_UPLOADS = 32

# Env var pointing to any service account key. If it exists, this path takes
# priority over the DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH below, and will be
# used instead for Konduktor-launched instances. This is the same behavior as
# gcloud:
# https://cloud.google.com/docs/authentication/provide-credentials-adc#local-key
_GCP_APPLICATION_CREDENTIAL_ENV = 'GOOGLE_APPLICATION_CREDENTIALS'
# NOTE: do not expanduser() on this path. It's used as a destination path on the
# remote cluster.
DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH: str = (
    '~/.config/gcloud/' 'application_default_credentials.json'
)
DEFAULT_GCP_CREDENTIALS_DIR = '~/.config/gcloud'

# TODO(wei-lin): config_default may not be the config in use.
# See: https://github.com/skypilot-org/skypilot/pull/1539
# NOTE: do not expanduser() on this path. It's used as a destination path on the
# remote cluster.
GCP_CONFIG_PATH = '~/.config/gcloud/configurations/config_default'

# Minimum set of files under ~/.config/gcloud that grant GCP access.
_CREDENTIAL_FILES = [
    'credentials.db',
    'access_tokens.db',
    'configurations',
    'legacy_credentials',
    'active_config',
    'application_default_credentials.json',
]

# k8s secret name for gcp credentials
GCP_SECRET_NAME = 'gcpcredentials'
GCP_CREDENTIALS_KEY = 'gcpcredentials'

# NOTE: do not expanduser() on this path. It's used as a destination path on the
# remote cluster.
_GCLOUD_INSTALLATION_LOG = '~/.konduktor/logs/gcloud_installation.log'
_GCLOUD_VERSION = '424.0.0'
# Needs to be run with /bin/bash.
# We factor out the installation logic to keep it aligned in both the spot
# controller and cloud stores.
GOOGLE_SDK_INSTALLATION_COMMAND: str = f'pushd /tmp &>/dev/null && \
{{ gcloud --help > /dev/null 2>&1 || \
{{ mkdir -p {os.path.dirname(_GCLOUD_INSTALLATION_LOG)} && \
wget --quiet https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-{_GCLOUD_VERSION}-linux-x86_64.tar.gz > {_GCLOUD_INSTALLATION_LOG} && \
tar xzf google-cloud-sdk-{_GCLOUD_VERSION}-linux-x86_64.tar.gz >> {_GCLOUD_INSTALLATION_LOG} && \
rm -rf ~/google-cloud-sdk >> {_GCLOUD_INSTALLATION_LOG} && \
mv google-cloud-sdk ~/ && \
~/google-cloud-sdk/install.sh -q >> {_GCLOUD_INSTALLATION_LOG} 2>&1 && \
echo "source ~/google-cloud-sdk/path.bash.inc > /dev/null 2>&1" >> ~/.bashrc && \
source ~/google-cloud-sdk/path.bash.inc >> {_GCLOUD_INSTALLATION_LOG} 2>&1; }}; }} && \
popd &>/dev/null'  # noqa: E501
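
Since the installer string relies on bash builtins (`pushd`, `popd`, `source`), it has to run under `/bin/bash`, as the comment above notes. A minimal sketch of how a caller might execute it; the invocation below is illustrative, not Konduktor's actual call site:

```python
import subprocess

# Illustrative only: run the installer string under bash; plain `sh`
# would fail on `pushd`/`source`.
proc = subprocess.run(
    GOOGLE_SDK_INSTALLATION_COMMAND,
    shell=True,
    executable='/bin/bash',
    capture_output=True,
    text=True,
)
if proc.returncode != 0:
    raise RuntimeError(f'gcloud installation failed:\n{proc.stderr}')
```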


class GCPIdentityType(enum.Enum):
    """GCP identity type.

    The account type is determined by the current user identity, based on
    the identity email.
    """

    # Example of a service account email:
    # skypilot-v1@xxxx.iam.gserviceaccount.com
    SERVICE_ACCOUNT = 'iam.gserviceaccount.com'

    SHARED_CREDENTIALS_FILE = ''

    def can_credential_expire(self) -> bool:
        return self == GCPIdentityType.SHARED_CREDENTIALS_FILE


def _run_output(cmd):
    proc = subprocess.run(
        cmd, shell=True, check=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE
    )
    return proc.stdout.decode('ascii')


def is_api_disabled(endpoint: str, project_id: str) -> bool:
    proc = subprocess.run(
        (
            f'gcloud services list --project {project_id} '
            f' | grep {endpoint}.googleapis.com'
        ),
        check=False,
        shell=True,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
    return proc.returncode != 0
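
The enum doubles as a small predicate: service accounts are detected by a substring match on the account email, and only shared-credentials identities are treated as expirable. A quick spot-check against the definitions above (the email is made up):

```python
# Service-account emails carry the 'iam.gserviceaccount.com' suffix and
# never expire; everything else falls back to the shared credentials file.
assert GCPIdentityType.SERVICE_ACCOUNT.value in 'robot@proj.iam.gserviceaccount.com'
assert not GCPIdentityType.SERVICE_ACCOUNT.can_credential_expire()
assert GCPIdentityType.SHARED_CREDENTIALS_FILE.can_credential_expire()
```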


class GcsStore(storage_utils.AbstractStore):
    """GcsStore inherits from Storage Object and represents the backend
    for GCS buckets.
    """

    # k8s secret name for gcp credentials
    _GCP_SECRET_NAME = f'{GCP_SECRET_NAME}-{common_utils.user_and_hostname_hash()}'
    _GCP_CREDENTIALS_KEY = GCP_CREDENTIALS_KEY

    _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'

    _INDENT_PREFIX = '    '
    _DEPENDENCY_HINT = (
        'GCP tools are not installed. Run the following commands:\n'
        # Install the Google Cloud SDK:
        f'{_INDENT_PREFIX}  $ pip install google-api-python-client\n'
        f'{_INDENT_PREFIX}  $ conda install -c conda-forge '
        'google-cloud-sdk -y'
    )

    _CREDENTIAL_HINT = (
        'Run the following commands:\n'
        # This authenticates the CLI to make `gsutil` work:
        f'{_INDENT_PREFIX}  $ gcloud init\n'
        # This will generate
        # ~/.config/gcloud/application_default_credentials.json.
        f'{_INDENT_PREFIX}  $ gcloud auth application-default login\n'
        f'{_INDENT_PREFIX}For more info: '
        'https://konduktor.readthedocs.io/en/latest/getting-started/installation.html#google-cloud-platform-gcp'  # noqa: E501
    )
    _APPLICATION_CREDENTIAL_HINT = (
        'Run the following commands:\n'
        f'{_INDENT_PREFIX}  $ gcloud auth application-default login\n'
        f'{_INDENT_PREFIX}Or set the environment variable '
        'GOOGLE_APPLICATION_CREDENTIALS '
        'to the path of your service account key file.\n'
        f'{_INDENT_PREFIX}For more info: '
        'https://konduktor.readthedocs.io/en/latest/getting-started/installation.html#google-cloud-platform-gcp'  # noqa: E501
    )

    _REPR = 'GcsStore'

    def __init__(
        self,
        name: str,
        source: str,
        region: Optional[str] = 'us-central1',
        is_sky_managed: Optional[bool] = False,
        sync_on_reconstruction: Optional[bool] = True,
        _bucket_sub_path: Optional[str] = None,
    ):
        self.client: 'gcs_storage.Client'
        self.bucket: 'constants.StorageHandle'
        super().__init__(
            name,
            source,
            region,
            is_sky_managed,
            sync_on_reconstruction,
            _bucket_sub_path,
        )

    def __repr__(self):
        return self._REPR

    def _validate(self):
        if self.source is not None and isinstance(self.source, str):
            # if self.source.startswith('s3://'):
            #     assert self.name == data_utils.split_s3_path(self.source)[0], (
            #         'S3 Bucket is specified as path, the name should be the'
            #         ' same as S3 bucket.')
            #     assert data_utils.verify_s3_bucket(self.name), (
            #         f'Source specified as {self.source}, an S3 bucket. ',
            #         'S3 Bucket should exist.')
            if self.source.startswith('gs://'):
                assert self.name == data_utils.split_gcs_path(self.source)[0], (
                    'GCS Bucket is specified as path, the name should be '
                    'the same as GCS bucket.'
                )
            # elif data_utils.is_az_container_endpoint(self.source):
            #     storage_account_name, container_name, _ = (
            #         data_utils.split_az_path(self.source))
            #     assert self.name == container_name, (
            #         'Azure bucket is specified as path, the name should be '
            #         'the same as Azure bucket.')
            #     assert data_utils.verify_az_bucket(
            #         storage_account_name, self.name), (
            #         f'Source specified as {self.source}, an Azure bucket. '
            #         'Azure bucket should exist.')
            # elif self.source.startswith('r2://'):
            #     assert self.name == data_utils.split_r2_path(self.source)[0], (
            #         'R2 Bucket is specified as path, the name should be '
            #         'the same as R2 bucket.')
            #     assert data_utils.verify_r2_bucket(self.name), (
            #         f'Source specified as {self.source}, a R2 bucket. ',
            #         'R2 Bucket should exist.')
            # elif self.source.startswith('cos://'):
            #     assert self.name == data_utils.split_cos_path(self.source)[0], (
            #         'COS Bucket is specified as path, the name should be '
            #         'the same as COS bucket.')
            #     assert data_utils.verify_ibm_cos_bucket(self.name), (
            #         f'Source specified as {self.source}, a COS bucket. ',
            #         'COS Bucket should exist.')
        # Validate name
        self.name = self.validate_name(self.name)

    @classmethod
    def validate_name(cls, name: str) -> str:
        """Validates the name of the GCS store.

        Source for rules: https://cloud.google.com/storage/docs/buckets#naming
        """

        def _raise_no_traceback_name_error(err_str):
            with ux_utils.print_exception_no_traceback():
                raise exceptions.StorageNameError(err_str)

        if name is not None and isinstance(name, str):
            # Check for overall length
            if not 3 <= len(name) <= 222:
                _raise_no_traceback_name_error(
                    f'Invalid store name: name {name} must contain 3-222 ' 'characters.'
                )

            # Check for valid characters and start/end with a number or letter
            pattern = r'^[a-z0-9][-a-z0-9._]*[a-z0-9]$'
            if not re.match(pattern, name):
                _raise_no_traceback_name_error(
                    f'Invalid store name: name {name} can only contain '
                    'lowercase letters, numeric characters, dashes (-), '
                    'underscores (_), and dots (.). Spaces are not allowed. '
                    'Names must start and end with a number or letter.'
                )

            # Check for 'goog' prefix and 'google' in the name
            if name.startswith('goog') or any(
                s in name for s in ['google', 'g00gle', 'go0gle', 'g0ogle']
            ):
                _raise_no_traceback_name_error(
                    f'Invalid store name: name {name} cannot begin with the '
                    '"goog" prefix or contain "google" in various forms.'
                )

            # Check for dot-separated components length
            components = name.split('.')
            if any(len(component) > 63 for component in components):
                _raise_no_traceback_name_error(
                    'Invalid store name: Dot-separated components in name '
                    f'{name} can be no longer than 63 characters.'
                )

            if '..' in name or '.-' in name or '-.' in name:
                _raise_no_traceback_name_error(
                    f'Invalid store name: name {name} must not contain two '
                    'adjacent periods or a dot next to a hyphen.'
                )

            # Check for IP address format
            ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
            if re.match(ip_pattern, name):
                _raise_no_traceback_name_error(
                    f'Invalid store name: name {name} cannot be represented as '
                    'an IP address in dotted-decimal notation '
                    '(for example, 192.168.5.4).'
                )
        else:
            _raise_no_traceback_name_error('Store name must be specified.')
        return name
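
Because the checks mirror GCS's published bucket-naming rules, the behavior is easy to spot-check. The bucket names below are made up:

```python
GcsStore.validate_name('my-training-data')  # ok: returned unchanged
GcsStore.validate_name('data..set')         # raises StorageNameError (adjacent dots)
GcsStore.validate_name('goog-data')         # raises StorageNameError ('goog' prefix)
GcsStore.validate_name('192.168.5.4')       # raises StorageNameError (IP-like name)
```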

    def initialize(self):
        """Initializes the GCS store object on the cloud.

        Initialization involves fetching bucket if exists, or creating it if
        it does not.

        Raises:
            StorageBucketCreateError: If bucket creation fails
            StorageBucketGetError: If fetching existing bucket fails
            StorageInitError: If general initialization fails.
        """
        self.client = gcp.storage_client()
        self.bucket, is_new_bucket = self._get_bucket()
        if self.is_sky_managed is None:
            # If is_sky_managed is not specified, then this is a new storage
            # object (i.e., did not exist in global_user_state) and we should
            # set the is_sky_managed property.
            # If is_sky_managed is specified, then we take no action.
            self.is_sky_managed = is_new_bucket

    def upload(self):
        """Uploads source to store bucket.

        Upload must be called by the Storage handler - it is not called on
        Store initialization.

        Raises:
            StorageUploadError: if upload fails.
        """
        try:
            if isinstance(self.source, list):
                self.batch_gsutil_rsync(self.source, create_dirs=True)
            elif self.source is not None:
                if self.source.startswith('gs://'):
                    pass
                elif self.source.startswith('s3://'):
                    self._transfer_to_gcs()
                elif self.source.startswith('r2://'):
                    self._transfer_to_gcs()
                else:
                    # If a single directory is specified in source, upload
                    # contents to root of bucket by suffixing /*.
                    self.batch_gsutil_rsync([self.source])
        except exceptions.StorageUploadError:
            raise
        except Exception as e:
            raise exceptions.StorageUploadError(
                f'Upload failed for store {self.name}'
            ) from e

    def delete(self) -> None:
        deleted_by_skypilot = self._delete_gcs_bucket(self.name)
        if deleted_by_skypilot:
            msg_str = f'Deleted GCS bucket {self.name}.'
        else:
            msg_str = (
                f'GCS bucket {self.name} may have been deleted '
                f'externally. Removing from local state.'
            )
        logger.info(f'{colorama.Fore.GREEN}{msg_str}' f'{colorama.Style.RESET_ALL}')

    def get_handle(self) -> 'constants.StorageHandle':
        return self.client.get_bucket(self.name)

    def batch_gsutil_cp(
        self, source_path_list: List['constants.Path'], create_dirs: bool = False
    ) -> None:
        """Invokes gsutil cp -n to batch upload a list of local paths

        The -n flag to gsutil cp checks the existence of an object before
        uploading, making it similar to gsutil rsync. Since it allows
        specification of a list of files, it is faster than calling gsutil
        rsync on each file. However, unlike rsync, files are compared based on
        just their filename, and any updates to a file would not be copied to
        the bucket.
        """
        # Generate message for upload
        if len(source_path_list) > 1:
            source_message = f'{len(source_path_list)} paths'
        else:
            source_message = source_path_list[0]

        # If the source_path list contains a directory, then gsutil cp -n
        # copies the dir as is to the root of the bucket. To copy the
        # contents of directory to the root, add /* to the directory path
        # e.g., /mydir/*
        source_path_list = [
            str(path) + '/*' if (os.path.isdir(path) and not create_dirs) else str(path)
            for path in source_path_list
        ]
        copy_list = '\n'.join(
            os.path.abspath(os.path.expanduser(p)) for p in source_path_list
        )
        gsutil_alias, alias_gen = data_utils.get_gsutil_command()
        sub_path = f'/{self._bucket_sub_path}' if self._bucket_sub_path else ''
        sync_command = (
            f'{alias_gen}; echo "{copy_list}" | {gsutil_alias} '
            f'cp -e -n -r -I gs://{self.name}{sub_path}'
        )

        log_path = logging.generate_tmp_logging_file_path(
            constants._STORAGE_LOG_FILE_NAME
        )

        with rich_utils.safe_status(
            ux_utils.spinner_message(
                f'Syncing {source_message} -> ' f'gs://{self.name}{sub_path}'
            )
        ):
            data_utils.run_upload_cli(
                sync_command,
                self._ACCESS_DENIED_MESSAGE,
                bucket_name=self.name,
                log_path=log_path,
            )

    def batch_gsutil_rsync(
        self, source_path_list: List['constants.Path'], create_dirs: bool = False
    ) -> None:
        """Invokes gsutil rsync to batch upload a list of local paths

        Since gsutil rsync does not support include commands, we use a
        negative look-ahead regex to exclude everything other than the path(s)
        we want to upload.

        Since gsutil rsync does not support batch operations, we construct
        multiple commands to be run in parallel.

        Args:
            source_path_list: List of paths to local files or directories
            create_dirs: If the local_path is a directory and this is set to
                False, the contents of the directory are directly uploaded to
                root of the bucket. If the local_path is a directory and this is
                set to True, the directory is created in the bucket root and
                contents are uploaded to it.
        """

        def get_file_sync_command(base_dir_path, file_names):
            sync_format = '|'.join(file_names)
            gsutil_alias, alias_gen = data_utils.get_gsutil_command()
            base_dir_path = shlex.quote(base_dir_path)
            sync_command = (
                f'{alias_gen}; {gsutil_alias} '
                f"rsync -e -x '^(?!{sync_format}$).*' "
                f'{base_dir_path} gs://{self.name}{sub_path}'
            )
            return sync_command

        def get_dir_sync_command(src_dir_path, dest_dir_name):
            excluded_list = storage_utils.get_excluded_files(src_dir_path)
            # we exclude .git directory from the sync
            excluded_list.append(r'^\.git/.*$')
            excludes = '|'.join(excluded_list)
            gsutil_alias, alias_gen = data_utils.get_gsutil_command()
            src_dir_path = shlex.quote(src_dir_path)
            sync_command = (
                f'{alias_gen}; {gsutil_alias} '
                f"rsync -e -r -x '({excludes})' {src_dir_path} "
                f'gs://{self.name}{sub_path}/{dest_dir_name}'
            )
            return sync_command

        sub_path = f'/{self._bucket_sub_path}' if self._bucket_sub_path else ''
        # Generate message for upload
        if len(source_path_list) > 1:
            source_message = f'{len(source_path_list)} paths'
        else:
            source_message = source_path_list[0]

        log_path = logging.generate_tmp_logging_file_path(
            constants._STORAGE_LOG_FILE_NAME
        )
        sync_path = f'{source_message} -> gs://{self.name}{sub_path}/'
        with rich_utils.safe_status(
            ux_utils.spinner_message(f'Syncing {sync_path}', log_path=log_path)
        ):
            data_utils.parallel_upload(
                source_path_list,
                get_file_sync_command,
                get_dir_sync_command,
                log_path,
                self.name,
                self._ACCESS_DENIED_MESSAGE,
                create_dirs=create_dirs,
                max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS,
            )
            logger.info(
                ux_utils.finishing_message(f'Storage synced: {sync_path}', log_path)
            )
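
The file-level command above is worth unpacking: `gsutil rsync` has an exclude flag (`-x`) but no include flag, so the code excludes every path that does not match one of the wanted file names via a negative look-ahead. A standalone sketch of the same construction (the directory and bucket are made up):

```python
file_names = ['train.py', 'config.yaml']
sync_format = '|'.join(file_names)
# Exclude every path that is NOT one of the listed file names.
cmd = (
    f"gsutil rsync -e -x '^(?!{sync_format}$).*' "
    '/tmp/src gs://my-bucket'
)
print(cmd)
# gsutil rsync -e -x '^(?!train.py|config.yaml$).*' /tmp/src gs://my-bucket
```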

    def _get_bucket(self) -> Tuple['constants.StorageHandle', bool]:
        """Obtains the GCS bucket.
        If the bucket exists, this method will connect to the bucket.

        If the bucket does not exist, there are three cases:
          1) Raise an error if the bucket source starts with gs://
          2) Return None if bucket has been externally deleted and
             sync_on_reconstruction is False
          3) Create and return a new bucket otherwise

        Raises:
            StorageSpecError: If externally created bucket is attempted to be
                mounted without specifying storage source.
            StorageBucketCreateError: If creating the bucket fails
            StorageBucketGetError: If fetching a bucket fails
            StorageExternalDeletionError: If externally deleted storage is
                attempted to be fetched while reconstructing the storage for
                'sky storage delete' or 'sky start'
        """
        try:
            bucket = self.client.get_bucket(self.name)
            self._validate_existing_bucket()
            return bucket, False
        except gcp.not_found_exception() as e:
            if isinstance(self.source, str) and self.source.startswith('gs://'):
                with ux_utils.print_exception_no_traceback():
                    raise exceptions.StorageBucketGetError(
                        'Attempted to use a non-existent bucket as a source: '
                        f'{self.source}'
                    ) from e
            else:
                # If bucket cannot be found (i.e., does not exist), it is to be
                # created by Sky. However, creation is skipped if Store object
                # is being reconstructed for deletion or re-mount with
                # sky start, and error is raised instead.
                if self.sync_on_reconstruction:
                    bucket = self._create_gcs_bucket(self.name, self.region)
                    return bucket, True
                else:
                    # This is raised when Storage object is reconstructed for
                    # sky storage delete or to re-mount Storages with sky start
                    # but the storage is already removed externally.
                    raise exceptions.StorageExternalDeletionError(
                        'Attempted to fetch a non-existent bucket: ' f'{self.name}'
                    ) from e
        except gcp.forbidden_exception():
            # Try public bucket to see if bucket exists
            logger.info('External Bucket detected; Connecting to external bucket...')
            try:
                a_client = gcp.anonymous_storage_client()
                bucket = a_client.bucket(self.name)
                # Check if bucket can be listed/read from
                next(bucket.list_blobs())
                return bucket, False
            except (gcp.not_found_exception(), ValueError) as e:
                command = f'gsutil ls gs://{self.name}'
                with ux_utils.print_exception_no_traceback():
                    raise exceptions.StorageBucketGetError(
                        f'Bucket {self.name} does not exist.'
                        + f' To debug, consider running `{command}`.'
                    ) from e

    def _download_file(self, remote_path: str, local_path: str) -> None:
        """Downloads file from remote to local on GS bucket

        Args:
            remote_path: str; Remote path on GS bucket
            local_path: str; Local path on user's device
        """
        blob = self.bucket.blob(remote_path)
        blob.download_to_filename(local_path, timeout=None)
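
The `forbidden_exception` branch above probes whether a bucket the caller cannot `get_bucket` is still publicly readable. Stated directly against the official `google-cloud-storage` client (which the `gcp` adaptor wraps), the probe looks roughly like this; the bucket name is a placeholder:

```python
from google.cloud import storage

# Anonymous client: no credentials attached, so only public buckets work.
client = storage.Client.create_anonymous_client()
bucket = client.bucket('some-public-bucket')
# Pulling one blob verifies the bucket exists and is listable; a missing
# or private bucket raises here instead.
first_blob = next(iter(bucket.list_blobs()))
print(first_blob.name)
```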

    def _create_gcs_bucket(
        self, bucket_name: str, region='us-central1'
    ) -> 'constants.StorageHandle':
        """Creates GCS bucket with specific name in specific region

        Args:
            bucket_name: str; Name of bucket
            region: str; Region name, e.g. us-central1, us-west1
        """
        try:
            bucket = self.client.bucket(bucket_name)
            bucket.storage_class = 'STANDARD'
            new_bucket = self.client.create_bucket(bucket, location=region)
        except Exception as e:  # pylint: disable=broad-except
            with ux_utils.print_exception_no_traceback():
                raise exceptions.StorageBucketCreateError(
                    f'Attempted to create a bucket {self.name} but failed.'
                ) from e
        logger.info(
            f'  {colorama.Style.DIM}Created GCS bucket {new_bucket.name!r} in '
            f'{new_bucket.location} with storage class '
            f'{new_bucket.storage_class}{colorama.Style.RESET_ALL}'
        )
        return new_bucket

    def _delete_gcs_bucket(self, bucket_name: str) -> bool:
        """Deletes GCS bucket, including all objects in bucket

        Args:
            bucket_name: str; Name of bucket

        Returns:
            bool; True if bucket was deleted, False if it was deleted externally.
        """

        with rich_utils.safe_status(
            ux_utils.spinner_message(f'Deleting GCS bucket [green]{bucket_name}')
        ):
            try:
                self.client.get_bucket(bucket_name)
            except gcp.forbidden_exception() as e:
                # Try public bucket to see if bucket exists
                with ux_utils.print_exception_no_traceback():
                    raise PermissionError(
                        'External Bucket detected. User not allowed to delete '
                        'external bucket.'
                    ) from e
            except gcp.not_found_exception():
                # If bucket does not exist, it may have been deleted externally.
                # Do a no-op in that case.
                logger.debug(f'Bucket {bucket_name} does not exist.')
                return False
            try:
                gsutil_alias, alias_gen = data_utils.get_gsutil_command()
                remove_obj_command = (
                    f'{alias_gen};{gsutil_alias} ' f'rm -r gs://{bucket_name}'
                )
                subprocess.check_output(
                    remove_obj_command,
                    stderr=subprocess.STDOUT,
                    shell=True,
                    executable='/bin/bash',
                )
                return True
            except subprocess.CalledProcessError as e:
                with ux_utils.print_exception_no_traceback():
                    raise exceptions.StorageBucketDeleteError(
                        f'Failed to delete GCS bucket {bucket_name}.'
                        f'Detailed error: {e.output}'
                    )

    @classmethod
    def _find_application_key_path(cls) -> str:
        # Check the application default credentials in the environment variable.
        # If the file does not exist, fallback to the default path.
        application_key_path = os.environ.get(_GCP_APPLICATION_CREDENTIAL_ENV, None)
        if application_key_path is not None:
            if not os.path.isfile(os.path.expanduser(application_key_path)):
                raise FileNotFoundError(
                    f'{_GCP_APPLICATION_CREDENTIAL_ENV}={application_key_path},'
                    ' but the file does not exist.'
                )
            return application_key_path
        if not os.path.isfile(
            os.path.expanduser(DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH)
        ):
            # Fallback to the default application credential path.
            raise FileNotFoundError(DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH)
        return DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH

    @classmethod
    def _get_identity_type(cls) -> Optional[GCPIdentityType]:
        try:
            account = cls.get_active_user_identity()
        except exceptions.CloudUserIdentityError:
            return None
        if account is None:
            return None
        assert account is not None
        if GCPIdentityType.SERVICE_ACCOUNT.value in account[0]:
            return GCPIdentityType.SERVICE_ACCOUNT
        return GCPIdentityType.SHARED_CREDENTIALS_FILE

    @classmethod
    def get_project_id(cls, dryrun: bool = False) -> str:
        if dryrun:
            return 'dryrun-project-id'
        # pylint: disable=import-outside-toplevel
        from google import auth  # type: ignore

        _, project_id = auth.default()
        if project_id is None:
            raise exceptions.CloudUserIdentityError(
                'Failed to get GCP project id. Please make sure you have '
                'run the following: \n'
                f'{cls._INDENT_PREFIX}gcloud init; \n'
                f'{cls._INDENT_PREFIX}gcloud auth application-default login'
            )
        return project_id

    @classmethod
    def get_user_identities(cls) -> List[List[str]]:
        """Returns the email address + project id of the active user."""
        try:
            account = _run_output(
                'gcloud auth list --filter=status:ACTIVE ' '--format="value(account)"'
            )
            account = account.strip()
        except subprocess.CalledProcessError as e:
            with ux_utils.print_exception_no_traceback():
                raise exceptions.CloudUserIdentityError(
                    f'Failed to get GCP user identity with unknown '
                    f'exception.\n'
                    '  Reason: '
                    f'{common_utils.format_exception(e, use_bracket=True)}'
                ) from e
        if not account:
            with ux_utils.print_exception_no_traceback():
                raise exceptions.CloudUserIdentityError(
                    'No GCP account is activated. Try running `gcloud '
                    'auth list --filter=status:ACTIVE '
                    '--format="value(account)"` and ensure it correctly '
                    'returns the current user.'
                )
        try:
            project_id = cls.get_project_id()
        except Exception as e:  # pylint: disable=broad-except
            with ux_utils.print_exception_no_traceback():
                raise exceptions.CloudUserIdentityError(
                    f'Failed to get GCP user identity with unknown '
                    f'exception.\n'
                    '  Reason: '
                    f'{common_utils.format_exception(e, use_bracket=True)}'
                ) from e
        # TODO: Return a list of identities in the profile when we support
        # automatic switching for GCP. Currently we only support one identity.
        return [[f'{account} [project_id={project_id}]']]

    @classmethod
    def get_active_user_identity_str(cls) -> Optional[str]:
        user_identity = cls.get_active_user_identity()
        if user_identity is None:
            return None
        return user_identity[0].replace('\n', '')
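
The identity string packs both the account email and the project into one value, so downstream code can work on a single token. `_get_identity_type` then classifies it by substring match, e.g. (the identity below is illustrative):

```python
identity = 'alice@example.com [project_id=my-project]'  # made-up value
if GCPIdentityType.SERVICE_ACCOUNT.value in identity:
    kind = GCPIdentityType.SERVICE_ACCOUNT
else:
    kind = GCPIdentityType.SHARED_CREDENTIALS_FILE
print(kind)  # GCPIdentityType.SHARED_CREDENTIALS_FILE
```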

    @classmethod
    def check_credentials(cls) -> Tuple[bool, Optional[str]]:
        """
        Check if the credentials are valid for GCS store.
        """
        try:
            # Check google-api-python-client installation.
            import googleapiclient  # noqa: F401
            from google import auth  # type: ignore

            # Check the installation of google-cloud-sdk.
            _run_output('gcloud --version')
        except (ImportError, subprocess.CalledProcessError) as e:
            return False, (
                f'{cls._DEPENDENCY_HINT}\n'
                f'{cls._INDENT_PREFIX}Credentials may also need to be set. '
                f'{cls._CREDENTIAL_HINT}\n'
                f'{cls._INDENT_PREFIX}Details: '
                f'{common_utils.format_exception(e, use_bracket=True)}'
            )

        identity_type = cls._get_identity_type()
        if identity_type == GCPIdentityType.SHARED_CREDENTIALS_FILE:
            # These files are only required when using the shared credentials
            # to access GCP. They are not required when using a service account.
            try:
                # These files are required because they will be synced to remote
                # VMs for `gsutil` to access private storage buckets.
                # `auth.default()` does not guarantee these files exist.
                for file in [
                    '~/.config/gcloud/access_tokens.db',
                    '~/.config/gcloud/credentials.db',
                ]:
                    if not os.path.isfile(os.path.expanduser(file)):
                        raise FileNotFoundError(file)
            except FileNotFoundError as e:
                return False, (
                    f'Credentials are not set. '
                    f'{cls._CREDENTIAL_HINT}\n'
                    f'{cls._INDENT_PREFIX}Details: '
                    f'{common_utils.format_exception(e, use_bracket=True)}'
                )

            try:
                cls._find_application_key_path()
            except FileNotFoundError as e:
                return False, (
                    f'Application credentials are not set. '
                    f'{cls._APPLICATION_CREDENTIAL_HINT}\n'
                    f'{cls._INDENT_PREFIX}Details: '
                    f'{common_utils.format_exception(e, use_bracket=True)}'
                )

        try:
            # Check if application default credentials are set.
            project_id = cls.get_project_id()

            # Check if the user is activated.
            identity = cls.get_active_user_identity()
        except (
            auth.exceptions.DefaultCredentialsError,
            exceptions.CloudUserIdentityError,
        ) as e:
            # See also: https://stackoverflow.com/a/53307505/1165051
            return False, (
                'Getting project ID or user identity failed. You can debug '
                'with `gcloud auth list`. To fix this, '
                f'{cls._CREDENTIAL_HINT[0].lower()}'
                f'{cls._CREDENTIAL_HINT[1:]}\n'
                f'{cls._INDENT_PREFIX}Details: '
                f'{common_utils.format_exception(e, use_bracket=True)}'
            )

        # Check APIs.
        apis = (
            ('cloudresourcemanager', 'Cloud Resource Manager'),
            ('iam', 'Identity and Access Management (IAM)'),
            ('storage', 'Cloud Storage'),
        )
        enabled_api = False
        for endpoint, display_name in apis:
            if is_api_disabled(endpoint, project_id):
                # For 'compute': ~55-60 seconds for the first run. If already
                # enabled, ~1s. Other API endpoints take ~1-5s to enable.
                if endpoint == 'compute':
                    suffix = ' (free of charge; this may take a minute)'
                else:
                    suffix = ' (free of charge)'
                print(f'\nEnabling {display_name} API{suffix}...')
                t1 = time.time()
                proc = subprocess.run(
                    f'gcloud services enable {endpoint}.googleapis.com '
                    f'--project {project_id}',
                    check=False,
                    shell=True,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                )
                if proc.returncode == 0:
                    enabled_api = True
                print(f'Done. Took {time.time() - t1:.1f} secs.')

        if enabled_api:
            print(
                '\nHint: Enabled GCP API(s) may take a few minutes to take '
                'effect. If any Konduktor commands/calls failed, retry after '
                'some time.'
            )

        import google.auth  # noqa: F401

        # This takes the user's credential info from "~/.config/gcloud/application_default_credentials.json".  # noqa: E501
        credentials, project = google.auth.default()
        crm = gcp.build(
            'cloudresourcemanager', 'v1', credentials=credentials, cache_discovery=False
        )
        gcp_minimal_permissions = utils.get_minimal_permissions()
        permissions = {'permissions': gcp_minimal_permissions}
        request = crm.projects().testIamPermissions(resource=project, body=permissions)
        with ux_utils.print_exception_no_traceback():
            ret_permissions = request.execute().get('permissions', [])
            diffs = set(gcp_minimal_permissions).difference(set(ret_permissions))
            if diffs:
                identity_str = identity[0] if identity else None
                return False, (
                    'The following permissions are not enabled for the current '
                    f'GCP identity ({identity_str}):\n    '
                    f'{diffs}\n    '
                    'For more details, visit: https://konduktor.readthedocs.io/en/latest/cloud-setup/cloud-permissions/gcp.html'
                )  # noqa: E501
        logger.info(
            f'GCP credentials are valid '
            f'for the current identity {logging.CHECK_MARK_EMOJI}'
        )
        logger.info('Creating k8s secret with GCP credentials...')
        set_ok, result = cls.set_secret_credentials()
        if not set_ok:
            logger.error(f'Failed to create k8s secret with GCP credentials: {result}')
            return False, result
        return True, None
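
Note that `check_credentials` returns an `(ok, reason)` tuple rather than raising, so a caller can aggregate results across clouds (presumably what `konduktor/check.py` in the file list above does). A hypothetical caller:

```python
ok, reason = GcsStore.check_credentials()
if not ok:
    print(f'GCS storage disabled: {reason}')
```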

    @classmethod
    def set_secret_credentials(cls) -> Tuple[bool, Optional[str]]:
        """
        Set the k8s secret storing the GCP credentials
        """
        context = kubernetes_utils.get_current_kube_config_context_name()
        namespace = kubernetes_utils.get_kube_config_context_namespace()
        credentials_dir = os.environ.get('CLOUDSDK_CONFIG', DEFAULT_GCP_CREDENTIALS_DIR)
        credentials_files = [
            os.path.expanduser(os.path.join(credentials_dir, f))
            for f in _CREDENTIAL_FILES
        ]
        ok, result = kubernetes_utils.set_secret(
            secret_name=cls._GCP_SECRET_NAME,
            namespace=namespace,
            context=context,
            secret_key=cls._GCP_CREDENTIALS_KEY,
            secret_value=base64_utils.zip_base64encode(credentials_files),
        )
        if not ok:
            logger.error(f'Failed to set GCP credentials in k8s secret: \n{result}')
            return False, result
        else:
            logger.info(
                f'GCP credentials set in k8s secret: {cls._GCP_SECRET_NAME} '
                f'in namespace {namespace} in context {context} '
                f'{logging.CHECK_MARK_EMOJI}'
            )
        return True, None

    @classmethod
    def get_k8s_credential_name(cls) -> str:
        return cls._GCP_SECRET_NAME
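
Taken together, the class follows the `AbstractStore` lifecycle: construct, `initialize()` to bind or create the bucket, `upload()` to sync, and `delete()` to tear down. A hypothetical end-to-end flow; the names are illustrative, and in practice the higher-level Storage handler drives these calls:

```python
store = GcsStore(name='my-bucket', source='~/my-dataset')
store.initialize()  # fetches gs://my-bucket, or creates it in us-central1
store.upload()      # rsyncs the contents of ~/my-dataset to the bucket root
print(store.get_handle().name)
store.delete()      # removes the bucket and all objects in it
```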