konduktor-nightly 0.1.0.dev20251128104812__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. konduktor/__init__.py +49 -0
  2. konduktor/adaptors/__init__.py +0 -0
  3. konduktor/adaptors/aws.py +221 -0
  4. konduktor/adaptors/common.py +118 -0
  5. konduktor/adaptors/gcp.py +126 -0
  6. konduktor/authentication.py +124 -0
  7. konduktor/backends/__init__.py +6 -0
  8. konduktor/backends/backend.py +86 -0
  9. konduktor/backends/constants.py +21 -0
  10. konduktor/backends/deployment.py +204 -0
  11. konduktor/backends/deployment_utils.py +1351 -0
  12. konduktor/backends/jobset.py +225 -0
  13. konduktor/backends/jobset_utils.py +726 -0
  14. konduktor/backends/pod_utils.py +501 -0
  15. konduktor/check.py +184 -0
  16. konduktor/cli.py +1945 -0
  17. konduktor/config.py +420 -0
  18. konduktor/constants.py +36 -0
  19. konduktor/controller/__init__.py +0 -0
  20. konduktor/controller/constants.py +56 -0
  21. konduktor/controller/launch.py +44 -0
  22. konduktor/controller/node.py +116 -0
  23. konduktor/controller/parse.py +111 -0
  24. konduktor/dashboard/README.md +30 -0
  25. konduktor/dashboard/backend/main.py +169 -0
  26. konduktor/dashboard/backend/sockets.py +154 -0
  27. konduktor/dashboard/frontend/.eslintrc.json +3 -0
  28. konduktor/dashboard/frontend/.gitignore +36 -0
  29. konduktor/dashboard/frontend/app/api/jobs/route.js +71 -0
  30. konduktor/dashboard/frontend/app/api/namespaces/route.js +69 -0
  31. konduktor/dashboard/frontend/app/components/Grafana.jsx +66 -0
  32. konduktor/dashboard/frontend/app/components/JobsData.jsx +197 -0
  33. konduktor/dashboard/frontend/app/components/LogsData.jsx +139 -0
  34. konduktor/dashboard/frontend/app/components/NavMenu.jsx +39 -0
  35. konduktor/dashboard/frontend/app/components/NavTabs.jsx +73 -0
  36. konduktor/dashboard/frontend/app/components/NavTabs2.jsx +30 -0
  37. konduktor/dashboard/frontend/app/components/SelectBtn.jsx +27 -0
  38. konduktor/dashboard/frontend/app/components/lib/utils.js +6 -0
  39. konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +78 -0
  40. konduktor/dashboard/frontend/app/components/ui/input.jsx +19 -0
  41. konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +104 -0
  42. konduktor/dashboard/frontend/app/components/ui/select.jsx +120 -0
  43. konduktor/dashboard/frontend/app/favicon.ico +0 -0
  44. konduktor/dashboard/frontend/app/globals.css +120 -0
  45. konduktor/dashboard/frontend/app/jobs/page.js +10 -0
  46. konduktor/dashboard/frontend/app/layout.js +22 -0
  47. konduktor/dashboard/frontend/app/logs/page.js +11 -0
  48. konduktor/dashboard/frontend/app/page.js +12 -0
  49. konduktor/dashboard/frontend/jsconfig.json +7 -0
  50. konduktor/dashboard/frontend/next.config.mjs +4 -0
  51. konduktor/dashboard/frontend/package-lock.json +6687 -0
  52. konduktor/dashboard/frontend/package.json +37 -0
  53. konduktor/dashboard/frontend/postcss.config.mjs +8 -0
  54. konduktor/dashboard/frontend/server.js +64 -0
  55. konduktor/dashboard/frontend/tailwind.config.js +17 -0
  56. konduktor/data/__init__.py +9 -0
  57. konduktor/data/aws/__init__.py +15 -0
  58. konduktor/data/aws/s3.py +1138 -0
  59. konduktor/data/constants.py +7 -0
  60. konduktor/data/data_utils.py +268 -0
  61. konduktor/data/gcp/__init__.py +19 -0
  62. konduktor/data/gcp/constants.py +42 -0
  63. konduktor/data/gcp/gcs.py +994 -0
  64. konduktor/data/gcp/utils.py +9 -0
  65. konduktor/data/registry.py +19 -0
  66. konduktor/data/storage.py +812 -0
  67. konduktor/data/storage_utils.py +535 -0
  68. konduktor/execution.py +447 -0
  69. konduktor/kube_client.py +237 -0
  70. konduktor/logging.py +111 -0
  71. konduktor/manifests/aibrix-setup.yaml +430 -0
  72. konduktor/manifests/apoxy-setup.yaml +184 -0
  73. konduktor/manifests/apoxy-setup2.yaml +98 -0
  74. konduktor/manifests/controller_deployment.yaml +69 -0
  75. konduktor/manifests/dashboard_deployment.yaml +131 -0
  76. konduktor/manifests/dmesg_daemonset.yaml +57 -0
  77. konduktor/manifests/pod_cleanup_controller.yaml +129 -0
  78. konduktor/resource.py +546 -0
  79. konduktor/serving.py +153 -0
  80. konduktor/task.py +949 -0
  81. konduktor/templates/deployment.yaml.j2 +191 -0
  82. konduktor/templates/jobset.yaml.j2 +43 -0
  83. konduktor/templates/pod.yaml.j2 +563 -0
  84. konduktor/usage/__init__.py +0 -0
  85. konduktor/usage/constants.py +21 -0
  86. konduktor/utils/__init__.py +0 -0
  87. konduktor/utils/accelerator_registry.py +17 -0
  88. konduktor/utils/annotations.py +62 -0
  89. konduktor/utils/base64_utils.py +95 -0
  90. konduktor/utils/common_utils.py +426 -0
  91. konduktor/utils/constants.py +5 -0
  92. konduktor/utils/env_options.py +55 -0
  93. konduktor/utils/exceptions.py +234 -0
  94. konduktor/utils/kubernetes_enums.py +8 -0
  95. konduktor/utils/kubernetes_utils.py +763 -0
  96. konduktor/utils/log_utils.py +467 -0
  97. konduktor/utils/loki_utils.py +102 -0
  98. konduktor/utils/rich_utils.py +123 -0
  99. konduktor/utils/schemas.py +625 -0
  100. konduktor/utils/subprocess_utils.py +273 -0
  101. konduktor/utils/ux_utils.py +247 -0
  102. konduktor/utils/validator.py +461 -0
  103. konduktor_nightly-0.1.0.dev20251128104812.dist-info/LICENSE +91 -0
  104. konduktor_nightly-0.1.0.dev20251128104812.dist-info/METADATA +98 -0
  105. konduktor_nightly-0.1.0.dev20251128104812.dist-info/RECORD +107 -0
  106. konduktor_nightly-0.1.0.dev20251128104812.dist-info/WHEEL +4 -0
  107. konduktor_nightly-0.1.0.dev20251128104812.dist-info/entry_points.txt +3 -0
konduktor/data/aws/s3.py
@@ -0,0 +1,1138 @@
1
+ # Proprietary Changes made for Trainy under the Trainy Software License
2
+ # Original source: skypilot: https://github.com/skypilot-org/skypilot
3
+ # which is Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ """Amazon Web Services (AWS) S3 Storage."""
14
+
15
+ import enum
16
+ import hashlib
17
+ import os
18
+ import re
19
+ import shlex
20
+ import subprocess
21
+ import tempfile
22
+ import time
23
+ import typing
24
+ import uuid
25
+ from typing import Any, Dict, List, Optional, Tuple
26
+
27
+ import colorama
28
+
29
+ from konduktor import config, logging
30
+ from konduktor.adaptors import aws
31
+ from konduktor.adaptors.aws import boto3
32
+ from konduktor.backends import constants as backend_constants
33
+ from konduktor.data import constants, data_utils, storage_utils
34
+ from konduktor.utils import (
35
+ annotations,
36
+ base64_utils,
37
+ common_utils,
38
+ exceptions,
39
+ kubernetes_utils,
40
+ rich_utils,
41
+ ux_utils,
42
+ )
43
+
44
+ if typing.TYPE_CHECKING:
45
+ from konduktor.adaptors.aws import boto3
46
+
47
+ logger = logging.get_logger(__name__)
48
+
49
+ # Maximum number of concurrent rsync upload processes
50
+ _MAX_CONCURRENT_UPLOADS = 32
51
+
52
+ _CREDENTIAL_FILES = ['credentials', 'config']
53
+
54
+ AWS_SECRET_NAME = 'awscredentials'
55
+ AWS_CREDENTIALS_KEY = 'awscredentials'
56
+
57
+ DEFAULT_AWS_CREDENTIALS_DIR = '~/.aws/'
58
+ DEFAULT_AWS_CREDENTIAL_PATH = '~/.aws/credentials'
59
+ DEFAULT_AWS_CONFIG_PATH = '~/.aws/config'
60
+
61
+ _LOCK_PATH = '~/.konduktor/s3_storage.lock'
62
+
63
+
64
+ class AWSIdentityType(enum.Enum):
65
+ """AWS identity type.
66
+
67
+ The account type is determined by the current user identity, based on `aws
68
+ configure list`. We will check the existence of the value in the output of
69
+ `aws configure list` to determine the account type.
70
+ """
71
+
72
+ # Name Value Type Location
73
+ # ---- ----- ---- --------
74
+ # profile 1234 env ...
75
+ # access_key ****************abcd sso
76
+ # secret_key ****************abcd sso
77
+ # region <not set> None None
78
+ SSO = 'sso'
79
+ ENV = 'env'
80
+ IAM_ROLE = 'iam-role'
81
+ CONTAINER_ROLE = 'container-role'
82
+ CUSTOM_PROCESS = 'custom-process'
83
+ ASSUME_ROLE = 'assume-role'
84
+
85
+ # Name Value Type Location
86
+ # ---- ----- ---- --------
87
+ # profile <not set> None None
88
+ # access_key ****************abcd shared-credentials-file
89
+ # secret_key ****************abcd shared-credentials-file
90
+ # region us-east-1 config-file ~/.aws/config
91
+ SHARED_CREDENTIALS_FILE = 'shared-credentials-file'
92
+
93
+ # IN GCS.PY
94
+ def can_credential_expire(self) -> bool:
95
+ """Check if the AWS identity type can expire.
96
+
97
+ SSO, IAM_ROLE and CONTAINER_ROLE are temporary credentials and are refreshed
98
+ automatically. ENV and SHARED_CREDENTIALS_FILE are short-lived
99
+ credentials without refresh.
100
+ IAM ROLE:
101
+ https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html
102
+ SSO/Container-role refresh token:
103
+ https://docs.aws.amazon.com/solutions/latest/dea-api/auth-refreshtoken.html
104
+ """
105
+ # TODO(hong): Add a CLI based check for the expiration of the temporary
106
+ # credentials
107
+ expirable_types = {AWSIdentityType.ENV, AWSIdentityType.SHARED_CREDENTIALS_FILE}
108
+ return self in expirable_types
109
+
110
+
111
+ class S3Store(storage_utils.AbstractStore):
112
+ """S3Store inherits from Storage Object and represents the backend
113
+ for S3 buckets.
114
+ """
115
+
116
+ # k8s secret name for aws credentials
117
+ _AWS_SECRET_NAME = f'{AWS_SECRET_NAME}-{common_utils.user_and_hostname_hash()}'
118
+ _AWS_CREDENTIALS_KEY = AWS_CREDENTIALS_KEY
119
+
120
+ _DEFAULT_REGION = 'us-east-1'
121
+ _ACCESS_DENIED_MESSAGE = 'Access Denied'
122
+ _CUSTOM_ENDPOINT_REGIONS = [
123
+ 'ap-east-1',
124
+ 'me-south-1',
125
+ 'af-south-1',
126
+ 'eu-south-1',
127
+ 'eu-south-2',
128
+ 'ap-south-2',
129
+ 'ap-southeast-3',
130
+ 'ap-southeast-4',
131
+ 'me-central-1',
132
+ 'il-central-1',
133
+ ]
134
+
135
+ _INDENT_PREFIX = ' '
136
+
137
+ _STATIC_CREDENTIAL_HELP_STR = (
138
+ 'Run the following commands:'
139
+ f'\n{_INDENT_PREFIX} $ aws configure'
140
+ f'\n{_INDENT_PREFIX} $ aws configure list '
141
+ '# Ensure that this shows identity is set.'
142
+ f'\n{_INDENT_PREFIX}For more info: '
143
+ 'https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html' # pylint: disable=line-too-long
144
+ )
145
+
146
+ _REPR = 'S3Store'
147
+
148
+ def __init__(
149
+ self,
150
+ name: str,
151
+ source: str,
152
+ region: Optional[str] = _DEFAULT_REGION,
153
+ is_sky_managed: Optional[bool] = False,
154
+ sync_on_reconstruction: Optional[bool] = True,
155
+ _bucket_sub_path: Optional[str] = None,
156
+ ):
157
+ self.client: 'boto3.client.Client' # type: ignore[name-defined]
158
+ self.bucket: 'constants.StorageHandle'
159
+ if region in self._CUSTOM_ENDPOINT_REGIONS:
160
+ logger.warning(
161
+ 'AWS opt-in regions are not supported for S3. '
162
+ f'Falling back to default region '
163
+ f'{self._DEFAULT_REGION} for bucket {name!r}.'
164
+ )
165
+ region = self._DEFAULT_REGION
166
+ super().__init__(
167
+ name,
168
+ source,
169
+ region,
170
+ is_sky_managed,
171
+ sync_on_reconstruction,
172
+ _bucket_sub_path,
173
+ )
174
+
175
+ def __repr__(self):
176
+ return self._REPR
177
+
178
+ # IN GCS.PY
179
+ def _validate(self):
180
+ if self.source is not None and isinstance(self.source, str):
181
+ if self.source.startswith('s3://'):
182
+ assert self.name == data_utils.split_s3_path(self.source)[0], (
183
+ 'S3 Bucket is specified as path, the name should be the'
184
+ ' same as S3 bucket.'
185
+ )
186
+ assert data_utils.verify_s3_bucket(self.name), (
187
+ f'Source specified as {self.source}, an S3 bucket. ',
188
+ 'S3 Bucket should exist.',
189
+ )
190
+ # if self.source.startswith('gs://'):
191
+ # assert self.name == data_utils.split_gcs_path(self.source)[0], (
192
+ # 'GCS Bucket is specified as path, the name should be '
193
+ # 'the same as GCS bucket.'
194
+ # )
195
+ # elif data_utils.is_az_container_endpoint(self.source):
196
+ # storage_account_name, container_name, _ = (
197
+ # data_utils.split_az_path(self.source))
198
+ # assert self.name == container_name, (
199
+ # 'Azure bucket is specified as path, the name should be '
200
+ # 'the same as Azure bucket.')
201
+ # assert data_utils.verify_az_bucket(
202
+ # storage_account_name, self.name), (
203
+ # f'Source specified as {self.source}, an Azure bucket. '
204
+ # 'Azure bucket should exist.')
205
+ # elif self.source.startswith('r2://'):
206
+ # assert self.name == data_utils.split_r2_path(self.source)[0], (
207
+ # 'R2 Bucket is specified as path, the name should be '
208
+ # 'the same as R2 bucket.')
209
+ # assert data_utils.verify_r2_bucket(self.name), (
210
+ # f'Source specified as {self.source}, a R2 bucket. ',
211
+ # 'R2 Bucket should exist.')
212
+ # elif self.source.startswith('cos://'):
213
+ # assert self.name == data_utils.split_cos_path(self.source)[0], (
214
+ # 'COS Bucket is specified as path, the name should be '
215
+ # 'the same as COS bucket.')
216
+ # assert data_utils.verify_ibm_cos_bucket(self.name), (
217
+ # f'Source specified as {self.source}, a COS bucket. ',
218
+ # 'COS Bucket should exist.')
219
+ # Validate name
220
+ self.name = self.validate_name(self.name)
221
+
222
+ # IN GCS.PY
223
+ @classmethod
224
+ def validate_name(cls, name: str) -> str:
225
+ """Validates the name of the S3 store.
226
+
227
+ Source for rules:
228
+ https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html
229
+ """
230
+
231
+ def _raise_no_traceback_name_error(err_str):
232
+ with ux_utils.print_exception_no_traceback():
233
+ raise exceptions.StorageNameError(err_str)
234
+
235
+ if name is not None and isinstance(name, str):
236
+ # Check for overall length
237
+ if not 3 <= len(name) <= 63:
238
+ _raise_no_traceback_name_error(
239
+ f'Invalid store name: name {name} must be between 3 (min) '
240
+ 'and 63 (max) characters long.'
241
+ )
242
+
243
+ # Check for valid characters and start/end with a number or letter
244
+ pattern = r'^[a-z0-9][-a-z0-9._]*[a-z0-9]$'
245
+ if not re.match(pattern, name):
246
+ _raise_no_traceback_name_error(
247
+ f'Invalid store name: name {name} can consist only of '
248
+ 'lowercase letters, numbers, dots (.), and hyphens (-). '
249
+ 'It must begin and end with a letter or number.'
250
+ )
251
+
252
+ # Check for two adjacent periods
253
+ if '..' in name:
254
+ _raise_no_traceback_name_error(
255
+ f'Invalid store name: name {name} must not contain '
256
+ 'two adjacent periods.'
257
+ )
258
+
259
+ # Check for IP address format
260
+ ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
261
+ if re.match(ip_pattern, name):
262
+ _raise_no_traceback_name_error(
263
+ f'Invalid store name: name {name} must not be formatted as '
264
+ 'an IP address (for example, 192.168.5.4).'
265
+ )
266
+
267
+ # Check for 'xn--' prefix
268
+ if name.startswith('xn--'):
269
+ _raise_no_traceback_name_error(
270
+ f'Invalid store name: name {name} must not start with the '
271
+ 'prefix "xn--".'
272
+ )
273
+
274
+ # Check for '-s3alias' suffix
275
+ if name.endswith('-s3alias'):
276
+ _raise_no_traceback_name_error(
277
+ f'Invalid store name: name {name} must not end with the '
278
+ 'suffix "-s3alias".'
279
+ )
280
+
281
+ # Check for '--ol-s3' suffix
282
+ if name.endswith('--ol-s3'):
283
+ _raise_no_traceback_name_error(
284
+ f'Invalid store name: name {name} must not end with the '
285
+ 'suffix "--ol-s3".'
286
+ )
287
+ else:
288
+ _raise_no_traceback_name_error('Store name must be specified.')
289
+ return name
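
The checks above follow AWS's published bucket-naming rules. As a rough, standalone illustration of the same rules (a trimmed-down validator, not this package's implementation):

    import re

    def looks_like_valid_s3_name(name: str) -> bool:
        """Trimmed-down mirror of the naming checks above (illustrative only)."""
        if not 3 <= len(name) <= 63:
            return False
        if not re.match(r'^[a-z0-9][-a-z0-9._]*[a-z0-9]$', name):
            return False
        if '..' in name or name.startswith('xn--'):
            return False
        if name.endswith('-s3alias') or name.endswith('--ol-s3'):
            return False
        if re.match(r'^(?:\d{1,3}\.){3}\d{1,3}$', name):
            return False
        return True

    assert looks_like_valid_s3_name('my-training-data')
    assert not looks_like_valid_s3_name('My_Bucket')    # uppercase/underscore
    assert not looks_like_valid_s3_name('192.168.5.4')  # IP-address form
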
290
+
291
+ # IN GCS.PY
292
+ def initialize(self):
293
+ """Initializes the S3 store object on the cloud.
294
+
295
+ Initialization involves fetching the bucket if it exists, or creating it if
296
+ it does not.
297
+
298
+ Raises:
299
+ StorageBucketCreateError: If bucket creation fails
300
+ StorageBucketGetError: If fetching existing bucket fails
301
+ StorageInitError: If general initialization fails.
302
+ """
303
+ self.client = data_utils.create_s3_client(self.region)
304
+ self.bucket, is_new_bucket = self._get_bucket()
305
+ if self.is_sky_managed is None:
306
+ # If is_sky_managed is not specified, then this is a new storage
307
+ # object (i.e., did not exist in global_user_state) and we should
308
+ # set the is_sky_managed property.
309
+ # If is_sky_managed is specified, then we take no action.
310
+ self.is_sky_managed = is_new_bucket
311
+
312
+ # IN GCS.PY
313
+ def upload(self):
314
+ """Uploads source to store bucket.
315
+
316
+ Upload must be called by the Storage handler - it is not called on
317
+ Store initialization.
318
+
319
+ Raises:
320
+ StorageUploadError: if upload fails.
321
+ """
322
+ try:
323
+ if isinstance(self.source, list):
324
+ self.batch_aws_rsync(self.source, create_dirs=True)
325
+ elif self.source is not None:
326
+ if self.source.startswith('s3://'):
327
+ pass
328
+ # elif self.source.startswith('gs://'):
329
+ # self._transfer_to_s3()
330
+ # elif self.source.startswith('r2://'):
331
+ # self._transfer_to_s3()
332
+ else:
333
+ self.batch_aws_rsync([self.source])
334
+ except exceptions.StorageUploadError:
335
+ raise
336
+ except Exception as e:
337
+ raise exceptions.StorageUploadError(
338
+ f'Upload failed for store {self.name}'
339
+ ) from e
340
+
341
+ # IN GCS.PY
342
+ def delete(self) -> None:
343
+ deleted_by_skypilot = self._delete_s3_bucket(self.name)
344
+ if deleted_by_skypilot:
345
+ msg_str = f'Deleted S3 bucket {self.name}.'
346
+ else:
347
+ msg_str = (
348
+ f'S3 bucket {self.name} may have been deleted '
349
+ f'externally. Removing from local state.'
350
+ )
351
+ logger.info(f'{colorama.Fore.GREEN}{msg_str}' f'{colorama.Style.RESET_ALL}')
352
+
353
+ # IN GCS.PY
354
+ def get_handle(self) -> 'constants.StorageHandle':
355
+ return aws.resource('s3').Bucket(self.name)
356
+
357
+ # FROM data/storage.py but matches GCS.PY batch_gsutil_rsync() (s3 specific)
358
+ def batch_aws_rsync(
359
+ self, source_path_list: List['constants.Path'], create_dirs: bool = False
360
+ ) -> None:
361
+ """Invokes aws s3 sync to batch upload a list of local paths to S3
362
+
363
+ AWS Sync by default uses 10 threads to upload files to the bucket. To
364
+ increase parallelism, modify max_concurrent_requests in your aws config
365
+ file (Default path: ~/.aws/config).
366
+
367
+ Since aws s3 sync does not support batch operations, we construct
368
+ multiple commands to be run in parallel.
369
+
370
+ Args:
371
+ source_path_list: List of paths to local files or directories
372
+ create_dirs: If the local_path is a directory and this is set to
373
+ False, the contents of the directory are directly uploaded to
374
+ root of the bucket. If the local_path is a directory and this is
375
+ set to True, the directory is created in the bucket root and
376
+ contents are uploaded to it.
377
+ """
378
+ sub_path = f'/{self._bucket_sub_path}' if self._bucket_sub_path else ''
379
+
380
+ def get_file_sync_command(base_dir_path, file_names):
381
+ includes = ' '.join(
382
+ [f'--include {shlex.quote(file_name)}' for file_name in file_names]
383
+ )
384
+ base_dir_path = shlex.quote(base_dir_path)
385
+ sync_command = (
386
+ 'aws s3 sync --no-follow-symlinks --exclude="*" '
387
+ f'{includes} {base_dir_path} '
388
+ f's3://{self.name}{sub_path}'
389
+ )
390
+ return sync_command
391
+
392
+ def get_dir_sync_command(src_dir_path, dest_dir_name):
393
+ # we exclude .git directory from the sync
394
+ excluded_list = storage_utils.get_excluded_files(src_dir_path)
395
+ excluded_list.append('.git/*')
396
+ excludes = ' '.join(
397
+ [f'--exclude {shlex.quote(file_name)}' for file_name in excluded_list]
398
+ )
399
+ src_dir_path = shlex.quote(src_dir_path)
400
+ sync_command = (
401
+ f'aws s3 sync --no-follow-symlinks {excludes} '
402
+ f'{src_dir_path} '
403
+ f's3://{self.name}{sub_path}/{dest_dir_name}'
404
+ )
405
+ return sync_command
406
+
407
+ # Generate message for upload
408
+ if len(source_path_list) > 1:
409
+ source_message = f'{len(source_path_list)} paths'
410
+ else:
411
+ source_message = source_path_list[0]
412
+
413
+ log_path = logging.generate_tmp_logging_file_path(
414
+ constants._STORAGE_LOG_FILE_NAME
415
+ )
416
+ sync_path = f'{source_message} -> s3://{self.name}{sub_path}/'
417
+ with rich_utils.safe_status(
418
+ ux_utils.spinner_message(f'Syncing {sync_path}', log_path=log_path)
419
+ ):
420
+ data_utils.parallel_upload(
421
+ source_path_list,
422
+ get_file_sync_command,
423
+ get_dir_sync_command,
424
+ log_path,
425
+ self.name,
426
+ self._ACCESS_DENIED_MESSAGE,
427
+ create_dirs=create_dirs,
428
+ max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS,
429
+ )
430
+ logger.info(
431
+ ux_utils.finishing_message(f'Storage synced: {sync_path}', log_path)
432
+ )
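
Because `aws s3 sync` has no batch mode, the helper above emits one shell command per source path and runs them in parallel via `data_utils.parallel_upload`. A small sketch of the shape of a generated directory-sync command (bucket, sub-path and excludes here are hypothetical):

    import shlex

    bucket, sub_path, src_dir, dest_dir = 'my-bucket', '/run-42', './ckpts', 'ckpts'
    excludes = ' '.join(f'--exclude {shlex.quote(p)}' for p in ['.git/*', '*.tmp'])
    cmd = (f'aws s3 sync --no-follow-symlinks {excludes} '
           f'{shlex.quote(src_dir)} s3://{bucket}{sub_path}/{dest_dir}')
    print(cmd)
    # -> aws s3 sync --no-follow-symlinks --exclude '.git/*' --exclude '*.tmp'
    #    ./ckpts s3://my-bucket/run-42/ckpts
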
433
+
434
+ # IN GCS.PY
435
+ def _get_bucket(self) -> Tuple['constants.StorageHandle', bool]:
436
+ """Obtains the S3 bucket.
437
+
438
+ If the bucket exists, this method will return the bucket.
439
+ If the bucket does not exist, there are three cases:
440
+ 1) Raise an error if the bucket source starts with s3://
441
+ 2) Return None if bucket has been externally deleted and
442
+ sync_on_reconstruction is False
443
+ 3) Create and return a new bucket otherwise
444
+
445
+ Raises:
446
+ StorageSpecError: If externally created bucket is attempted to be
447
+ mounted without specifying storage source.
448
+ StorageBucketCreateError: If creating the bucket fails
449
+ StorageBucketGetError: If fetching a bucket fails
450
+ StorageExternalDeletionError: If externally deleted storage is
451
+ attempted to be fetched while reconstructing the storage for
452
+ 'sky storage delete' or 'sky start'
453
+ """
454
+ s3 = aws.resource('s3')
455
+ bucket = s3.Bucket(self.name)
456
+
457
+ try:
458
+ # Try Public bucket case.
459
+ # This line does not error out if the bucket is an external public
460
+ # bucket or if it is a user's bucket that is publicly
461
+ # accessible.
462
+ self.client.head_bucket(Bucket=self.name)
463
+ self._validate_existing_bucket()
464
+ return bucket, False
465
+ except aws.botocore_exceptions().ClientError as e:
466
+ error_code = e.response['Error']['Code']
467
+ # AccessDenied error for buckets that are private and not owned by
468
+ # user.
469
+ if error_code == '403':
470
+ command = f'aws s3 ls {self.name}'
471
+ with ux_utils.print_exception_no_traceback():
472
+ raise exceptions.StorageBucketGetError(
473
+ f'Bucket {self.name} does not exist.'
474
+ + f' To debug, consider running `{command}`.'
475
+ ) from e
476
+ # Bucket already exists but we tried to create it. Continue
477
+ elif error_code == '409':
478
+ command = f'aws s3 ls {self.name}'
479
+ logger.info(
480
+ f'Bucket {self.name} already exists. Skipping '
481
+ f'creation. To check, consider running `{command}`'
482
+ )
483
+
484
+ if isinstance(self.source, str) and self.source.startswith('s3://'):
485
+ with ux_utils.print_exception_no_traceback():
486
+ raise exceptions.StorageBucketGetError(
487
+ 'Attempted to use a non-existent bucket as a source: '
488
+ f'{self.source}. Consider using `aws s3 ls '
489
+ f'{self.source}` to debug.'
490
+ )
491
+
492
+ # If bucket cannot be found in both private and public settings,
493
+ # the bucket is to be created by Sky. However, creation is skipped if
494
+ # Store object is being reconstructed for deletion or re-mount with
495
+ # sky start, and error is raised instead.
496
+ if self.sync_on_reconstruction:
497
+ bucket = self._create_s3_bucket(self.name, self.region)
498
+ return bucket, True
499
+ else:
500
+ # Raised when Storage object is reconstructed for sky storage
501
+ # delete or to re-mount Storages with sky start but the storage
502
+ # is already removed externally.
503
+ raise exceptions.StorageExternalDeletionError(
504
+ 'Attempted to fetch a non-existent bucket: ' f'{self.name}'
505
+ )
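
The method distinguishes a missing bucket (404), a private bucket owned by someone else (403), and an existing bucket via the error code that `head_bucket` reports. A minimal standalone probe with the same branching (assumes boto3 and default credentials; not this package's code):

    import boto3
    from botocore.exceptions import ClientError

    def bucket_status(name: str) -> str:
        """Rough classification mirroring the head_bucket handling above."""
        try:
            boto3.client('s3').head_bucket(Bucket=name)
            return 'accessible'
        except ClientError as e:
            code = e.response['Error']['Code']
            if code == '404':
                return 'missing'
            if code == '403':
                return 'exists but access is denied'
            return f'error {code}'

    # print(bucket_status('some-bucket-name'))  # needs valid AWS credentials
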
506
+
507
+ # IN GCS.PY
508
+ def _download_file(self, remote_path: str, local_path: str) -> None:
509
+ """Downloads file from remote to local on s3 bucket
510
+ using the boto3 API
511
+
512
+ Args:
513
+ remote_path: str; Remote path on S3 bucket
514
+ local_path: str; Local path on user's device
515
+ """
516
+ self.bucket.download_file(remote_path, local_path)
517
+
518
+ # IN GCS.PY
519
+ def _create_s3_bucket(
520
+ self, bucket_name: str, region=_DEFAULT_REGION
521
+ ) -> 'constants.StorageHandle':
522
+ """Creates S3 bucket with specific name in specific region
523
+
524
+ Args:
525
+ bucket_name: str; Name of bucket
526
+ region: str; Region name, e.g. us-west-1, us-east-2
527
+ Raises:
528
+ StorageBucketCreateError: If bucket creation fails.
529
+ """
530
+ s3_client = self.client
531
+ try:
532
+ create_bucket_config: Dict[str, Any] = {'Bucket': bucket_name}
533
+ # If default us-east-1 region of create_bucket API is used,
534
+ # the LocationConstraint must not be specified.
535
+ # Reference: https://stackoverflow.com/a/51912090
536
+ if region is not None and region != 'us-east-1':
537
+ create_bucket_config['CreateBucketConfiguration'] = {
538
+ 'LocationConstraint': region
539
+ }
540
+ s3_client.create_bucket(**create_bucket_config)
541
+ logger.info(
542
+ f' {colorama.Style.DIM}Created S3 bucket {bucket_name!r} in '
543
+ f'{region or "us-east-1"}{colorama.Style.RESET_ALL}'
544
+ )
545
+
546
+ # Add AWS tags configured in config.yaml to the bucket.
547
+ # This is useful for cost tracking and external cleanup.
548
+ bucket_tags = config.get_nested(('aws', 'labels'), {})
549
+ if bucket_tags:
550
+ s3_client.put_bucket_tagging(
551
+ Bucket=bucket_name,
552
+ Tagging={
553
+ 'TagSet': [
554
+ {'Key': k, 'Value': v} for k, v in bucket_tags.items()
555
+ ]
556
+ },
557
+ )
558
+
559
+ except aws.botocore_exceptions().ClientError as e:
560
+ with ux_utils.print_exception_no_traceback():
561
+ raise exceptions.StorageBucketCreateError(
562
+ f'Attempted to create a bucket {self.name} but failed.'
563
+ ) from e
564
+ return aws.resource('s3').Bucket(bucket_name)
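
The us-east-1 special case exists because `create_bucket` rejects an explicit `LocationConstraint` of `us-east-1`. The branch in isolation (boto3 assumed; bucket name hypothetical):

    import boto3

    def create_bucket(name: str, region: str = 'us-east-1') -> None:
        kwargs = {'Bucket': name}
        if region != 'us-east-1':
            # Only non-default regions may pass a LocationConstraint.
            kwargs['CreateBucketConfiguration'] = {'LocationConstraint': region}
        boto3.client('s3', region_name=region).create_bucket(**kwargs)
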
565
+
566
+ # NOT IN GCS.PY but FROM data/storage.py (s3 specific)
567
+ def _execute_s3_remove_command(
568
+ self, command: str, bucket_name: str, hint_operating: str, hint_failed: str
569
+ ) -> bool:
570
+ try:
571
+ with rich_utils.safe_status(ux_utils.spinner_message(hint_operating)):
572
+ subprocess.check_output(command.split(' '), stderr=subprocess.STDOUT)
573
+ except subprocess.CalledProcessError as e:
574
+ if 'NoSuchBucket' in e.output.decode('utf-8'):
575
+ logger.debug(f'Bucket {bucket_name} does not exist.')
576
+ return False
577
+ else:
578
+ with ux_utils.print_exception_no_traceback():
579
+ raise exceptions.StorageBucketDeleteError(
580
+ f'{hint_failed}' f'Detailed error: {e.output}'
581
+ )
582
+ return True
583
+
584
+ # IN GCS.PY
585
+ def _delete_s3_bucket(self, bucket_name: str) -> bool:
586
+ """Deletes S3 bucket, including all objects in bucket
587
+
588
+ Args:
589
+ bucket_name: str; Name of bucket
590
+
591
+ Returns:
592
+ bool; True if bucket was deleted, False if it was deleted externally.
593
+
594
+ Raises:
595
+ StorageBucketDeleteError: If deleting the bucket fails.
596
+ """
597
+ # Deleting objects is very slow programmatically
598
+ # (i.e. bucket.objects.all().delete() is slow).
599
+ # In addition, standard delete operations (i.e. via `aws s3 rm`)
600
+ # are slow, since AWS puts deletion markers.
601
+ # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
602
+ # The fastest way to delete is to run `aws s3 rb --force`,
603
+ # which removes the bucket by force.
604
+ remove_command = f'aws s3 rb s3://{bucket_name} --force'
605
+ success = self._execute_s3_remove_command(
606
+ remove_command,
607
+ bucket_name,
608
+ f'Deleting S3 bucket [green]{bucket_name}[/]',
609
+ f'Failed to delete S3 bucket {bucket_name}.',
610
+ )
611
+ if not success:
612
+ return False
613
+
614
+ # Wait until bucket deletion propagates on AWS servers
615
+ while data_utils.verify_s3_bucket(bucket_name):
616
+ time.sleep(0.1)
617
+ return True
618
+
619
+ # NOT IN GCS.PY but FROM data/storage.py (s3 specific)
620
+ def _delete_s3_bucket_sub_path(self, bucket_name: str, sub_path: str) -> bool:
621
+ """Deletes the sub path from the bucket."""
622
+ remove_command = f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive'
623
+ return self._execute_s3_remove_command(
624
+ remove_command,
625
+ bucket_name,
626
+ f'Removing objects from S3 bucket ' f'[green]{bucket_name}/{sub_path}[/]',
627
+ f'Failed to remove objects from S3 bucket {bucket_name}/{sub_path}.',
628
+ )
629
+
630
+ @classmethod
631
+ @annotations.lru_cache(scope='global', maxsize=1)
632
+ def _aws_configure_list(cls) -> Optional[bytes]:
633
+ proc = subprocess.run(
634
+ 'aws configure list',
635
+ shell=True,
636
+ check=False,
637
+ stdout=subprocess.PIPE,
638
+ stderr=subprocess.PIPE,
639
+ )
640
+ if proc.returncode != 0:
641
+ return None
642
+ return proc.stdout
643
+
644
+ @classmethod
645
+ def _sso_credentials_help_str(cls, expired: bool = False) -> str:
646
+ help_str = 'Run the following commands (must use AWS CLI v2):'
647
+ if not expired:
648
+ help_str += f'\n{cls._INDENT_PREFIX} $ aws configure sso'
649
+ help_str += (
650
+ f'\n{cls._INDENT_PREFIX} $ aws sso login --profile <profile_name>'
651
+ f'\n{cls._INDENT_PREFIX}For more info: '
652
+ 'https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-sso.html'
653
+ )
654
+ return help_str
655
+
656
+ @classmethod
657
+ @annotations.lru_cache(
658
+ scope='global', maxsize=1
659
+ ) # Cache since getting identity is slow.
660
+ def _sts_get_caller_identity(cls) -> Optional[List[List[str]]]:
661
+ try:
662
+ sts = aws.client('sts', check_credentials=False)
663
+ # The caller identity contains 3 fields: UserId, Account, Arn.
664
+ # 1. 'UserId' is unique across all AWS entity, which looks like
665
+ # "AROADBQP57FF2AEXAMPLE:role-session-name"
666
+ # 2. 'Account' can be shared by multiple users under the same
667
+ # organization
668
+ # 3. 'Arn' is the full path to the user, which can be reused when
669
+ # the user is deleted and recreated.
670
+ # Refer to: <https://docs.aws.amazon.com/cli/latest/reference/sts/get-caller-identity.html>
671
+ # and <https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_variables.html#principaltable>
672
+ user_info = sts.get_caller_identity()
673
+ # Allow fallback to AccountId if UserId does not match, because:
674
+ # 1. In the case where multiple IAM users belong to a single root account,
675
+ # those users normally share the visibility of the VMs, so we do not
676
+ # need to identify them with each other. (There can be some cases,
677
+ # when an IAM user is given a limited permission by the admin, we may
678
+ # ignore that case for now, or print out a warning if the underlying
679
+ # userid changed for a cluster).
680
+ # 2. In the case where the multiple users belong to an organization,
681
+ # those users will have different account id, so fallback works.
682
+ user_ids = [user_info['UserId'], user_info['Account']]
683
+ except aws.botocore_exceptions().NoCredentialsError as e:
684
+ with ux_utils.print_exception_no_traceback():
685
+ raise exceptions.CloudUserIdentityError(
686
+ 'AWS credentials are not set. '
687
+ f'{cls._STATIC_CREDENTIAL_HELP_STR}\n'
688
+ f'{cls._INDENT_PREFIX}Details: `aws sts '
689
+ 'get-caller-identity` failed with error:'
690
+ f' {common_utils.format_exception(e, use_bracket=True)}.'
691
+ ) from None
692
+ except aws.botocore_exceptions().ClientError as e:
693
+ with ux_utils.print_exception_no_traceback():
694
+ raise exceptions.CloudUserIdentityError(
695
+ 'Failed to access AWS services with credentials. '
696
+ 'Make sure that the access and secret keys are correct.'
697
+ f' {cls._STATIC_CREDENTIAL_HELP_STR}\n'
698
+ f'{cls._INDENT_PREFIX}Details: `aws sts '
699
+ 'get-caller-identity` failed with error:'
700
+ f' {common_utils.format_exception(e, use_bracket=True)}.'
701
+ ) from None
702
+ except aws.botocore_exceptions().InvalidConfigError as e:
703
+ # pylint: disable=import-outside-toplevel
704
+ import awscli
705
+ from packaging import version
706
+
707
+ awscli_version = version.parse(awscli.__version__)
708
+ if awscli_version < version.parse(
709
+ '1.27.10'
710
+ ) and 'configured to use SSO' in str(e):
711
+ with ux_utils.print_exception_no_traceback():
712
+ raise exceptions.CloudUserIdentityError(
713
+ 'awscli is too old to use SSO. '
714
+ 'Run the following command to upgrade:'
715
+ f'\n{cls._INDENT_PREFIX} $ pip install awscli>=1.27.10'
716
+ f'\n{cls._INDENT_PREFIX}You may need to log into SSO again '
717
+ f'after upgrading. {cls._sso_credentials_help_str()}'
718
+ ) from None
719
+ with ux_utils.print_exception_no_traceback():
720
+ raise exceptions.CloudUserIdentityError(
721
+ f'Invalid AWS configuration.\n'
722
+ f' Reason: {common_utils.format_exception(e, use_bracket=True)}.'
723
+ ) from None
724
+ except aws.botocore_exceptions().TokenRetrievalError:
725
+ # This is raised when the access token is expired, which mainly
726
+ # happens when the user is using temporary credentials or SSO
727
+ # login.
728
+ with ux_utils.print_exception_no_traceback():
729
+ raise exceptions.CloudUserIdentityError(
730
+ 'AWS access token is expired.'
731
+ f' {cls._sso_credentials_help_str(expired=True)}'
732
+ ) from None
733
+ except Exception as e: # pylint: disable=broad-except
734
+ with ux_utils.print_exception_no_traceback():
735
+ raise exceptions.CloudUserIdentityError(
736
+ f'Failed to get AWS user.\n'
737
+ f' Reason: {common_utils.format_exception(e, use_bracket=True)}.'
738
+ ) from None
739
+ # TODO: Return a list of identities in the profile when we support
740
+ # automatic switching for AWS. Currently we only support one identity.
741
+ return [user_ids]
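
The identity list is built from `sts get-caller-identity`, falling back from `UserId` to `Account` as described in the comments. A bare-bones look at those fields (requires working credentials, so the call is left commented):

    import boto3

    def current_identity() -> list:
        info = boto3.client('sts').get_caller_identity()
        # 'UserId' is unique per principal/session, 'Account' is shared by all
        # users of the account, 'Arn' names the principal.
        return [info['UserId'], info['Account']]

    # print(current_identity())
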
742
+
743
+ # IN GCS.PY
744
+ @classmethod
745
+ @annotations.lru_cache(
746
+ scope='global', maxsize=1
747
+ ) # Cache since getting identity is slow.
748
+ def get_user_identities(cls) -> List[List[str]]:
749
+ """Returns a [UserId, Account] list that uniquely identifies the user.
750
+
751
+ These fields come from `aws sts get-caller-identity` and are cached
752
+ locally by `aws configure list` output. The identities are assumed to
753
+ be stable for the duration of the `sky` process. Modifying the
754
+ credentials while the `sky` process is running will not affect the
755
+ identity returned by this function.
756
+
757
+ We permit the same actual user to:
758
+
759
+ - switch between different root accounts (after which both elements
760
+ of the list will be different) and have their clusters owned by
761
+ each account be protected; or
762
+
763
+ - within the same root account, switch between different IAM
764
+ users, and treat [user_id=1234, account=A] and
765
+ [user_id=4567, account=A] to be the *same*. Namely, switching
766
+ between these IAM roles within the same root account will cause
767
+ the first element of the returned list to differ, and will allow
768
+ the same actual user to continue to interact with their clusters.
769
+ Note: this is not 100% safe, since the IAM users can have very
770
+ specific permissions, that disallow them to access the clusters
771
+ but it is a reasonable compromise as that could be rare.
772
+
773
+ Returns:
774
+ A list of strings that uniquely identifies the user on this cloud.
775
+ For identity check, we will fallback through the list of strings
776
+ until we find a match, and print a warning if we fail for the
777
+ first string.
778
+
779
+ Raises:
780
+ exceptions.CloudUserIdentityError: if the user identity cannot be
781
+ retrieved.
782
+ """
783
+ stdout = cls._aws_configure_list()
784
+ if stdout is None:
785
+ # `aws configure list` is not available, possible reasons:
786
+ # - awscli is not installed but credentials are valid, e.g. run from
787
+ # an EC2 instance with IAM role
788
+ # - aws credentials are not set, proceed anyway to get unified error
789
+ # message for users
790
+ return cls._sts_get_caller_identity()
791
+ config_hash = hashlib.md5(stdout).hexdigest()[:8] # noqa: F841
792
+ # Getting aws identity cost ~1s, so we cache the result with the output of
793
+ # `aws configure list` as cache key. Different `aws configure list` output
794
+ # can map to the same aws identity; our assumption is that the output is stable
795
+ # in the real world, so the number of cache files stays limited.
796
+ # TODO(aylei): consider using a more stable cache key and evaluate eviction.
797
+ # TODO:(ryan) ??? Ignoring caching for now (returning early)
798
+ return cls._sts_get_caller_identity()
799
+ # cache_path = catalog_common.get_catalog_path(
800
+ # f'aws/.cache/user-identity-{config_hash}.txt')
801
+ # if os.path.exists(cache_path):
802
+ # try:
803
+ # with open(cache_path, 'r', encoding='utf-8') as f:
804
+ # return json.loads(f.read())
805
+ # except json.JSONDecodeError:
806
+ # # cache is invalid, ignore it and fetch identity again
807
+ # pass
808
+ #
809
+ # result = cls._sts_get_caller_identity()
810
+ # with open(cache_path, 'w', encoding='utf-8') as f:
811
+ # f.write(json.dumps(result))
812
+ # return result
813
+
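
The commented-out block above sketches a file cache keyed on a hash of the `aws configure list` output. A hedged reconstruction of that idea as a standalone helper (cache directory and naming are illustrative, not the package's real cache path):

    import hashlib
    import json
    import os

    def cached_identity(config_list_output: bytes, fetch, cache_dir='/tmp/aws-id-cache'):
        """Cache fetch() keyed on a hash of `aws configure list` output."""
        os.makedirs(cache_dir, exist_ok=True)
        key = hashlib.md5(config_list_output).hexdigest()[:8]
        path = os.path.join(cache_dir, f'user-identity-{key}.json')
        if os.path.exists(path):
            try:
                with open(path, 'r', encoding='utf-8') as f:
                    return json.load(f)
            except json.JSONDecodeError:
                pass  # stale or corrupt cache entry; refresh below
        result = fetch()
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(result, f)
        return result
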
814
+ # IN GCS.PY
815
+ @classmethod
816
+ def get_active_user_identity_str(cls) -> Optional[str]:
817
+ user_identity = cls.get_active_user_identity()
818
+ if user_identity is None:
819
+ return None
820
+ identity_str = f'{user_identity[0]} [account={user_identity[1]}]'
821
+ return identity_str
822
+
823
+ # IN GCS.PY
824
+ @classmethod
825
+ @annotations.lru_cache(scope='global', maxsize=1)
826
+ def check_credentials(cls) -> Tuple[bool, Optional[str]]:
827
+ """Checks if the user has access credentials to AWS."""
828
+
829
+ dependency_installation_hints = (
830
+ 'AWS dependencies are not installed. '
831
+ 'Run the following commands:'
832
+ f'\n{cls._INDENT_PREFIX} $ pip install boto3 botocore awscli'
833
+ f'\n{cls._INDENT_PREFIX}Credentials may also need to be set. '
834
+ f'{cls._STATIC_CREDENTIAL_HELP_STR}'
835
+ )
836
+
837
+ # Checks if the AWS CLI is installed properly
838
+ proc = subprocess.run(
839
+ 'aws --version',
840
+ shell=True,
841
+ check=False,
842
+ stdout=subprocess.PIPE,
843
+ stderr=subprocess.PIPE,
844
+ )
845
+ if proc.returncode != 0:
846
+ return False, dependency_installation_hints
847
+ try:
848
+ # Checks if aws boto is installed properly
849
+ # pylint: disable=import-outside-toplevel, unused-import
850
+ import boto3 # noqa: F401
851
+ import botocore # noqa: F401
852
+ except ImportError:
853
+ return False, dependency_installation_hints
854
+
855
+ # Checks if AWS credentials 1) exist and 2) are valid.
856
+ # https://stackoverflow.com/questions/53548737/verify-aws-credentials-with-boto3
857
+ try:
858
+ identity_str = cls.get_active_user_identity_str() # noqa: F841
859
+ except exceptions.CloudUserIdentityError as e:
860
+ return False, str(e)
861
+
862
+ static_credential_exists = os.path.isfile(
863
+ os.path.expanduser('~/.aws/credentials')
864
+ )
865
+ hints = None
866
+ identity_type = cls._current_identity_type()
867
+ single_cloud_hint = (
868
+ ' It will work if you use AWS only, but will cause problems '
869
+ 'if you want to use multiple clouds. To set up static credentials, '
870
+ 'try: aws configure'
871
+ )
872
+ if identity_type == AWSIdentityType.SSO:
873
+ hints = 'AWS SSO is set.'
874
+ if static_credential_exists:
875
+ hints += (
876
+ ' To ensure multiple clouds work correctly, please use Konduktor '
877
+ 'with static credentials (e.g., ~/.aws/credentials) by unsetting '
878
+ 'the AWS_PROFILE environment variable.'
879
+ )
880
+ else:
881
+ hints += single_cloud_hint
882
+ elif identity_type == AWSIdentityType.IAM_ROLE:
883
+ # When using an IAM role, the credentials may not exist in the
884
+ # ~/.aws/credentials file. So we don't check for the existence of the
885
+ # file. This will happen when the user is on a VM (or
886
+ # jobs-controller) created by an SSO account, i.e. the VM will be
887
+ # assigned the IAM role: skypilot-v1.
888
+ hints = f'AWS IAM role is set.{single_cloud_hint}'
889
+ elif identity_type == AWSIdentityType.CONTAINER_ROLE:
890
+ # Similar to the IAM ROLE, an ECS container may not store credentials
891
+ # in the ~/.aws/credentials file. So we don't check for the existence of
892
+ # the file. i.e. the container will be assigned the IAM role of the
893
+ # task: skypilot-v1.
894
+ hints = f'AWS container-role is set.{single_cloud_hint}'
895
+ elif identity_type == AWSIdentityType.CUSTOM_PROCESS:
896
+ # Similar to the IAM ROLE, a custom process may not store credentials
897
+ # in the ~/.aws/credentials file. So we don't check for the existence of
898
+ # the file. i.e. the custom process will be assigned the IAM role of the
899
+ # task: skypilot-v1.
900
+ hints = f'AWS custom-process is set.{single_cloud_hint}'
901
+ elif identity_type == AWSIdentityType.ASSUME_ROLE:
902
+ # When using ASSUME ROLE, the credentials are coming from a different
903
+ # source profile. So we don't check for the existence of ~/.aws/credentials.
904
+ # i.e. the assumed role will be assigned the IAM role of the
905
+ # task: skypilot-v1.
906
+ hints = f'AWS assume-role is set.{single_cloud_hint}'
907
+ elif identity_type == AWSIdentityType.ENV:
908
+ # When using ENV vars, the credentials are coming from the environment
909
+ # variables. So we don't check for the existence of ~/.aws/credentials.
910
+ # i.e. the identity is not determined by the file.
911
+ hints = f'AWS env is set.{single_cloud_hint}'
912
+ else:
913
+ # This file is required because it is required by the VMs launched on
914
+ # other clouds to access private s3 buckets and resources like EC2.
915
+ # `get_active_user_identity` does not guarantee this file exists.
916
+ if not static_credential_exists:
917
+ return (
918
+ False,
919
+ '~/.aws/credentials does not exist. '
920
+ + cls._STATIC_CREDENTIAL_HELP_STR,
921
+ )
922
+
923
+ try:
924
+ s3 = aws.client('s3')
925
+
926
+ suffix = uuid.uuid4().hex[:6]
927
+ test_bucket = f'konduktor-check-s3-{int(time.time())}-{suffix}'
928
+
929
+ try:
930
+ s3.create_bucket(Bucket=test_bucket)
931
+
932
+ time.sleep(1)
933
+
934
+ s3.get_bucket_location(Bucket=test_bucket)
935
+ s3.list_objects_v2(Bucket=test_bucket, MaxKeys=1)
936
+
937
+ # Object-related checks
938
+ s3.put_object(Bucket=test_bucket, Key='test.txt', Body='hello')
939
+ s3.get_object(Bucket=test_bucket, Key='test.txt')
940
+ s3.delete_object(Bucket=test_bucket, Key='test.txt')
941
+
942
+ finally:
943
+ # Always attempt to clean up, even if earlier checks failed
944
+ try:
945
+ s3.delete_bucket(Bucket=test_bucket)
946
+ except Exception:
947
+ raise RuntimeError(
948
+ 'AWS S3 delete bucket permission is missing. '
949
+ 'Please check your policy.\n'
950
+ )
951
+
952
+ except Exception:
953
+ return False, (
954
+ 'One or more AWS S3 permissions are missing. '
955
+ 'Please check your policy.\n'
956
+ )
957
+
958
+ logger.info(
959
+ f'AWS credentials are valid '
960
+ f'for the current identity {logging.CHECK_MARK_EMOJI}'
961
+ )
962
+ logger.info('Creating k8s secret with AWS credentials...')
963
+ set_ok, result = cls.set_secret_credentials()
964
+ if not set_ok:
965
+ logger.error(f'Failed to create k8s secret with AWS credentials: {result}')
966
+ return False, result
967
+ return True, hints
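
The probe above exercises bucket create/list/put/get/delete, so credentials that pass it need roughly the matching S3 permissions. An illustrative mapping of the boto3 calls to IAM actions (an assumption for debugging guidance, not an official policy shipped with this package):

    # Illustrative only: which IAM action each probe call above requires.
    REQUIRED_S3_ACTIONS = {
        'create_bucket': 's3:CreateBucket',
        'get_bucket_location': 's3:GetBucketLocation',
        'list_objects_v2': 's3:ListBucket',
        'put_object': 's3:PutObject',
        'get_object': 's3:GetObject',
        'delete_object': 's3:DeleteObject',
        'delete_bucket': 's3:DeleteBucket',
    }
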
968
+
969
+ @classmethod
970
+ def _current_identity_type(cls) -> Optional[AWSIdentityType]:
971
+ stdout = cls._aws_configure_list()
972
+ if stdout is None:
973
+ return None
974
+ output = stdout.decode()
975
+
976
+ # We determine the identity type by looking at the output of
977
+ # `aws configure list`. The output looks like:
978
+ # Name Value Type Location
979
+ # ---- ----- ---- --------
980
+ # profile <not set> None None
981
+ # access_key * <not set> sso None
982
+ # secret_key * <not set> sso None
983
+ # region <not set> None None
984
+ # We try to determine the identity type by looking for the
985
+ # string "sso"/"iam-role" in the output, i.e. the "Type" column.
986
+
987
+ def _is_access_key_of_type(type_str: str) -> bool:
988
+ # The dot (.) does not match line separators.
989
+ results = re.findall(rf'access_key.*{type_str}', output)
990
+ if len(results) > 1:
991
+ raise RuntimeError(f'Unexpected `aws configure list` output:\n{output}')
992
+ return len(results) == 1
993
+
994
+ for identity_type in AWSIdentityType:
995
+ if _is_access_key_of_type(identity_type.value):
996
+ return identity_type
997
+ return AWSIdentityType.SHARED_CREDENTIALS_FILE
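
Identity detection is a substring/regex scan of the `access_key` row in the `aws configure list` output, falling back to the shared-credentials file. A self-contained sketch against a canned sample (the sample text is illustrative):

    import re

    SAMPLE = """\
          Name                    Value             Type    Location
          ----                    -----             ----    --------
       profile                <not set>             None    None
    access_key     ****************ABCD      shared-credentials-file
    secret_key     ****************ABCD      shared-credentials-file
        region                us-east-1      config-file    ~/.aws/config
    """

    def detect(output: str) -> str:
        for kind in ('sso', 'env', 'iam-role', 'container-role',
                     'custom-process', 'assume-role', 'shared-credentials-file'):
            if re.search(rf'access_key.*{kind}', output):
                return kind
        return 'shared-credentials-file'

    print(detect(SAMPLE))  # -> shared-credentials-file
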
998
+
999
+ # IN GCS.PY
1000
+ @classmethod
1001
+ def set_secret_credentials(cls) -> Tuple[bool, Optional[str]]:
1002
+ """
1003
+ Set the k8s secret storing the AWS credentials
1004
+ """
1005
+ context = kubernetes_utils.get_current_kube_config_context_name()
1006
+ namespace = kubernetes_utils.get_kube_config_context_namespace()
1007
+
1008
+ # Check if credentials are provided via environment
1009
+ access_key = os.environ.get('AWS_ACCESS_KEY_ID')
1010
+ secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
1011
+ region = os.environ.get('AWS_DEFAULT_REGION', 'us-east-1')
1012
+
1013
+ if access_key and secret_key:
1014
+ logger.info('Using AWS credentials from env')
1015
+ credentials_dir = tempfile.mkdtemp()
1016
+ credentials_path = os.path.join(credentials_dir, 'credentials')
1017
+ config_path = os.path.join(credentials_dir, 'config')
1018
+
1019
+ with open(credentials_path, 'w') as f:
1020
+ f.write(f"""[default]
1021
+ aws_access_key_id = {access_key}
1022
+ aws_secret_access_key = {secret_key}
1023
+ """)
1024
+
1025
+ with open(config_path, 'w') as f:
1026
+ f.write(f"""[default]
1027
+ region = {region}
1028
+ """)
1029
+ else:
1030
+ logger.info('Using AWS credentials from ~/.aws/')
1031
+ credentials_dir = DEFAULT_AWS_CREDENTIALS_DIR
1032
+
1033
+ credentials_files = [
1034
+ os.path.expanduser(os.path.join(credentials_dir, f))
1035
+ for f in _CREDENTIAL_FILES
1036
+ ]
1037
+
1038
+ secret_metadata = {
1039
+ 'labels': {
1040
+ backend_constants.SECRET_KIND_LABEL: 'S3',
1041
+ },
1042
+ }
1043
+
1044
+ ok, result = kubernetes_utils.set_secret(
1045
+ secret_name=cls._AWS_SECRET_NAME,
1046
+ namespace=namespace,
1047
+ context=context,
1048
+ data={
1049
+ cls._AWS_CREDENTIALS_KEY: base64_utils.zip_base64encode(
1050
+ credentials_files
1051
+ )
1052
+ },
1053
+ secret_metadata=secret_metadata,
1054
+ )
1055
+ if not ok:
1056
+ logger.error(f'Failed to set AWS credentials in k8s secret: \n{result}')
1057
+ return False, result
1058
+ else:
1059
+ logger.info(
1060
+ f'AWS credentials set in k8s secret: {cls._AWS_SECRET_NAME} '
1061
+ f'in namespace {namespace} in context {context} '
1062
+ f'{logging.CHECK_MARK_EMOJI}'
1063
+ )
1064
+
1065
+ try:
1066
+ identity = aws.client('sts').get_caller_identity()
1067
+ logger.info(
1068
+ f"Using AWS credentials for ARN: {identity['Arn']} "
1069
+ f"(UserId: {identity['UserId']}, Account: {identity['Account']})"
1070
+ )
1071
+ except Exception as e:
1072
+ logger.warning(f'Could not fetch caller identity: {e}')
1073
+
1074
+ return True, None
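
The credentials land on the cluster as a single base64-encoded zip stored in a Kubernetes secret (via `base64_utils.zip_base64encode`). A minimal standalone sketch of that encoding step (this helper is illustrative, not the package's implementation):

    import base64
    import io
    import os
    import zipfile

    def zip_base64encode(paths):
        """Zip a list of files in memory and return a base64 string."""
        buf = io.BytesIO()
        with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
            for path in paths:
                path = os.path.expanduser(path)
                if os.path.isfile(path):
                    zf.write(path, arcname=os.path.basename(path))
        return base64.b64encode(buf.getvalue()).decode('utf-8')

    # encoded = zip_base64encode(['~/.aws/credentials', '~/.aws/config'])
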
1075
+
1076
+ # IN GCS.PY
1077
+ @classmethod
1078
+ def get_k8s_credential_name(cls) -> str:
1079
+ return cls._AWS_SECRET_NAME
1080
+
1081
+
1082
+ class S3CloudStorage(storage_utils.CloudStorage):
1083
+ """S3 Storage."""
1084
+
1085
+ # List of commands to install AWS CLI
1086
+ _GET_AWSCLI = [
1087
+ 'command -v aws >/dev/null 2>&1 || ('
1088
+ 'apt-get update && apt-get install -y curl unzip && '
1089
+ 'curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && ' # noqa: E501
1090
+ 'unzip awscliv2.zip && '
1091
+ './aws/install -i ~/aws-cli -b ~/bin && '
1092
+ 'export PATH=$HOME/bin:$PATH && '
1093
+ 'rm -rf aws awscliv2.zip'
1094
+ ') && export PATH=$HOME/bin:$PATH'
1095
+ ]
1096
+
1097
+ _STORE: typing.Type[storage_utils.AbstractStore] = S3Store
1098
+
1099
+ # IN GCS.PY
1100
+ def is_directory(self, url: str) -> bool:
1101
+ """Returns whether S3 'url' is a directory.
1102
+
1103
+ In cloud object stores, a "directory" refers to a regular object whose
1104
+ name is a prefix of other objects.
1105
+ """
1106
+ s3 = aws.resource('s3')
1107
+ bucket_name, path = data_utils.split_s3_path(url)
1108
+ bucket = s3.Bucket(bucket_name)
1109
+
1110
+ num_objects = 0
1111
+ for obj in bucket.objects.filter(Prefix=path):
1112
+ num_objects += 1
1113
+ if obj.key == path:
1114
+ return False
1115
+ # If there is more than one object under the prefix, it is a directory
1116
+ if num_objects == 3:
1117
+ return True
1118
+
1119
+ # A directory with few or no items
1120
+ return True
1121
+
1122
+ # IN GCS.PY
1123
+ def make_sync_dir_command(self, source: str, destination: str) -> str:
1124
+ """Downloads using AWS CLI."""
1125
+ # AWS Sync by default uses 10 threads to transfer files.
1126
+ # To increase parallelism, modify max_concurrent_requests in your
1127
+ # aws config file (Default path: ~/.aws/config).
1128
+ all_commands = list(self._GET_AWSCLI)
1129
+
1130
+ all_commands.append(f'aws s3 sync --no-follow-symlinks {source} {destination}')
1131
+ return ' && '.join(all_commands)
1132
+
1133
+ # IN GCS.PY
1134
+ def make_sync_file_command(self, source: str, destination: str) -> str:
1135
+ all_commands = list(self._GET_AWSCLI)
1136
+
1137
+ all_commands.append(f'aws s3 cp {source} {destination}')
1138
+ return ' && '.join(all_commands)
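
Both helpers prepend the CLI bootstrap from `_GET_AWSCLI` and join everything with `&&`, so the target pod runs a single shell line. A rough illustration of the resulting command shape (bootstrap abbreviated, paths hypothetical):

    get_awscli = ['command -v aws >/dev/null 2>&1 || ( ...install awscli... ) '
                  '&& export PATH=$HOME/bin:$PATH']
    source, destination = 's3://my-bucket/run-42/ckpts', '/mnt/ckpts'
    command = ' && '.join(
        get_awscli + [f'aws s3 sync --no-follow-symlinks {source} {destination}'])
    print(command)
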