konduktor-nightly 0.1.0.dev20250409105017__py3-none-any.whl → 0.1.0.dev20250410104738__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1114 @@
1
+ # Proprietary Changes made for Trainy under the Trainy Software License
2
+ # Original source: skypilot: https://github.com/skypilot-org/skypilot
3
+ # which is Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ """Amazon Web Services (AWS) S3 Storage."""
14
+
15
+ import enum
16
+ import hashlib
17
+ import os
18
+ import re
19
+ import shlex
20
+ import subprocess
21
+ import tempfile
22
+ import time
23
+ import typing
24
+ import uuid
25
+ from typing import Any, Dict, List, Optional, Tuple
26
+
27
+ import boto3
28
+ import colorama
29
+
30
+ from konduktor import config, logging
31
+ from konduktor.adaptors import aws
32
+ from konduktor.data import constants, data_utils, storage_utils
33
+ from konduktor.utils import (
34
+ annotations,
35
+ base64_utils,
36
+ common_utils,
37
+ exceptions,
38
+ kubernetes_utils,
39
+ rich_utils,
40
+ ux_utils,
41
+ )
42
+
43
+ logger = logging.get_logger(__name__)
44
+
45
+ # Maximum number of concurrent rsync upload processes
46
+ _MAX_CONCURRENT_UPLOADS = 32
47
+
48
+ _CREDENTIAL_FILES = ['credentials', 'config']
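+ # i.e. the standard ~/.aws/credentials and ~/.aws/config files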
49
+
50
+ AWS_SECRET_NAME = 'awscredentials'
51
+ AWS_CREDENTIALS_KEY = 'awscredentials'
52
+
53
+ DEFAULT_AWS_CREDENTIALS_DIR = '~/.aws/'
54
+ DEFAULT_AWS_CREDENTIAL_PATH = '~/.aws/credentials'
55
+ DEFAULT_AWS_CONFIG_PATH = '~/.aws/config'
56
+
57
+
58
+ class AWSIdentityType(enum.Enum):
59
+ """AWS identity type.
60
+
61
+ The account type is determined by the current user identity, based on
62
+ `aws configure list`: we check which credential type appears in that
63
+ command's output.
64
+ """
65
+
66
+ # Name Value Type Location
67
+ # ---- ----- ---- --------
68
+ # profile 1234 env ...
69
+ # access_key ****************abcd sso
70
+ # secret_key ****************abcd sso
71
+ # region <not set> None None
72
+ SSO = 'sso'
73
+ ENV = 'env'
74
+ IAM_ROLE = 'iam-role'
75
+ CONTAINER_ROLE = 'container-role'
76
+ CUSTOM_PROCESS = 'custom-process'
77
+ ASSUME_ROLE = 'assume-role'
78
+
79
+ # Name Value Type Location
80
+ # ---- ----- ---- --------
81
+ # profile <not set> None None
82
+ # access_key ****************abcd shared-credentials-file
83
+ # secret_key ****************abcd shared-credentials-file
84
+ # region us-east-1 config-file ~/.aws/config
85
+ SHARED_CREDENTIALS_FILE = 'shared-credentials-file'
86
+
87
+ # IN GCS.PY
88
+ def can_credential_expire(self) -> bool:
89
+ """Check if the AWS identity type can expire.
90
+
91
+ SSO, IAM_ROLE and CONTAINER_ROLE are temporary credentials that are
92
+ refreshed automatically. ENV and SHARED_CREDENTIALS_FILE may hold
93
+ short-lived credentials without a refresh mechanism.
94
+ IAM ROLE:
95
+ https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html
96
+ SSO/Container-role refresh token:
97
+ https://docs.aws.amazon.com/solutions/latest/dea-api/auth-refreshtoken.html
98
+ """
99
+ # TODO(hong): Add a CLI based check for the expiration of the temporary
100
+ # credentials
101
+ expirable_types = {AWSIdentityType.ENV, AWSIdentityType.SHARED_CREDENTIALS_FILE}
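+ # e.g. AWSIdentityType.ENV.can_credential_expire() is True, while
+ # AWSIdentityType.SSO.can_credential_expire() is False.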
102
+ return self in expirable_types
103
+
104
+
105
+ class S3Store(storage_utils.AbstractStore):
106
+ """S3Store inherits from Storage Object and represents the backend
107
+ for S3 buckets.
108
+ """
109
+
110
+ # k8s secret name for aws credentials
111
+ _AWS_SECRET_NAME = f'{AWS_SECRET_NAME}-{common_utils.user_and_hostname_hash()}'
112
+ _AWS_CREDENTIALS_KEY = AWS_CREDENTIALS_KEY
113
+
114
+ _DEFAULT_REGION = 'us-east-1'
115
+ _ACCESS_DENIED_MESSAGE = 'Access Denied'
116
+ _CUSTOM_ENDPOINT_REGIONS = [
117
+ 'ap-east-1',
118
+ 'me-south-1',
119
+ 'af-south-1',
120
+ 'eu-south-1',
121
+ 'eu-south-2',
122
+ 'ap-south-2',
123
+ 'ap-southeast-3',
124
+ 'ap-southeast-4',
125
+ 'me-central-1',
126
+ 'il-central-1',
127
+ ]
128
+
129
+ _INDENT_PREFIX = ' '
130
+
131
+ _STATIC_CREDENTIAL_HELP_STR = (
132
+ 'Run the following commands:'
133
+ f'\n{_INDENT_PREFIX} $ aws configure'
134
+ f'\n{_INDENT_PREFIX} $ aws configure list '
135
+ '# Ensure that this shows identity is set.'
136
+ f'\n{_INDENT_PREFIX}For more info: '
137
+ 'https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-quickstart.html' # pylint: disable=line-too-long
138
+ )
139
+
140
+ _REPR = 'S3Store'
141
+
142
+ def __init__(
143
+ self,
144
+ name: str,
145
+ source: str,
146
+ region: Optional[str] = _DEFAULT_REGION,
147
+ is_sky_managed: Optional[bool] = False,
148
+ sync_on_reconstruction: Optional[bool] = True,
149
+ _bucket_sub_path: Optional[str] = None,
150
+ ):
151
+ self.client: 'boto3.client.Client'
152
+ self.bucket: 'constants.StorageHandle'
153
+ if region in self._CUSTOM_ENDPOINT_REGIONS:
154
+ logger.warning(
155
+ 'AWS opt-in regions are not supported for S3. '
156
+ f'Falling back to default region '
157
+ f'{self._DEFAULT_REGION} for bucket {name!r}.'
158
+ )
159
+ region = self._DEFAULT_REGION
160
+ super().__init__(
161
+ name,
162
+ source,
163
+ region,
164
+ is_sky_managed,
165
+ sync_on_reconstruction,
166
+ _bucket_sub_path,
167
+ )
168
+
169
+ def __repr__(self):
170
+ return self._REPR
171
+
172
+ # IN GCS.PY
173
+ def _validate(self):
174
+ if self.source is not None and isinstance(self.source, str):
175
+ if self.source.startswith('s3://'):
176
+ assert self.name == data_utils.split_s3_path(self.source)[0], (
177
+ 'S3 Bucket is specified as path, the name should be the'
178
+ ' same as S3 bucket.'
179
+ )
180
+ assert data_utils.verify_s3_bucket(self.name), (
181
+ f'Source specified as {self.source}, an S3 bucket. '
182
+ 'S3 Bucket should exist.'
183
+ )
184
+ # if self.source.startswith('gs://'):
185
+ # assert self.name == data_utils.split_gcs_path(self.source)[0], (
186
+ # 'GCS Bucket is specified as path, the name should be '
187
+ # 'the same as GCS bucket.'
188
+ # )
189
+ # elif data_utils.is_az_container_endpoint(self.source):
190
+ # storage_account_name, container_name, _ = (
191
+ # data_utils.split_az_path(self.source))
192
+ # assert self.name == container_name, (
193
+ # 'Azure bucket is specified as path, the name should be '
194
+ # 'the same as Azure bucket.')
195
+ # assert data_utils.verify_az_bucket(
196
+ # storage_account_name, self.name), (
197
+ # f'Source specified as {self.source}, an Azure bucket. '
198
+ # 'Azure bucket should exist.')
199
+ # elif self.source.startswith('r2://'):
200
+ # assert self.name == data_utils.split_r2_path(self.source)[0], (
201
+ # 'R2 Bucket is specified as path, the name should be '
202
+ # 'the same as R2 bucket.')
203
+ # assert data_utils.verify_r2_bucket(self.name), (
204
+ # f'Source specified as {self.source}, a R2 bucket. ',
205
+ # 'R2 Bucket should exist.')
206
+ # elif self.source.startswith('cos://'):
207
+ # assert self.name == data_utils.split_cos_path(self.source)[0], (
208
+ # 'COS Bucket is specified as path, the name should be '
209
+ # 'the same as COS bucket.')
210
+ # assert data_utils.verify_ibm_cos_bucket(self.name), (
211
+ # f'Source specified as {self.source}, a COS bucket. ',
212
+ # 'COS Bucket should exist.')
213
+ # Validate name
214
+ self.name = self.validate_name(self.name)
215
+
216
+ # IN GCS.PY
217
+ @classmethod
218
+ def validate_name(cls, name: str) -> str:
219
+ """Validates the name of the S3 store.
220
+
221
+ Source for rules:
222
+ https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html
223
+ """
224
+
225
+ def _raise_no_traceback_name_error(err_str):
226
+ with ux_utils.print_exception_no_traceback():
227
+ raise exceptions.StorageNameError(err_str)
228
+
229
+ if name is not None and isinstance(name, str):
230
+ # Check for overall length
231
+ if not 3 <= len(name) <= 63:
232
+ _raise_no_traceback_name_error(
233
+ f'Invalid store name: name {name} must be between 3 (min) '
234
+ 'and 63 (max) characters long.'
235
+ )
236
+
237
+ # Check for valid characters and start/end with a number or letter
238
+ pattern = r'^[a-z0-9][-a-z0-9._]*[a-z0-9]$'
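+ # e.g. 'my-logs.bucket-1' matches this pattern, while 'My_Bucket'
+ # (uppercase, underscore) and '.bucket' (leading dot) do not.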
239
+ if not re.match(pattern, name):
240
+ _raise_no_traceback_name_error(
241
+ f'Invalid store name: name {name} can consist only of '
242
+ 'lowercase letters, numbers, dots (.), and hyphens (-). '
243
+ 'It must begin and end with a letter or number.'
244
+ )
245
+
246
+ # Check for two adjacent periods
247
+ if '..' in name:
248
+ _raise_no_traceback_name_error(
249
+ f'Invalid store name: name {name} must not contain '
250
+ 'two adjacent periods.'
251
+ )
252
+
253
+ # Check for IP address format
254
+ ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
255
+ if re.match(ip_pattern, name):
256
+ _raise_no_traceback_name_error(
257
+ f'Invalid store name: name {name} must not be formatted as '
258
+ 'an IP address (for example, 192.168.5.4).'
259
+ )
260
+
261
+ # Check for 'xn--' prefix
262
+ if name.startswith('xn--'):
263
+ _raise_no_traceback_name_error(
264
+ f'Invalid store name: name {name} must not start with the '
265
+ 'prefix "xn--".'
266
+ )
267
+
268
+ # Check for '-s3alias' suffix
269
+ if name.endswith('-s3alias'):
270
+ _raise_no_traceback_name_error(
271
+ f'Invalid store name: name {name} must not end with the '
272
+ 'suffix "-s3alias".'
273
+ )
274
+
275
+ # Check for '--ol-s3' suffix
276
+ if name.endswith('--ol-s3'):
277
+ _raise_no_traceback_name_error(
278
+ f'Invalid store name: name {name} must not end with the '
279
+ 'suffix "--ol-s3".'
280
+ )
281
+ else:
282
+ _raise_no_traceback_name_error('Store name must be specified.')
283
+ return name
284
+
285
+ # IN GCS.PY
286
+ def initialize(self):
287
+ """Initializes the S3 store object on the cloud.
288
+
289
+ Initialization involves fetching bucket if exists, or creating it if
290
+ it does not.
291
+
292
+ Raises:
293
+ StorageBucketCreateError: If bucket creation fails
294
+ StorageBucketGetError: If fetching existing bucket fails
295
+ StorageInitError: If general initialization fails.
296
+ """
297
+ self.client = data_utils.create_s3_client(self.region)
298
+ self.bucket, is_new_bucket = self._get_bucket()
299
+ if self.is_sky_managed is None:
300
+ # If is_sky_managed is not specified, then this is a new storage
301
+ # object (i.e., did not exist in global_user_state) and we should
302
+ # set the is_sky_managed property.
303
+ # If is_sky_managed is specified, then we take no action.
304
+ self.is_sky_managed = is_new_bucket
305
+
306
+ # IN GCS.PY
307
+ def upload(self):
308
+ """Uploads source to store bucket.
309
+
310
+ Upload must be called by the Storage handler - it is not called on
311
+ Store initialization.
312
+
313
+ Raises:
314
+ StorageUploadError: if upload fails.
315
+ """
316
+ try:
317
+ if isinstance(self.source, list):
318
+ self.batch_aws_rsync(self.source, create_dirs=True)
319
+ elif self.source is not None:
320
+ if self.source.startswith('s3://'):
321
+ pass
322
+ # elif self.source.startswith('gs://'):
323
+ # self._transfer_to_s3()
324
+ # elif self.source.startswith('r2://'):
325
+ # self._transfer_to_s3()
326
+ else:
327
+ self.batch_aws_rsync([self.source])
328
+ except exceptions.StorageUploadError:
329
+ raise
330
+ except Exception as e:
331
+ raise exceptions.StorageUploadError(
332
+ f'Upload failed for store {self.name}'
333
+ ) from e
334
+
335
+ # IN GCS.PY
336
+ def delete(self) -> None:
337
+ deleted_by_skypilot = self._delete_s3_bucket(self.name)
338
+ if deleted_by_skypilot:
339
+ msg_str = f'Deleted S3 bucket {self.name}.'
340
+ else:
341
+ msg_str = (
342
+ f'S3 bucket {self.name} may have been deleted '
343
+ f'externally. Removing from local state.'
344
+ )
345
+ logger.info(f'{colorama.Fore.GREEN}{msg_str}{colorama.Style.RESET_ALL}')
346
+
347
+ # IN GCS.PY
348
+ def get_handle(self) -> 'constants.StorageHandle':
349
+ return aws.resource('s3').Bucket(self.name)
350
+
351
+ # FROM data/storage.py but matches GCS.PY batch_gsutil_rsync() (s3 specific)
352
+ def batch_aws_rsync(
353
+ self, source_path_list: List['constants.Path'], create_dirs: bool = False
354
+ ) -> None:
355
+ """Invokes aws s3 sync to batch upload a list of local paths to S3
356
+
357
+ AWS Sync by default uses 10 threads to upload files to the bucket. To
358
+ increase parallelism, modify max_concurrent_requests in your aws config
359
+ file (Default path: ~/.aws/config).
360
+
361
+ Since aws s3 sync does not support batch operations, we construct
362
+ multiple commands to be run in parallel.
363
+
364
+ Args:
365
+ source_path_list: List of paths to local files or directories
366
+ create_dirs: If the local_path is a directory and this is set to
367
+ False, the contents of the directory are directly uploaded to
368
+ root of the bucket. If the local_path is a directory and this is
369
+ set to True, the directory is created in the bucket root and
370
+ contents are uploaded to it.
371
+ """
372
+ sub_path = f'/{self._bucket_sub_path}' if self._bucket_sub_path else ''
373
+
374
+ def get_file_sync_command(base_dir_path, file_names):
375
+ includes = ' '.join(
376
+ [f'--include {shlex.quote(file_name)}' for file_name in file_names]
377
+ )
378
+ base_dir_path = shlex.quote(base_dir_path)
379
+ sync_command = (
380
+ 'aws s3 sync --no-follow-symlinks --exclude="*" '
381
+ f'{includes} {base_dir_path} '
382
+ f's3://{self.name}{sub_path}'
383
+ )
384
+ return sync_command
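+ # Illustrative example: with file_names ['a.txt', 'b.txt'], base dir
+ # '/data', and no bucket sub path, this builds:
+ #   aws s3 sync --no-follow-symlinks --exclude="*" \
+ #     --include a.txt --include b.txt /data s3://<bucket-name>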
385
+
386
+ def get_dir_sync_command(src_dir_path, dest_dir_name):
387
+ # we exclude .git directory from the sync
388
+ excluded_list = storage_utils.get_excluded_files(src_dir_path)
389
+ excluded_list.append('.git/*')
390
+ excludes = ' '.join(
391
+ [f'--exclude {shlex.quote(file_name)}' for file_name in excluded_list]
392
+ )
393
+ src_dir_path = shlex.quote(src_dir_path)
394
+ sync_command = (
395
+ f'aws s3 sync --no-follow-symlinks {excludes} '
396
+ f'{src_dir_path} '
397
+ f's3://{self.name}{sub_path}/{dest_dir_name}'
398
+ )
399
+ return sync_command
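+ # Illustrative example: syncing local dir './ckpt' to dest name 'ckpt'
+ # with no bucket sub path builds roughly (user excludes elided):
+ #   aws s3 sync --no-follow-symlinks --exclude '.git/*' ./ckpt \
+ #     s3://<bucket-name>/ckpt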
400
+
401
+ # Generate message for upload
402
+ if len(source_path_list) > 1:
403
+ source_message = f'{len(source_path_list)} paths'
404
+ else:
405
+ source_message = source_path_list[0]
406
+
407
+ log_path = logging.generate_tmp_logging_file_path(
408
+ constants._STORAGE_LOG_FILE_NAME
409
+ )
410
+ sync_path = f'{source_message} -> s3://{self.name}{sub_path}/'
411
+ with rich_utils.safe_status(
412
+ ux_utils.spinner_message(f'Syncing {sync_path}', log_path=log_path)
413
+ ):
414
+ data_utils.parallel_upload(
415
+ source_path_list,
416
+ get_file_sync_command,
417
+ get_dir_sync_command,
418
+ log_path,
419
+ self.name,
420
+ self._ACCESS_DENIED_MESSAGE,
421
+ create_dirs=create_dirs,
422
+ max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS,
423
+ )
424
+ logger.info(
425
+ ux_utils.finishing_message(f'Storage synced: {sync_path}', log_path)
426
+ )
427
+
428
+ # IN GCS.PY
429
+ def _get_bucket(self) -> Tuple['constants.StorageHandle', bool]:
430
+ """Obtains the S3 bucket.
431
+
432
+ If the bucket exists, this method will return the bucket.
433
+ If the bucket does not exist, there are three cases:
434
+ 1) Raise an error if the bucket source starts with s3://
435
+ 2) Return None if bucket has been externally deleted and
436
+ sync_on_reconstruction is False
437
+ 3) Create and return a new bucket otherwise
438
+
439
+ Raises:
440
+ StorageSpecError: If externally created bucket is attempted to be
441
+ mounted without specifying storage source.
442
+ StorageBucketCreateError: If creating the bucket fails
443
+ StorageBucketGetError: If fetching a bucket fails
444
+ StorageExternalDeletionError: If externally deleted storage is
445
+ attempted to be fetched while reconstructing the storage for
446
+ 'sky storage delete' or 'sky start'
447
+ """
448
+ s3 = aws.resource('s3')
449
+ bucket = s3.Bucket(self.name)
450
+
451
+ try:
452
+ # Try Public bucket case.
453
+ # This line does not error out if the bucket is an external public
454
+ # bucket or if it is a user's bucket that is publicly
455
+ # accessible.
456
+ self.client.head_bucket(Bucket=self.name)
457
+ self._validate_existing_bucket()
458
+ return bucket, False
459
+ except aws.botocore_exceptions().ClientError as e:
460
+ error_code = e.response['Error']['Code']
461
+ # AccessDenied error for buckets that are private and not owned by
462
+ # user.
463
+ if error_code == '403':
464
+ command = f'aws s3 ls {self.name}'
465
+ with ux_utils.print_exception_no_traceback():
466
+ raise exceptions.StorageBucketGetError(
467
+ f'Bucket {self.name} does not exist.'
468
+ + f' To debug, consider running `{command}`.'
469
+ ) from e
470
+
471
+ if isinstance(self.source, str) and self.source.startswith('s3://'):
472
+ with ux_utils.print_exception_no_traceback():
473
+ raise exceptions.StorageBucketGetError(
474
+ 'Attempted to use a non-existent bucket as a source: '
475
+ f'{self.source}. Consider using `aws s3 ls '
476
+ f'{self.source}` to debug.'
477
+ )
478
+
479
+ # If bucket cannot be found in both private and public settings,
480
+ # the bucket is to be created by Sky. However, creation is skipped if
481
+ # Store object is being reconstructed for deletion or re-mount with
482
+ # sky start, and error is raised instead.
483
+ if self.sync_on_reconstruction:
484
+ bucket = self._create_s3_bucket(self.name, self.region)
485
+ return bucket, True
486
+ else:
487
+ # Raised when Storage object is reconstructed for sky storage
488
+ # delete or to re-mount Storages with sky start but the storage
489
+ # is already removed externally.
490
+ raise exceptions.StorageExternalDeletionError(
491
+ 'Attempted to fetch a non-existent bucket: ' f'{self.name}'
492
+ )
493
+
494
+ # IN GCS.PY
495
+ def _download_file(self, remote_path: str, local_path: str) -> None:
496
+ """Downloads file from remote to local on s3 bucket
497
+ using the boto3 API
498
+
499
+ Args:
500
+ remote_path: str; Remote path on S3 bucket
501
+ local_path: str; Local path on user's device
502
+ """
503
+ self.bucket.download_file(remote_path, local_path)
504
+
505
+ # IN GCS.PY
506
+ def _create_s3_bucket(
507
+ self, bucket_name: str, region=_DEFAULT_REGION
508
+ ) -> 'constants.StorageHandle':
509
+ """Creates S3 bucket with specific name in specific region
510
+
511
+ Args:
512
+ bucket_name: str; Name of bucket
513
+ region: str; Region name, e.g. us-west-1, us-east-2
514
+ Raises:
515
+ StorageBucketCreateError: If bucket creation fails.
516
+ """
517
+ s3_client = self.client
518
+ try:
519
+ create_bucket_config: Dict[str, Any] = {'Bucket': bucket_name}
520
+ # If default us-east-1 region of create_bucket API is used,
521
+ # the LocationConstraint must not be specified.
522
+ # Reference: https://stackoverflow.com/a/51912090
523
+ if region is not None and region != 'us-east-1':
524
+ create_bucket_config['CreateBucketConfiguration'] = {
525
+ 'LocationConstraint': region
526
+ }
527
+ s3_client.create_bucket(**create_bucket_config)
528
+ logger.info(
529
+ f' {colorama.Style.DIM}Created S3 bucket {bucket_name!r} in '
530
+ f'{region or "us-east-1"}{colorama.Style.RESET_ALL}'
531
+ )
532
+
533
+ # Add AWS tags configured in config.yaml to the bucket.
534
+ # This is useful for cost tracking and external cleanup.
535
+ bucket_tags = config.get_nested(('aws', 'labels'), {})
536
+ if bucket_tags:
537
+ s3_client.put_bucket_tagging(
538
+ Bucket=bucket_name,
539
+ Tagging={
540
+ 'TagSet': [
541
+ {'Key': k, 'Value': v} for k, v in bucket_tags.items()
542
+ ]
543
+ },
544
+ )
545
+
546
+ except aws.botocore_exceptions().ClientError as e:
547
+ with ux_utils.print_exception_no_traceback():
548
+ raise exceptions.StorageBucketCreateError(
549
+ f'Attempted to create a bucket {self.name} but failed.'
550
+ ) from e
551
+ return aws.resource('s3').Bucket(bucket_name)
552
+
553
+ # NOT IN GCS.PY but FROM data/storage.py (s3 specific)
554
+ def _execute_s3_remove_command(
555
+ self, command: str, bucket_name: str, hint_operating: str, hint_failed: str
556
+ ) -> bool:
557
+ try:
558
+ with rich_utils.safe_status(ux_utils.spinner_message(hint_operating)):
559
+ subprocess.check_output(command.split(' '), stderr=subprocess.STDOUT)
560
+ except subprocess.CalledProcessError as e:
561
+ if 'NoSuchBucket' in e.output.decode('utf-8'):
562
+ logger.debug(f'Bucket {bucket_name} does not exist.')
563
+ return False
564
+ else:
565
+ with ux_utils.print_exception_no_traceback():
566
+ raise exceptions.StorageBucketDeleteError(
567
+ f'{hint_failed} Detailed error: {e.output}'
568
+ )
569
+ return True
570
+
571
+ # IN GCS.PY
572
+ def _delete_s3_bucket(self, bucket_name: str) -> bool:
573
+ """Deletes S3 bucket, including all objects in bucket
574
+
575
+ Args:
576
+ bucket_name: str; Name of bucket
577
+
578
+ Returns:
579
+ bool; True if bucket was deleted, False if it was deleted externally.
580
+
581
+ Raises:
582
+ StorageBucketDeleteError: If deleting the bucket fails.
583
+ """
584
+ # Deleting objects is very slow programmatically
585
+ # (i.e. bucket.objects.all().delete() is slow).
586
+ # In addition, standard delete operations (i.e. via `aws s3 rm`)
587
+ # are slow, since AWS puts deletion markers.
588
+ # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
589
+ # The fastest way to delete is to run `aws s3 rb --force`,
590
+ # which removes the bucket by force.
591
+ remove_command = f'aws s3 rb s3://{bucket_name} --force'
592
+ success = self._execute_s3_remove_command(
593
+ remove_command,
594
+ bucket_name,
595
+ f'Deleting S3 bucket [green]{bucket_name}[/]',
596
+ f'Failed to delete S3 bucket {bucket_name}.',
597
+ )
598
+ if not success:
599
+ return False
600
+
601
+ # Wait until bucket deletion propagates on AWS servers
602
+ while data_utils.verify_s3_bucket(bucket_name):
603
+ time.sleep(0.1)
604
+ return True
605
+
606
+ # NOT IN GCS.PY but FROM data/storage.py (s3 specific)
607
+ def _delete_s3_bucket_sub_path(self, bucket_name: str, sub_path: str) -> bool:
608
+ """Deletes the sub path from the bucket."""
609
+ remove_command = f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive'
610
+ return self._execute_s3_remove_command(
611
+ remove_command,
612
+ bucket_name,
613
+ f'Removing objects from S3 bucket ' f'[green]{bucket_name}/{sub_path}[/]',
614
+ f'Failed to remove objects from S3 bucket {bucket_name}/{sub_path}.',
615
+ )
616
+
617
+ @classmethod
618
+ @annotations.lru_cache(scope='global', maxsize=1)
619
+ def _aws_configure_list(cls) -> Optional[bytes]:
620
+ proc = subprocess.run(
621
+ 'aws configure list',
622
+ shell=True,
623
+ check=False,
624
+ stdout=subprocess.PIPE,
625
+ stderr=subprocess.PIPE,
626
+ )
627
+ if proc.returncode != 0:
628
+ return None
629
+ return proc.stdout
630
+
631
+ @classmethod
632
+ def _sso_credentials_help_str(cls, expired: bool = False) -> str:
633
+ help_str = 'Run the following commands (must use AWS CLI v2):'
634
+ if not expired:
635
+ help_str += f'\n{cls._INDENT_PREFIX} $ aws configure sso'
636
+ help_str += (
637
+ f'\n{cls._INDENT_PREFIX} $ aws sso login --profile <profile_name>'
638
+ f'\n{cls._INDENT_PREFIX}For more info: '
639
+ 'https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-sso.html'
640
+ )
641
+ return help_str
642
+
643
+ @classmethod
644
+ @annotations.lru_cache(
645
+ scope='global', maxsize=1
646
+ ) # Cache since getting identity is slow.
647
+ def _sts_get_caller_identity(cls) -> Optional[List[List[str]]]:
648
+ try:
649
+ sts = aws.client('sts', check_credentials=False)
650
+ # The caller identity contains 3 fields: UserId, Account, Arn.
651
+ # 1. 'UserId' is unique across all AWS entity, which looks like
652
+ # "AROADBQP57FF2AEXAMPLE:role-session-name"
653
+ # 2. 'Account' can be shared by multiple users under the same
654
+ # organization
655
+ # 3. 'Arn' is the full path to the user, which can be reused when
656
+ # the user is deleted and recreated.
657
+ # Refer to: <https://docs.aws.amazon.com/cli/latest/reference/sts/get-caller-identity.html>
658
+ # and <https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_variables.html#principaltable>
659
+ user_info = sts.get_caller_identity()
660
+ # Allow fallback to AccountId if UserId does not match, because:
661
+ # 1. In the case where multiple IAM users belong to a single root account,
662
+ # those users normally share the visibility of the VMs, so we do not
663
+ # need to identify them with each other. (There can be some cases,
664
+ # when an IAM user is given a limited permission by the admin, we may
665
+ # ignore that case for now, or print out a warning if the underlying
666
+ # userid changed for a cluster).
667
+ # 2. In the case where multiple users belong to an organization,
668
+ # those users will have different account ids, so the fallback works.
669
+ user_ids = [user_info['UserId'], user_info['Account']]
670
+ except aws.botocore_exceptions().NoCredentialsError as e:
671
+ with ux_utils.print_exception_no_traceback():
672
+ raise exceptions.CloudUserIdentityError(
673
+ 'AWS credentials are not set. '
674
+ f'{cls._STATIC_CREDENTIAL_HELP_STR}\n'
675
+ f'{cls._INDENT_PREFIX}Details: `aws sts '
676
+ 'get-caller-identity` failed with error:'
677
+ f' {common_utils.format_exception(e, use_bracket=True)}.'
678
+ ) from None
679
+ except aws.botocore_exceptions().ClientError as e:
680
+ with ux_utils.print_exception_no_traceback():
681
+ raise exceptions.CloudUserIdentityError(
682
+ 'Failed to access AWS services with credentials. '
683
+ 'Make sure that the access and secret keys are correct.'
684
+ f' {cls._STATIC_CREDENTIAL_HELP_STR}\n'
685
+ f'{cls._INDENT_PREFIX}Details: `aws sts '
686
+ 'get-caller-identity` failed with error:'
687
+ f' {common_utils.format_exception(e, use_bracket=True)}.'
688
+ ) from None
689
+ except aws.botocore_exceptions().InvalidConfigError as e:
690
+ # pylint: disable=import-outside-toplevel
691
+ import awscli
692
+ from packaging import version
693
+
694
+ awscli_version = version.parse(awscli.__version__)
695
+ if awscli_version < version.parse(
696
+ '1.27.10'
697
+ ) and 'configured to use SSO' in str(e):
698
+ with ux_utils.print_exception_no_traceback():
699
+ raise exceptions.CloudUserIdentityError(
700
+ 'awscli is too old to use SSO. '
701
+ 'Run the following command to upgrade:'
702
+ f'\n{cls._INDENT_PREFIX} $ pip install awscli>=1.27.10'
703
+ f'\n{cls._INDENT_PREFIX}You may need to log into SSO again '
704
+ f'after upgrading. {cls._sso_credentials_help_str()}'
705
+ ) from None
706
+ with ux_utils.print_exception_no_traceback():
707
+ raise exceptions.CloudUserIdentityError(
708
+ f'Invalid AWS configuration.\n'
709
+ f' Reason: {common_utils.format_exception(e, use_bracket=True)}.'
710
+ ) from None
711
+ except aws.botocore_exceptions().TokenRetrievalError:
712
+ # This is raised when the access token is expired, which mainly
713
+ # happens when the user is using temporary credentials or SSO
714
+ # login.
715
+ with ux_utils.print_exception_no_traceback():
716
+ raise exceptions.CloudUserIdentityError(
717
+ 'AWS access token is expired.'
718
+ f' {cls._sso_credentials_help_str(expired=True)}'
719
+ ) from None
720
+ except Exception as e: # pylint: disable=broad-except
721
+ with ux_utils.print_exception_no_traceback():
722
+ raise exceptions.CloudUserIdentityError(
723
+ f'Failed to get AWS user.\n'
724
+ f' Reason: {common_utils.format_exception(e, use_bracket=True)}.'
725
+ ) from None
726
+ # TODO: Return a list of identities in the profile when we support
727
+ # automatic switching for AWS. Currently we only support one identity.
728
+ return [user_ids]
729
+
730
+ # IN GCS.PY
731
+ @classmethod
732
+ @annotations.lru_cache(
733
+ scope='global', maxsize=1
734
+ ) # Cache since getting identity is slow.
735
+ def get_user_identities(cls) -> List[List[str]]:
736
+ """Returns a [UserId, Account] list that uniquely identifies the user.
737
+
738
+ These fields come from `aws sts get-caller-identity` and are cached
739
+ locally, keyed by the `aws configure list` output. The identities are assumed to
740
+ be stable for the duration of the `sky` process. Modifying the
741
+ credentials while the `sky` process is running will not affect the
742
+ identity returned by this function.
743
+
744
+ We permit the same actual user to:
745
+
746
+ - switch between different root accounts (after which both elements
747
+ of the list will be different) and have their clusters owned by
748
+ each account be protected; or
749
+
750
+ - within the same root account, switch between different IAM
751
+ users, and treat [user_id=1234, account=A] and
752
+ [user_id=4567, account=A] to be the *same*. Namely, switching
753
+ between these IAM roles within the same root account will cause
754
+ the first element of the returned list to differ, and will allow
755
+ the same actual user to continue to interact with their clusters.
756
+ Note: this is not 100% safe, since the IAM users can have very
757
+ specific permissions, that disallow them to access the clusters
758
+ but it is a reasonable compromise as that could be rare.
759
+
760
+ Returns:
761
+ A list of strings that uniquely identifies the user on this cloud.
762
+ For identity check, we will fallback through the list of strings
763
+ until we find a match, and print a warning if we fail for the
764
+ first string.
765
+
766
+ Raises:
767
+ exceptions.CloudUserIdentityError: if the user identity cannot be
768
+ retrieved.
769
+ """
770
+ stdout = cls._aws_configure_list()
771
+ if stdout is None:
772
+ # `aws configure list` is not available, possible reasons:
773
+ # - awscli is not installed but credentials are valid, e.g. run from
774
+ # an EC2 instance with IAM role
775
+ # - aws credentials are not set, proceed anyway to get unified error
776
+ # message for users
777
+ return cls._sts_get_caller_identity()
778
+ config_hash = hashlib.md5(stdout).hexdigest()[:8] # noqa: F841
779
+ # Getting aws identity cost ~1s, so we cache the result with the output of
780
+ # `aws configure list` as cache key. Different `aws configure list` output
781
+ # can have same aws identity, our assumption is the output would be stable
782
+ # in real world, so the number of cache files would be limited.
783
+ # TODO(aylei): consider using a more stable cache key and evaluate eviction.
784
+ # TODO(ryan): caching is skipped for now (we return early below).
785
+ return cls._sts_get_caller_identity()
786
+ # cache_path = catalog_common.get_catalog_path(
787
+ # f'aws/.cache/user-identity-{config_hash}.txt')
788
+ # if os.path.exists(cache_path):
789
+ # try:
790
+ # with open(cache_path, 'r', encoding='utf-8') as f:
791
+ # return json.loads(f.read())
792
+ # except json.JSONDecodeError:
793
+ # # cache is invalid, ignore it and fetch identity again
794
+ # pass
795
+ #
796
+ # result = cls._sts_get_caller_identity()
797
+ # with open(cache_path, 'w', encoding='utf-8') as f:
798
+ # f.write(json.dumps(result))
799
+ # return result
800
+
801
+ # IN GCS.PY
802
+ @classmethod
803
+ def get_active_user_identity_str(cls) -> Optional[str]:
804
+ user_identity = cls.get_active_user_identity()
805
+ if user_identity is None:
806
+ return None
807
+ identity_str = f'{user_identity[0]} [account={user_identity[1]}]'
808
+ return identity_str
809
+
810
+ # IN GCS.PY
811
+ @classmethod
812
+ @annotations.lru_cache(scope='global', maxsize=1)
813
+ def check_credentials(cls) -> Tuple[bool, Optional[str]]:
814
+ """Checks if the user has access credentials to AWS."""
815
+
816
+ dependency_installation_hints = (
817
+ 'AWS dependencies are not installed. '
818
+ 'Run the following commands:'
819
+ f'\n{cls._INDENT_PREFIX} $ pip install boto3 botocore awscli'
820
+ f'\n{cls._INDENT_PREFIX}Credentials may also need to be set. '
821
+ f'{cls._STATIC_CREDENTIAL_HELP_STR}'
822
+ )
823
+
824
+ # Checks if the AWS CLI is installed properly
825
+ proc = subprocess.run(
826
+ 'aws --version',
827
+ shell=True,
828
+ check=False,
829
+ stdout=subprocess.PIPE,
830
+ stderr=subprocess.PIPE,
831
+ )
832
+ if proc.returncode != 0:
833
+ return False, dependency_installation_hints
834
+ try:
835
+ # Checks if aws boto is installed properly
836
+ # pylint: disable=import-outside-toplevel, unused-import
837
+ import boto3 # noqa: F401
838
+ import botocore # noqa: F401
839
+ except ImportError:
840
+ return False, dependency_installation_hints
841
+
842
+ # Checks if AWS credentials 1) exist and 2) are valid.
843
+ # https://stackoverflow.com/questions/53548737/verify-aws-credentials-with-boto3
844
+ try:
845
+ identity_str = cls.get_active_user_identity_str() # noqa: F841
846
+ except exceptions.CloudUserIdentityError as e:
847
+ return False, str(e)
848
+
849
+ static_credential_exists = os.path.isfile(
850
+ os.path.expanduser('~/.aws/credentials')
851
+ )
852
+ hints = None
853
+ identity_type = cls._current_identity_type()
854
+ single_cloud_hint = (
855
+ ' It will work if you use AWS only, but will cause problems '
856
+ 'if you want to use multiple clouds. To set up static credentials, '
857
+ 'try: aws configure'
858
+ )
859
+ if identity_type == AWSIdentityType.SSO:
860
+ hints = 'AWS SSO is set.'
861
+ if static_credential_exists:
862
+ hints += (
863
+ ' To ensure multiple clouds work correctly, please use SkyPilot '
864
+ 'with static credentials (e.g., ~/.aws/credentials) by unsetting '
865
+ 'the AWS_PROFILE environment variable.'
866
+ )
867
+ else:
868
+ hints += single_cloud_hint
869
+ elif identity_type == AWSIdentityType.IAM_ROLE:
870
+ # When using an IAM role, the credentials may not exist in the
871
+ # ~/.aws/credentials file. So we don't check for the existence of the
872
+ # file. This will happen when the user is on a VM (or
873
+ # jobs-controller) created by an SSO account, i.e. the VM will be
874
+ # assigned the IAM role: skypilot-v1.
875
+ hints = f'AWS IAM role is set.{single_cloud_hint}'
876
+ elif identity_type == AWSIdentityType.CONTAINER_ROLE:
877
+ # Similar to the IAM ROLE, an ECS container may not store credentials
878
+ # in the ~/.aws/credentials file. So we don't check for the existence of
879
+ # the file. i.e. the container will be assigned the IAM role of the
880
+ # task: skypilot-v1.
881
+ hints = f'AWS container-role is set.{single_cloud_hint}'
882
+ elif identity_type == AWSIdentityType.CUSTOM_PROCESS:
883
+ # Similar to the IAM ROLE, a custom process may not store credentials
884
+ # in the ~/.aws/credentials file. So we don't check for the existence of
885
+ # the file. i.e. the custom process will be assigned the IAM role of the
886
+ # task: skypilot-v1.
887
+ hints = f'AWS custom-process is set.{single_cloud_hint}'
888
+ elif identity_type == AWSIdentityType.ASSUME_ROLE:
889
+ # When using ASSUME ROLE, the credentials are coming from a different
890
+ # source profile. So we don't check for the existence of ~/.aws/credentials.
891
+ # i.e. the assumed role will be assigned the IAM role of the
892
+ # task: skypilot-v1.
893
+ hints = f'AWS assume-role is set.{single_cloud_hint}'
894
+ elif identity_type == AWSIdentityType.ENV:
895
+ # When using ENV vars, the credentials are coming from the environment
896
+ # variables. So we don't check for the existence of ~/.aws/credentials.
897
+ # i.e. the identity is not determined by the file.
898
+ hints = f'AWS env is set.{single_cloud_hint}'
899
+ else:
900
+ # This file is required because it is required by the VMs launched on
901
+ # other clouds to access private s3 buckets and resources like EC2.
902
+ # `get_active_user_identity` does not guarantee this file exists.
903
+ if not static_credential_exists:
904
+ return (
905
+ False,
906
+ '~/.aws/credentials does not exist. '
907
+ + cls._STATIC_CREDENTIAL_HELP_STR,
908
+ )
909
+
910
+ try:
911
+ s3 = boto3.client('s3')
912
+
913
+ suffix = uuid.uuid4().hex[:6]
914
+ test_bucket = f'konduktor-check-s3-{int(time.time())}-{suffix}'
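+ # e.g. 'konduktor-check-s3-1712650000-a1b2c3': a throwaway bucket used
+ # only to probe S3 permissions; it is deleted again below.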
915
+
916
+ try:
917
+ s3.create_bucket(Bucket=test_bucket)
918
+
919
+ time.sleep(1)
920
+
921
+ s3.get_bucket_location(Bucket=test_bucket)
922
+ s3.list_objects_v2(Bucket=test_bucket, MaxKeys=1)
923
+
924
+ # Object-related checks
925
+ s3.put_object(Bucket=test_bucket, Key='test.txt', Body='hello')
926
+ s3.get_object(Bucket=test_bucket, Key='test.txt')
927
+ s3.delete_object(Bucket=test_bucket, Key='test.txt')
928
+
929
+ finally:
930
+ # Always attempt to clean up, even if earlier checks failed
931
+ try:
932
+ s3.delete_bucket(Bucket=test_bucket)
933
+ except Exception:
934
+ raise RuntimeError(
935
+ 'AWS S3 delete bucket permission is missing. '
936
+ 'Please check your policy.\n'
937
+ )
938
+
939
+ except Exception:
940
+ return False, (
941
+ 'One or more AWS S3 permissions are missing. '
942
+ 'Please check your policy.\n'
943
+ )
944
+
945
+ logger.info(
946
+ f'AWS credentials are valid '
947
+ f'for the current identity {logging.CHECK_MARK_EMOJI}'
948
+ )
949
+ logger.info('Creating k8s secret with AWS credentials...')
950
+ set_ok, result = cls.set_secret_credentials()
951
+ if not set_ok:
952
+ logger.error(f'Failed to create k8s secret with AWS credentials: {result}')
953
+ return False, result
954
+ return True, hints
955
+
956
+ @classmethod
957
+ def _current_identity_type(cls) -> Optional[AWSIdentityType]:
958
+ stdout = cls._aws_configure_list()
959
+ if stdout is None:
960
+ return None
961
+ output = stdout.decode()
962
+
963
+ # We determine the identity type by looking at the output of
964
+ # `aws configure list`. The output looks like:
965
+ # Name Value Type Location
966
+ # ---- ----- ---- --------
967
+ # profile <not set> None None
968
+ # access_key * <not set> sso None
969
+ # secret_key * <not set> sso None
970
+ # region <not set> None None
971
+ # We try to determine the identity type by looking for the
972
+ # string "sso"/"iam-role" in the output, i.e. the "Type" column.
973
+
974
+ def _is_access_key_of_type(type_str: str) -> bool:
975
+ # The dot (.) does not match line separators.
976
+ results = re.findall(rf'access_key.*{type_str}', output)
977
+ if len(results) > 1:
978
+ raise RuntimeError(f'Unexpected `aws configure list` output:\n{output}')
979
+ return len(results) == 1
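+ # e.g. with the sample output above, _is_access_key_of_type('sso')
+ # matches the 'access_key ... sso' row and returns True, so the loop
+ # below resolves the identity type to AWSIdentityType.SSO.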
980
+
981
+ for identity_type in AWSIdentityType:
982
+ if _is_access_key_of_type(identity_type.value):
983
+ return identity_type
984
+ return AWSIdentityType.SHARED_CREDENTIALS_FILE
985
+
986
+ # IN GCS.PY
987
+ @classmethod
988
+ def set_secret_credentials(cls) -> Tuple[bool, Optional[str]]:
989
+ """
990
+ Set the k8s secret storing the AWS credentials
991
+ """
992
+ context = kubernetes_utils.get_current_kube_config_context_name()
993
+ namespace = kubernetes_utils.get_kube_config_context_namespace()
994
+
995
+ # Check if credentials are provided via environment
996
+ access_key = os.environ.get('AWS_ACCESS_KEY_ID')
997
+ secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
998
+ region = os.environ.get('AWS_DEFAULT_REGION', 'us-east-1')
999
+
1000
+ if access_key and secret_key:
1001
+ logger.info('Using AWS credentials from env')
1002
+ credentials_dir = tempfile.mkdtemp()
1003
+ credentials_path = os.path.join(credentials_dir, 'credentials')
1004
+ config_path = os.path.join(credentials_dir, 'config')
1005
+
1006
+ with open(credentials_path, 'w') as f:
1007
+ f.write(f"""[default]
1008
+ aws_access_key_id = {access_key}
1009
+ aws_secret_access_key = {secret_key}
1010
+ """)
1011
+
1012
+ with open(config_path, 'w') as f:
1013
+ f.write(f"""[default]
1014
+ region = {region}
1015
+ """)
1016
+ else:
1017
+ logger.info('Using AWS credentials from ~/.aws/')
1018
+ credentials_dir = DEFAULT_AWS_CREDENTIALS_DIR
1019
+
1020
+ credentials_files = [
1021
+ os.path.expanduser(os.path.join(credentials_dir, f))
1022
+ for f in _CREDENTIAL_FILES
1023
+ ]
1024
+ ok, result = kubernetes_utils.set_secret(
1025
+ secret_name=cls._AWS_SECRET_NAME,
1026
+ namespace=namespace,
1027
+ context=context,
1028
+ secret_key=cls._AWS_CREDENTIALS_KEY,
1029
+ secret_value=base64_utils.zip_base64encode(credentials_files),
1030
+ )
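+ # The credential files above are zipped and base64-encoded into a
+ # single secret value stored under cls._AWS_CREDENTIALS_KEY.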
1031
+ if not ok:
1032
+ logger.error(f'Failed to set AWS credentials in k8s secret: \n{result}')
1033
+ return False, result
1034
+ else:
1035
+ logger.info(
1036
+ f'AWS credentials set in k8s secret: {cls._AWS_SECRET_NAME} '
1037
+ f'in namespace {namespace} in context {context} '
1038
+ f'{logging.CHECK_MARK_EMOJI}'
1039
+ )
1040
+
1041
+ try:
1042
+ identity = boto3.client('sts').get_caller_identity()
1043
+ logger.info(
1044
+ f"Using AWS credentials for ARN: {identity['Arn']} "
1045
+ f"(UserId: {identity['UserId']}, Account: {identity['Account']})"
1046
+ )
1047
+ except Exception as e:
1048
+ logger.warning(f'Could not fetch caller identity: {e}')
1049
+
1050
+ return True, None
1051
+
1052
+ # IN GCS.PY
1053
+ @classmethod
1054
+ def get_k8s_credential_name(cls) -> str:
1055
+ return cls._AWS_SECRET_NAME
1056
+
1057
+
1058
+ class S3CloudStorage(storage_utils.CloudStorage):
1059
+ """S3 Storage."""
1060
+
1061
+ # List of commands to install AWS CLI
1062
+ _GET_AWSCLI = [
1063
+ 'command -v aws >/dev/null 2>&1 || ('
1064
+ 'apt-get update && apt-get install -y curl unzip && '
1065
+ 'curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && ' # noqa: E501
1066
+ 'unzip awscliv2.zip && '
1067
+ './aws/install -i ~/aws-cli -b ~/bin && '
1068
+ 'export PATH=$HOME/bin:$PATH && '
1069
+ 'rm -rf aws awscliv2.zip'
1070
+ ') && export PATH=$HOME/bin:$PATH'
1071
+ ]
1072
+
1073
+ _STORE: typing.Type[storage_utils.AbstractStore] = S3Store
1074
+
1075
+ # IN GCS.PY
1076
+ def is_directory(self, url: str) -> bool:
1077
+ """Returns whether S3 'url' is a directory.
1078
+
1079
+ In cloud object stores, a "directory" refers to a regular object whose
1080
+ name is a prefix of other objects.
1081
+ """
1082
+ s3 = aws.resource('s3')
1083
+ bucket_name, path = data_utils.split_s3_path(url)
1084
+ bucket = s3.Bucket(bucket_name)
1085
+
1086
+ num_objects = 0
1087
+ for obj in bucket.objects.filter(Prefix=path):
1088
+ num_objects += 1
1089
+ if obj.key == path:
1090
+ return False
1091
+ # If at least 3 objects match the prefix, treat it as a directory
1092
+ if num_objects == 3:
1093
+ return True
1094
+
1095
+ # A directory with few or no items
1096
+ return True
1097
+
1098
+ # IN GCS.PY
1099
+ def make_sync_dir_command(self, source: str, destination: str) -> str:
1100
+ """Downloads using AWS CLI."""
1101
+ # AWS Sync by default uses 10 threads to upload files to the bucket.
1102
+ # To increase parallelism, modify max_concurrent_requests in your
1103
+ # aws config file (Default path: ~/.aws/config).
1104
+ all_commands = list(self._GET_AWSCLI)
1105
+
1106
+ all_commands.append(f'aws s3 sync --no-follow-symlinks {source} {destination}')
1107
+ return ' && '.join(all_commands)
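+ # e.g. make_sync_dir_command('s3://my-bucket/data', '/local/data')
+ # returns the awscli bootstrap command chained with:
+ #   aws s3 sync --no-follow-symlinks s3://my-bucket/data /local/data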
1108
+
1109
+ # IN GCS.PY
1110
+ def make_sync_file_command(self, source: str, destination: str) -> str:
1111
+ all_commands = list(self._GET_AWSCLI)
1112
+
1113
+ all_commands.append(f'aws s3 cp {source} {destination}')
1114
+ return ' && '.join(all_commands)