skilleter-thingy 0.0.24__py3-none-any.whl → 0.0.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skilleter-thingy might be problematic. Click here for more details.
- skilleter_thingy/git_review.py +11 -5
- skilleter_thingy/photodupe.py +42 -16
- {skilleter_thingy-0.0.24.dist-info → skilleter_thingy-0.0.26.dist-info}/METADATA +1 -2
- {skilleter_thingy-0.0.24.dist-info → skilleter_thingy-0.0.26.dist-info}/RECORD +8 -10
- skilleter_thingy/aws.py +0 -548
- skilleter_thingy/s3_sync.py +0 -383
- {skilleter_thingy-0.0.24.dist-info → skilleter_thingy-0.0.26.dist-info}/LICENSE +0 -0
- {skilleter_thingy-0.0.24.dist-info → skilleter_thingy-0.0.26.dist-info}/WHEEL +0 -0
- {skilleter_thingy-0.0.24.dist-info → skilleter_thingy-0.0.26.dist-info}/entry_points.txt +0 -0
- {skilleter_thingy-0.0.24.dist-info → skilleter_thingy-0.0.26.dist-info}/top_level.txt +0 -0
skilleter_thingy/git_review.py
CHANGED
|
@@ -514,6 +514,12 @@ class GitReview():
|
|
|
514
514
|
def show_file_list(self):
|
|
515
515
|
""" Draw the current page of the file list """
|
|
516
516
|
|
|
517
|
+
def format_change(prefix, value):
|
|
518
|
+
"""If value is 0 just return it as a string, otherwise apply the prefix and
|
|
519
|
+
return it (e.g. '+' or '-')"""
|
|
520
|
+
|
|
521
|
+
return f'{prefix}{value}' if value else '0'
|
|
522
|
+
|
|
517
523
|
for ypos in range(0, self.file_list_h):
|
|
518
524
|
|
|
519
525
|
normal_colour = curses.color_pair(COLOUR_NORMAL)
|
|
@@ -535,13 +541,13 @@ class GitReview():
|
|
|
535
541
|
# Diff stats, with or without non-whitespace changes
|
|
536
542
|
|
|
537
543
|
if self.show_none_whitespace_stats:
|
|
538
|
-
added =
|
|
539
|
-
deleted =
|
|
544
|
+
added = format_change('+', current_file["non-ws added"])
|
|
545
|
+
deleted = format_change('-', current_file["non-ws deleted"])
|
|
540
546
|
else:
|
|
541
|
-
added =
|
|
542
|
-
deleted =
|
|
547
|
+
added = format_change('+', current_file["added"])
|
|
548
|
+
deleted = format_change('-', current_file["deleted"])
|
|
543
549
|
|
|
544
|
-
status = f'{current_file["status"]} {
|
|
550
|
+
status = f'{current_file["status"]} {deleted:>4}/{added:>4}'
|
|
545
551
|
|
|
546
552
|
abspath = os.path.join(self.working_tree_dir, filename)
|
|
547
553
|
|
skilleter_thingy/photodupe.py
CHANGED
|
@@ -5,6 +5,7 @@ import sys
|
|
|
5
5
|
import os
|
|
6
6
|
import sys
|
|
7
7
|
import pickle
|
|
8
|
+
import argparse
|
|
8
9
|
|
|
9
10
|
import PIL
|
|
10
11
|
|
|
@@ -15,28 +16,42 @@ import imagehash
|
|
|
15
16
|
|
|
16
17
|
################################################################################
|
|
17
18
|
|
|
18
|
-
def read_image_hashes():
|
|
19
|
+
def read_image_hashes(directories):
|
|
19
20
|
"""Read all the specified directories and hash every picture therein"""
|
|
20
21
|
|
|
21
22
|
hashes = defaultdict(list)
|
|
22
23
|
|
|
23
|
-
|
|
24
|
+
# Walk each directory tree
|
|
25
|
+
|
|
26
|
+
for directory in directories:
|
|
27
|
+
print(f'Scanning directory tree {directory}')
|
|
28
|
+
|
|
24
29
|
for root, _, files in os.walk(directory):
|
|
30
|
+
print(f'Scanning directory {root}')
|
|
31
|
+
|
|
25
32
|
for file in files:
|
|
26
33
|
filepath = os.path.join(root, file)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
34
|
+
|
|
35
|
+
fileext = os.path.splitext(file)[1]
|
|
36
|
+
|
|
37
|
+
if fileext.lower() not in ('.jbf', '.ini', '.xml', '.ffs_db'):
|
|
38
|
+
# Calculate the hash and store path, dimensions and file size under the hash entry in the hashes table
|
|
39
|
+
|
|
40
|
+
try:
|
|
41
|
+
with Image.open(filepath) as image:
|
|
42
|
+
hash_value = imagehash.average_hash(image, hash_size=12)
|
|
43
|
+
|
|
44
|
+
size = os.stat(filepath).st_size
|
|
45
|
+
hashes[hash_value].append({'path': filepath, 'width': image.width, 'height': image.height, 'size': size})
|
|
46
|
+
|
|
47
|
+
except PIL.UnidentifiedImageError:
|
|
48
|
+
sys.stderr.write(f'ERROR: Unrecognized format {filepath} (size={size})\n')
|
|
49
|
+
|
|
50
|
+
except OSError:
|
|
51
|
+
sys.stderr.write(f'ERROR: Unable to read {filepath} (size={size})\n')
|
|
52
|
+
|
|
53
|
+
# Return the hash table
|
|
54
|
+
|
|
40
55
|
return hashes
|
|
41
56
|
|
|
42
57
|
################################################################################
|
|
@@ -44,6 +59,15 @@ def read_image_hashes():
|
|
|
44
59
|
def main():
|
|
45
60
|
"""Read the hashes and report duplicates in a vaguely civilised way"""
|
|
46
61
|
|
|
62
|
+
parser = argparse.ArgumentParser(description='Search for similar images')
|
|
63
|
+
parser.add_argument('directories', nargs='*', action='store', help='Directories to search')
|
|
64
|
+
|
|
65
|
+
args = parser.parse_args()
|
|
66
|
+
|
|
67
|
+
if not args.directories:
|
|
68
|
+
print('You must be specify at least one directory')
|
|
69
|
+
sys.exit(1)
|
|
70
|
+
|
|
47
71
|
try:
|
|
48
72
|
print('Loading cached data')
|
|
49
73
|
|
|
@@ -52,8 +76,10 @@ def main():
|
|
|
52
76
|
except (FileNotFoundError, EOFError):
|
|
53
77
|
print('Scanning directories')
|
|
54
78
|
|
|
55
|
-
hashes = read_image_hashes()
|
|
79
|
+
hashes = read_image_hashes(args.directories)
|
|
56
80
|
|
|
81
|
+
# Sort the list of hashes so that we can easily find close matches
|
|
82
|
+
|
|
57
83
|
print('Sorting hashes')
|
|
58
84
|
|
|
59
85
|
hash_values = sorted([str(hashval) for hashval in hashes])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: skilleter_thingy
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.26
|
|
4
4
|
Summary: A collection of useful utilities, mainly aimed at making Git more friendly
|
|
5
5
|
Author-email: John Skilleter <john@skilleter.org.uk>
|
|
6
6
|
Project-URL: Home, https://skilleter.org.uk
|
|
@@ -10,7 +10,6 @@ Classifier: Operating System :: OS Independent
|
|
|
10
10
|
Requires-Python: >=3.6
|
|
11
11
|
Description-Content-Type: text/markdown
|
|
12
12
|
License-File: LICENSE
|
|
13
|
-
Requires-Dist: boto3
|
|
14
13
|
Requires-Dist: imagehash
|
|
15
14
|
Requires-Dist: inotify
|
|
16
15
|
Requires-Dist: pillow
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
skilleter_thingy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
skilleter_thingy/addpath.py,sha256=4Yhhgjjz1XDI98j0dAiQpNA2ejLefeWUTeSg3nIXQq0,3842
|
|
3
|
-
skilleter_thingy/aws.py,sha256=k08MT1866KuMjgG7ECr7LCuqcgWx78iPDcS9CmemIoA,18614
|
|
4
3
|
skilleter_thingy/borger.py,sha256=AQX7OHeGXcUjkgyXEWE2h_oOey9eczZBbKjrreXvRAs,7832
|
|
5
4
|
skilleter_thingy/colour.py,sha256=D-RTYsND6Xm6m3xl0mOe9QSrTNYsyY0K_a8x3id2gvg,7031
|
|
6
5
|
skilleter_thingy/console_colours.py,sha256=dT5qc_B62VJaWs92yrFt1izoB7hs3a22t3lfrZFybG4,1786
|
|
@@ -25,7 +24,7 @@ skilleter_thingy/git_common.py,sha256=sjCRCNfyDhLPtxajjiq1l0SCEqB7d1L2uAgnuxLqc1
|
|
|
25
24
|
skilleter_thingy/git_hold.py,sha256=i-mbY6wBZ-lRx6rS2kGyWlTCWqK6suhKape7Dnc4OAo,4630
|
|
26
25
|
skilleter_thingy/git_mr.py,sha256=ik3uVecNos_73aYANgnaHutPdnW3PlGmWiAw9EJ9IGk,3100
|
|
27
26
|
skilleter_thingy/git_parent.py,sha256=1VHbh_iH4tnw6vpafqHWEeZwHZVL_sZOIVZMojKTwlk,2696
|
|
28
|
-
skilleter_thingy/git_review.py,sha256=
|
|
27
|
+
skilleter_thingy/git_review.py,sha256=4pNf8DSVu3V2l9CDo97OIsEHaBIGxfINREbAYU9thg4,51566
|
|
29
28
|
skilleter_thingy/git_update.py,sha256=comv8CR1j9W8yuCIeoksAKxH8fPTiROkIXVrHdpXWl4,13985
|
|
30
29
|
skilleter_thingy/git_wt.py,sha256=VAcOgEAqVUUdmmkZ-zxaHi0Xp6LAt9NvZAZzUoJM3AU,3143
|
|
31
30
|
skilleter_thingy/gitcmp_helper.py,sha256=rFCZiivZfYmd1_-0_3YwmYSfGDLWu6e6rK0I9hFXCUY,11279
|
|
@@ -37,7 +36,7 @@ skilleter_thingy/linecount.py,sha256=lw3vuXUUnMwrUihY6bHfZJsRKe6ZMCRz3952Z9N-ogI
|
|
|
37
36
|
skilleter_thingy/logger.py,sha256=xKgPAq8KGXmtaXIFjFs1AmZJXtYrXJn2sqL3oxHZjfQ,3107
|
|
38
37
|
skilleter_thingy/moviemover.py,sha256=j_Xb9_jFdgpFBAXcF4tEqbnKH_FonlnUU39LiCK980k,4470
|
|
39
38
|
skilleter_thingy/path.py,sha256=3ba_e-QwYpAs-jFVWoV8sfjVjs_59uc5JZt-87Hqn6g,4737
|
|
40
|
-
skilleter_thingy/photodupe.py,sha256=
|
|
39
|
+
skilleter_thingy/photodupe.py,sha256=FssLgbLnqHPuSvMGtRyOa7bRdowufJOQaJJ56f9ybxk,4195
|
|
41
40
|
skilleter_thingy/phototidier.py,sha256=1uj1XbUemJOalNC3PwTG2-_yUQp4WMAcmwRr7QXPn1w,7823
|
|
42
41
|
skilleter_thingy/popup.py,sha256=jW-nbpdeswqEMTli7OmBv1J8XQsvFoMI0J33O6dOeu8,2529
|
|
43
42
|
skilleter_thingy/process.py,sha256=RmXj2RrzwqP_mugeVejtjgF_T3PD7M10XsWuAnc99t0,3565
|
|
@@ -47,7 +46,6 @@ skilleter_thingy/remdir.py,sha256=zp5Nr0IMGXQ-b5iT48O5arqWoSjW65Xnr-SpKuav1Ac,46
|
|
|
47
46
|
skilleter_thingy/rmdupe.py,sha256=tcX3w8XvliGwBMdSt9BUu07kuDtQEc0IiU8sCxmgzHA,17117
|
|
48
47
|
skilleter_thingy/rpylint.py,sha256=na39x0yNXDwDkG9yP48BoM5FeTut-OS4AVsYixE0YZU,2639
|
|
49
48
|
skilleter_thingy/run.py,sha256=EGYJSuMcOmUca6dpfVUFE41vG9C6ZNK8hzZlJCJE6Rs,12619
|
|
50
|
-
skilleter_thingy/s3_sync.py,sha256=TITptjua_B-iwPlgTniuoxPvuEnQjyTKfs6l9CKHbXc,13849
|
|
51
49
|
skilleter_thingy/splitpics.py,sha256=qRlJrqet7TEI6SodS4bkuKXQUpOdMaqmjE4c1CR7ouo,3266
|
|
52
50
|
skilleter_thingy/strreplace.py,sha256=xsIWw0hc452rYEBtNEQFKIzmV03xjm_Taz-eDTmFFKI,2539
|
|
53
51
|
skilleter_thingy/sysmon.py,sha256=XRZG6EVSzoVYan_N16qVB1l1RaU51uvLWlRA0CDjC54,11348
|
|
@@ -59,9 +57,9 @@ skilleter_thingy/trimpath.py,sha256=SAfOB75_dTldQHjam4kQy1J42209NYPYi8vVAaNn1e8,
|
|
|
59
57
|
skilleter_thingy/window_rename.py,sha256=dCBgZqih_3YKHt35hsOAhARFp3QxOi8w8huC63sqJK8,3128
|
|
60
58
|
skilleter_thingy/xchmod.py,sha256=F9_lxKuLqVlHHr3oBI3dkMoFOuwRzYDlpQMTmDcjpBI,4590
|
|
61
59
|
skilleter_thingy/yamlcheck.py,sha256=FXylZ5NtHirDlPVhVEUZUZkTugVR-g51BbjaN06akAc,2868
|
|
62
|
-
skilleter_thingy-0.0.
|
|
63
|
-
skilleter_thingy-0.0.
|
|
64
|
-
skilleter_thingy-0.0.
|
|
65
|
-
skilleter_thingy-0.0.
|
|
66
|
-
skilleter_thingy-0.0.
|
|
67
|
-
skilleter_thingy-0.0.
|
|
60
|
+
skilleter_thingy-0.0.26.dist-info/LICENSE,sha256=ljOS4DjXvqEo5VzGfdaRwgRZPbNScGBmfwyC8PChvmQ,32422
|
|
61
|
+
skilleter_thingy-0.0.26.dist-info/METADATA,sha256=tn3FjCTFSjHBom9bV7v__U8UFJ9QdVDCwtO9B7vhvy8,5210
|
|
62
|
+
skilleter_thingy-0.0.26.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
63
|
+
skilleter_thingy-0.0.26.dist-info/entry_points.txt,sha256=jf7hVfH0dojLxk0D4TjIazKoqdToditCGdCfyhIotME,1971
|
|
64
|
+
skilleter_thingy-0.0.26.dist-info/top_level.txt,sha256=8-JhgToBBiWURunmvfpSxEvNkDHQQ7r25-aBXtZv61g,17
|
|
65
|
+
skilleter_thingy-0.0.26.dist-info/RECORD,,
|
skilleter_thingy/aws.py
DELETED
|
@@ -1,548 +0,0 @@
|
|
|
1
|
-
#! /usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
################################################################################
|
|
4
|
-
""" Simple AWS module - provides an abstraction layer on top of the boto3 client
|
|
5
|
-
for simple operations such as reading/writing S3 buckets, sending emails
|
|
6
|
-
etc.
|
|
7
|
-
|
|
8
|
-
Currently this is a random collection of functions extracted from the S3
|
|
9
|
-
bucket ACL checking code but it is hoped it will evolve over time into
|
|
10
|
-
something more comprehensive.
|
|
11
|
-
|
|
12
|
-
Author: John Skilleter
|
|
13
|
-
"""
|
|
14
|
-
################################################################################
|
|
15
|
-
|
|
16
|
-
# System modules
|
|
17
|
-
|
|
18
|
-
import sys
|
|
19
|
-
import logging
|
|
20
|
-
import json
|
|
21
|
-
|
|
22
|
-
# AWS modules
|
|
23
|
-
|
|
24
|
-
import boto3
|
|
25
|
-
import botocore
|
|
26
|
-
|
|
27
|
-
################################################################################
|
|
28
|
-
|
|
29
|
-
class GenericAWS:
|
|
30
|
-
""" Class providing generic S3 functionality from which the
|
|
31
|
-
other S3 classes are derived """
|
|
32
|
-
|
|
33
|
-
################################################################################
|
|
34
|
-
|
|
35
|
-
def __init__(self, service, session=None, profile=None, createresource=True, createclient=True, region=None):
|
|
36
|
-
""" Initialisation - just create a client handle, optionally using
|
|
37
|
-
the specified profile """
|
|
38
|
-
|
|
39
|
-
if session:
|
|
40
|
-
self.session = session
|
|
41
|
-
elif profile:
|
|
42
|
-
self.session = boto3.session.Session(profile_name=profile)
|
|
43
|
-
else:
|
|
44
|
-
self.session = boto3
|
|
45
|
-
|
|
46
|
-
args = {}
|
|
47
|
-
if region:
|
|
48
|
-
args['region_name'] = region
|
|
49
|
-
|
|
50
|
-
if createresource:
|
|
51
|
-
self.resource = self.session.resource(service, **args)
|
|
52
|
-
|
|
53
|
-
if createclient:
|
|
54
|
-
self.client = self.session.client(service, **args)
|
|
55
|
-
|
|
56
|
-
self.log = logging.getLogger('%s:%s' % (__name__, service))
|
|
57
|
-
|
|
58
|
-
################################################################################
|
|
59
|
-
|
|
60
|
-
def set_log_level(self, level):
|
|
61
|
-
""" Set logging for the module """
|
|
62
|
-
|
|
63
|
-
self.log.setLevel(level)
|
|
64
|
-
|
|
65
|
-
################################################################################
|
|
66
|
-
|
|
67
|
-
class SES(GenericAWS):
|
|
68
|
-
""" Class for AWS Simple Email Service """
|
|
69
|
-
|
|
70
|
-
################################################################################
|
|
71
|
-
|
|
72
|
-
def __init__(self, session=None, profile=None):
|
|
73
|
-
""" Initialisation """
|
|
74
|
-
|
|
75
|
-
super().__init__('ses', session, profile, createresource=False)
|
|
76
|
-
|
|
77
|
-
################################################################################
|
|
78
|
-
|
|
79
|
-
def send(self, sender, recipient, subject, body):
|
|
80
|
-
""" Send an email """
|
|
81
|
-
|
|
82
|
-
destination = \
|
|
83
|
-
{
|
|
84
|
-
'ToAddresses': [recipient]
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
message = \
|
|
88
|
-
{
|
|
89
|
-
'Subject':
|
|
90
|
-
{
|
|
91
|
-
'Data': subject
|
|
92
|
-
},
|
|
93
|
-
'Body':
|
|
94
|
-
{
|
|
95
|
-
'Text':
|
|
96
|
-
{
|
|
97
|
-
'Data': body
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
# Attempt to send the email - just report the error and quit on failure
|
|
103
|
-
|
|
104
|
-
try:
|
|
105
|
-
self.log.info('Sending email from %s to %s', sender, recipient)
|
|
106
|
-
|
|
107
|
-
self.client.send_email(Source=sender,
|
|
108
|
-
Destination=destination,
|
|
109
|
-
Message=message)
|
|
110
|
-
|
|
111
|
-
except (botocore.exceptions.EndpointConnectionError,
|
|
112
|
-
self.client.exceptions.MessageRejected) as err:
|
|
113
|
-
print('')
|
|
114
|
-
print('Error sending email: %sn' % err)
|
|
115
|
-
print(' Sender: %s' % sender)
|
|
116
|
-
print(' Recipient: %s' % recipient)
|
|
117
|
-
print(' Subject: %s' % subject)
|
|
118
|
-
print(' Body:')
|
|
119
|
-
|
|
120
|
-
for txt in body.split('\n'):
|
|
121
|
-
print(' %s' % txt)
|
|
122
|
-
|
|
123
|
-
print('')
|
|
124
|
-
|
|
125
|
-
sys.exit(1)
|
|
126
|
-
|
|
127
|
-
################################################################################
|
|
128
|
-
|
|
129
|
-
class S3Bucket(GenericAWS):
|
|
130
|
-
""" Class providing access to S3 buckets """
|
|
131
|
-
|
|
132
|
-
################################################################################
|
|
133
|
-
|
|
134
|
-
def __init__(self, session=None, profile=None):
|
|
135
|
-
""" Initialisation - just create a client handle, optionally using
|
|
136
|
-
the specified profile """
|
|
137
|
-
|
|
138
|
-
super().__init__('s3', session, profile)
|
|
139
|
-
|
|
140
|
-
################################################################################
|
|
141
|
-
|
|
142
|
-
def read(self, bucket, key):
|
|
143
|
-
""" Read the specified data from the specified bucket.
|
|
144
|
-
Returns the data, or raises a boto3 exception on error. """
|
|
145
|
-
|
|
146
|
-
self.log.info('Get object from key %s in bucket %s', key, bucket)
|
|
147
|
-
|
|
148
|
-
response = self.client.get_object(Bucket=bucket, Key=key)
|
|
149
|
-
|
|
150
|
-
self.log.debug('Get object: %s', response)
|
|
151
|
-
|
|
152
|
-
return response['Body'].read()
|
|
153
|
-
|
|
154
|
-
################################################################################
|
|
155
|
-
|
|
156
|
-
def write(self, bucket, key, data):
|
|
157
|
-
""" Write data into the specified key of the specified bucket
|
|
158
|
-
Raises a boto3 exception on error. """
|
|
159
|
-
|
|
160
|
-
self.log.info('Writing %d bytes of data to key %s in bucket %s', len(data), key, bucket)
|
|
161
|
-
|
|
162
|
-
self.client.put_object(Bucket=bucket, Key=key, Body=data)
|
|
163
|
-
|
|
164
|
-
################################################################################
|
|
165
|
-
|
|
166
|
-
def get_tags(self, bucket):
|
|
167
|
-
""" Read the tags from a bucket. Returns a dictionary of tags, which will be
|
|
168
|
-
empty if the bucket has no tags. """
|
|
169
|
-
|
|
170
|
-
self.log.info('Reading tags from bucket %s', bucket)
|
|
171
|
-
|
|
172
|
-
try:
|
|
173
|
-
tags = self.client.get_bucket_tagging(Bucket=bucket)['TagSet']
|
|
174
|
-
|
|
175
|
-
except botocore.exceptions.ClientError as err:
|
|
176
|
-
# Any exception except NoSuchTagSet gets raised.
|
|
177
|
-
# NoSuchTagSet simply means that the bucket has no tags
|
|
178
|
-
# and isn't really an error, as such, so we just return
|
|
179
|
-
# an empty dictionary.
|
|
180
|
-
|
|
181
|
-
if err.response['Error']['Code'] != 'NoSuchTagSet':
|
|
182
|
-
self.log.error('Error reading tags for bucket %s: %s', bucket, err.response['Error']['Code'])
|
|
183
|
-
raise
|
|
184
|
-
|
|
185
|
-
tags = {}
|
|
186
|
-
|
|
187
|
-
self.log.info('Tags: %s', tags)
|
|
188
|
-
|
|
189
|
-
return tags
|
|
190
|
-
|
|
191
|
-
################################################################################
|
|
192
|
-
|
|
193
|
-
def get_location(self, bucket):
|
|
194
|
-
""" Return the location of a bucket - note that the Boto3 returns the location
|
|
195
|
-
set to none, rather than us-east-1 if you query a bucket located in that
|
|
196
|
-
region. """
|
|
197
|
-
|
|
198
|
-
try:
|
|
199
|
-
location = self.client.get_bucket_location(Bucket=bucket)['LocationConstraint']
|
|
200
|
-
except botocore.exceptions.ClientError as err:
|
|
201
|
-
self.log.info('Error getting location of bucket %s: %s', bucket, err.response['Error']['Code'])
|
|
202
|
-
raise
|
|
203
|
-
|
|
204
|
-
if not location:
|
|
205
|
-
location = 'us-east-1'
|
|
206
|
-
|
|
207
|
-
return location
|
|
208
|
-
|
|
209
|
-
################################################################################
|
|
210
|
-
|
|
211
|
-
def get_acl(self, bucket):
|
|
212
|
-
""" Return the bucket ACLs """
|
|
213
|
-
|
|
214
|
-
try:
|
|
215
|
-
return self.client.get_bucket_acl(Bucket=bucket)['Grants']
|
|
216
|
-
except botocore.exceptions.ClientError as err:
|
|
217
|
-
self.log.error('Error getting ACL for bucket %s: %s', bucket, err.response['Error']['Code'])
|
|
218
|
-
raise
|
|
219
|
-
|
|
220
|
-
################################################################################
|
|
221
|
-
|
|
222
|
-
def get_policy(self, bucket):
|
|
223
|
-
""" Return bucket policy information as a list of policy dictionaries
|
|
224
|
-
Returns an empty list if the bucket has no policies (similar to
|
|
225
|
-
get_tags() above). """
|
|
226
|
-
|
|
227
|
-
try:
|
|
228
|
-
policy_data = self.client.get_bucket_policy(Bucket=bucket)
|
|
229
|
-
|
|
230
|
-
except botocore.exceptions.ClientError as err:
|
|
231
|
-
# Any exception which *isn't* a no-policy exception gets raised
|
|
232
|
-
# to the caller, the no-policy one causes the function to return
|
|
233
|
-
# an empty policy list.
|
|
234
|
-
|
|
235
|
-
if err.response['Error']['Code'] != 'NoSuchBucketPolicy':
|
|
236
|
-
self.log.error('Error reading policy for bucket %s: %s', bucket, err.response['Error']['Code'])
|
|
237
|
-
raise
|
|
238
|
-
|
|
239
|
-
return []
|
|
240
|
-
|
|
241
|
-
return json.loads(policy_data['Policy'])
|
|
242
|
-
|
|
243
|
-
################################################################################
|
|
244
|
-
|
|
245
|
-
def get_buckets(self):
|
|
246
|
-
""" Return a list of all the available buckets """
|
|
247
|
-
|
|
248
|
-
return [bucket.name for bucket in self.resource.buckets.all()]
|
|
249
|
-
|
|
250
|
-
################################################################################
|
|
251
|
-
|
|
252
|
-
def get_website(self, bucket):
|
|
253
|
-
""" Return the web site configuration for the bucket, or None if the bucket
|
|
254
|
-
is not configured for hosting """
|
|
255
|
-
|
|
256
|
-
try:
|
|
257
|
-
web = self.client.get_bucket_website(Bucket=bucket)
|
|
258
|
-
|
|
259
|
-
except botocore.exceptions.ClientError as err:
|
|
260
|
-
if err.response['Error']['Code'] != 'NoSuchWebsiteConfiguration':
|
|
261
|
-
print('>>>%s' % err.response)
|
|
262
|
-
raise
|
|
263
|
-
|
|
264
|
-
web = None
|
|
265
|
-
|
|
266
|
-
return web
|
|
267
|
-
|
|
268
|
-
################################################################################
|
|
269
|
-
|
|
270
|
-
def get_objects(self, bucket, max_objects=None):
|
|
271
|
-
""" Yield a list of the details of the objects in a bucket, stopping after
|
|
272
|
-
returning max_objects (if specified). """
|
|
273
|
-
|
|
274
|
-
paginator = self.client.get_paginator('list_objects_v2')
|
|
275
|
-
|
|
276
|
-
pagesize = min(25, max_objects) if max_objects else 25
|
|
277
|
-
|
|
278
|
-
objects = paginator.paginate(Bucket=bucket, PaginationConfig={'PageSize': pagesize})
|
|
279
|
-
|
|
280
|
-
count = 0
|
|
281
|
-
|
|
282
|
-
for data in objects:
|
|
283
|
-
if 'Contents' in data:
|
|
284
|
-
for obj in data['Contents']:
|
|
285
|
-
|
|
286
|
-
if max_objects:
|
|
287
|
-
count += 1
|
|
288
|
-
if count > max_objects:
|
|
289
|
-
break
|
|
290
|
-
|
|
291
|
-
yield obj
|
|
292
|
-
|
|
293
|
-
################################################################################
|
|
294
|
-
|
|
295
|
-
def get_object_acl(self, bucket, obj):
|
|
296
|
-
""" Return the ACL data for an object in a bucket """
|
|
297
|
-
|
|
298
|
-
try:
|
|
299
|
-
return self.client.get_object_acl(Bucket=bucket, Key=obj)['Grants']
|
|
300
|
-
except botocore.exceptions.ClientError as err:
|
|
301
|
-
self.log.error('Error getting ACL for object %s in bucket %s: %s', obj, bucket, err.response['Error']['Code'])
|
|
302
|
-
raise
|
|
303
|
-
|
|
304
|
-
################################################################################
|
|
305
|
-
|
|
306
|
-
def get_lifecycle(self, bucket):
|
|
307
|
-
""" Return the bucket lifecycle data """
|
|
308
|
-
|
|
309
|
-
try:
|
|
310
|
-
lifecycle = self.client.get_bucket_lifecycle_configuration(Bucket=bucket)
|
|
311
|
-
except botocore.exceptions.ClientError as err:
|
|
312
|
-
if err.response['Error']['Code'] == 'NoSuchLifecycleConfiguration':
|
|
313
|
-
lifecycle = None
|
|
314
|
-
else:
|
|
315
|
-
raise
|
|
316
|
-
|
|
317
|
-
return lifecycle
|
|
318
|
-
|
|
319
|
-
################################################################################
|
|
320
|
-
|
|
321
|
-
class STS(GenericAWS):
|
|
322
|
-
""" Class providing access to STS functionality """
|
|
323
|
-
|
|
324
|
-
def __init__(self, session=None, profile=None):
|
|
325
|
-
""" Initialise the STS client (there is no STS resource in boto3) """
|
|
326
|
-
|
|
327
|
-
super().__init__('sts', session, profile, createresource=False)
|
|
328
|
-
|
|
329
|
-
################################################################################
|
|
330
|
-
|
|
331
|
-
def account(self):
|
|
332
|
-
""" Return the name of the current AWS account """
|
|
333
|
-
|
|
334
|
-
return self.client.get_caller_identity()['Account']
|
|
335
|
-
|
|
336
|
-
################################################################################
|
|
337
|
-
|
|
338
|
-
class IAM(GenericAWS):
|
|
339
|
-
""" Class providing access to IAM """
|
|
340
|
-
|
|
341
|
-
def __init__(self, session=None, profile=None):
|
|
342
|
-
""" Initialise the IAM client/resource """
|
|
343
|
-
|
|
344
|
-
super().__init__('iam', session, profile, createresource=False)
|
|
345
|
-
|
|
346
|
-
def role_exists(self, role):
|
|
347
|
-
""" Return True if the role exists """
|
|
348
|
-
|
|
349
|
-
try:
|
|
350
|
-
self.client.get_role(RoleName=role)
|
|
351
|
-
except botocore.exceptions.ClientError as err:
|
|
352
|
-
if err.response['Error']['Code'] != 'NoSuchEntity':
|
|
353
|
-
raise
|
|
354
|
-
|
|
355
|
-
return False
|
|
356
|
-
else:
|
|
357
|
-
return True
|
|
358
|
-
|
|
359
|
-
def create_role(self, role, description, policy):
|
|
360
|
-
""" Create a new role """
|
|
361
|
-
|
|
362
|
-
response = self.client.create_role(RoleName=role, Description=description, AssumeRolePolicyDocument=policy)
|
|
363
|
-
|
|
364
|
-
return response
|
|
365
|
-
|
|
366
|
-
def put_role_policy(self, role, policy_name, policy):
|
|
367
|
-
""" Update the policy in an existing role """
|
|
368
|
-
|
|
369
|
-
response = self.client.put_role_policy(RoleName=role, PolicyName=policy_name, PolicyDocument=policy)
|
|
370
|
-
|
|
371
|
-
return response
|
|
372
|
-
|
|
373
|
-
def get_role_policy(self, role, policy_name):
|
|
374
|
-
""" Read the policy in an existing role """
|
|
375
|
-
|
|
376
|
-
response = self.client.get_role_policy(RoleName=role, PolicyName=policy_name)
|
|
377
|
-
|
|
378
|
-
return response
|
|
379
|
-
|
|
380
|
-
################################################################################
|
|
381
|
-
|
|
382
|
-
class Events(GenericAWS):
|
|
383
|
-
""" Class providing access to CloudWatch Events """
|
|
384
|
-
|
|
385
|
-
def __init__(self, session=None, profile=None):
|
|
386
|
-
""" Initialise the CloudWatch Events client/resource """
|
|
387
|
-
|
|
388
|
-
super().__init__('events', session, profile, createresource=False)
|
|
389
|
-
|
|
390
|
-
def put_rule(self, name, schedule):
|
|
391
|
-
""" Create or update the specified rule """
|
|
392
|
-
|
|
393
|
-
self.client.put_rule(Name=name, ScheduleExpression=schedule)
|
|
394
|
-
|
|
395
|
-
def put_target(self, name, targets):
|
|
396
|
-
""" Add the specified target(s) to the specified rule, or update
|
|
397
|
-
existing targets """
|
|
398
|
-
|
|
399
|
-
self.client.put_targets(Rule=name, Targets=targets)
|
|
400
|
-
|
|
401
|
-
################################################################################
|
|
402
|
-
|
|
403
|
-
DEFAULT_LAMBDA_TIMEOUT = 3
|
|
404
|
-
DEFAULT_LAMBDA_MEMORY = 128
|
|
405
|
-
DEFAULT_LAMBDA_HANDLER = 'main.main'
|
|
406
|
-
DEFAULT_LAMBDA_RUNTIME = 'python3.6'
|
|
407
|
-
|
|
408
|
-
class Lambda(GenericAWS):
|
|
409
|
-
""" Class providing access to Lambda functions """
|
|
410
|
-
|
|
411
|
-
def __init__(self, session=None, profile=None, region=None):
|
|
412
|
-
""" Initialise the Lambda client/resource """
|
|
413
|
-
|
|
414
|
-
super().__init__('lambda', session, profile, createresource=False, region=region)
|
|
415
|
-
|
|
416
|
-
def exists(self, name):
|
|
417
|
-
""" Return True if the specified Lambda function exists """
|
|
418
|
-
|
|
419
|
-
try:
|
|
420
|
-
self.client.get_function(FunctionName=name)
|
|
421
|
-
except botocore.exceptions.ClientError as err:
|
|
422
|
-
if err.response['Error']['Code'] != 'ResourceNotFoundException':
|
|
423
|
-
raise
|
|
424
|
-
return False
|
|
425
|
-
else:
|
|
426
|
-
return True
|
|
427
|
-
|
|
428
|
-
def update_function(self, name, zipfile):
|
|
429
|
-
""" Update the specified Lambda function given a zip file """
|
|
430
|
-
|
|
431
|
-
with open(zipfile, 'rb') as zipper:
|
|
432
|
-
zipdata = zipper.read()
|
|
433
|
-
|
|
434
|
-
response = self.client.update_function_code(FunctionName=name, ZipFile=zipdata)
|
|
435
|
-
|
|
436
|
-
return response
|
|
437
|
-
|
|
438
|
-
def update_function_configuration(self, name,
|
|
439
|
-
handler=None,
|
|
440
|
-
environment=None):
|
|
441
|
-
""" Update the handler associated with a Lambda function """
|
|
442
|
-
|
|
443
|
-
update_args = {}
|
|
444
|
-
update_args['FunctionName'] = name
|
|
445
|
-
|
|
446
|
-
if handler:
|
|
447
|
-
update_args['Handler'] = handler
|
|
448
|
-
|
|
449
|
-
if environment:
|
|
450
|
-
update_args['Environment'] = {'Variables': environment}
|
|
451
|
-
|
|
452
|
-
self.client.update_function_configuration(**update_args)
|
|
453
|
-
|
|
454
|
-
def create_function(self, name, role, description, zipfile,
|
|
455
|
-
runtime=DEFAULT_LAMBDA_RUNTIME,
|
|
456
|
-
handler=DEFAULT_LAMBDA_HANDLER,
|
|
457
|
-
timeout=DEFAULT_LAMBDA_TIMEOUT,
|
|
458
|
-
memory=DEFAULT_LAMBDA_MEMORY):
|
|
459
|
-
""" Create the specified Lambda function given a zip file"""
|
|
460
|
-
|
|
461
|
-
with open(zipfile, 'rb') as zipper:
|
|
462
|
-
zipdata = zipper.read()
|
|
463
|
-
|
|
464
|
-
response = self.client.create_function(FunctionName=name,
|
|
465
|
-
Runtime=runtime,
|
|
466
|
-
Role=role,
|
|
467
|
-
Handler=handler,
|
|
468
|
-
Code={'ZipFile': zipdata},
|
|
469
|
-
Description=description,
|
|
470
|
-
Timeout=timeout,
|
|
471
|
-
MemorySize=memory)
|
|
472
|
-
|
|
473
|
-
return response
|
|
474
|
-
|
|
475
|
-
################################################################################
|
|
476
|
-
|
|
477
|
-
def get_session(profile_name=None):
|
|
478
|
-
""" Wrapper for boto3.session.Session """
|
|
479
|
-
|
|
480
|
-
return boto3.session.Session(profile_name=profile_name)
|
|
481
|
-
|
|
482
|
-
################################################################################
|
|
483
|
-
|
|
484
|
-
def set_stream_logger(name='botocore', level=10, format_string=None):
|
|
485
|
-
""" Wrapper for boto3.set_stream_logger """
|
|
486
|
-
|
|
487
|
-
return boto3.set_stream_logger(name=name, level=level, format_string=format_string)
|
|
488
|
-
|
|
489
|
-
################################################################################
|
|
490
|
-
|
|
491
|
-
def set_default_region(region):
|
|
492
|
-
""" Set the default region for the module """
|
|
493
|
-
|
|
494
|
-
boto3.setup_default_session(region_name=region)
|
|
495
|
-
|
|
496
|
-
################################################################################
|
|
497
|
-
|
|
498
|
-
def get_regions(servicename):
|
|
499
|
-
""" Generate a list of regions where a service is supported """
|
|
500
|
-
|
|
501
|
-
return boto3.session.Session().get_available_regions(servicename)
|
|
502
|
-
|
|
503
|
-
################################################################################
|
|
504
|
-
|
|
505
|
-
def get_resources():
|
|
506
|
-
""" Devious method of getting the list of resources supported by boto3
|
|
507
|
-
Note that I'm somewhat embarrassed by this, but can't find a better
|
|
508
|
-
way of doing it! """
|
|
509
|
-
|
|
510
|
-
try:
|
|
511
|
-
boto3.resource('XXX-NOT-A-RESOURCE-XXX')
|
|
512
|
-
except boto3.exceptions.ResourceNotExistsError as err:
|
|
513
|
-
return str(err).replace(' - ', '').split('\n')[2:-1]
|
|
514
|
-
|
|
515
|
-
################################################################################
|
|
516
|
-
|
|
517
|
-
def get_clients():
|
|
518
|
-
""" Similarly devious way of getting the list of supported clients.
|
|
519
|
-
This is equally as yukky as the get_resources() function. """
|
|
520
|
-
|
|
521
|
-
try:
|
|
522
|
-
boto3.client('XXX-NOT-A-CLIENT-XXX')
|
|
523
|
-
except botocore.exceptions.UnknownServiceError as err:
|
|
524
|
-
return repr(err)[:-3].split(': ')[2].split(', ')
|
|
525
|
-
|
|
526
|
-
################################################################################
|
|
527
|
-
|
|
528
|
-
def test_function():
|
|
529
|
-
""" Module test function """
|
|
530
|
-
|
|
531
|
-
# Test STS
|
|
532
|
-
|
|
533
|
-
sts_client = STS()
|
|
534
|
-
|
|
535
|
-
print('Current account: %s' % sts_client.account())
|
|
536
|
-
|
|
537
|
-
# TODO: Test SES
|
|
538
|
-
# TODO: Test S3
|
|
539
|
-
# TODO: Test IAM
|
|
540
|
-
# TODO: Test Cloudwatch Events
|
|
541
|
-
# TODO: Test Lambda
|
|
542
|
-
# TODO: Test set_default_region(), get_session(), get_resources(), get_regions(), get_clients()
|
|
543
|
-
|
|
544
|
-
################################################################################
|
|
545
|
-
# Test code
|
|
546
|
-
|
|
547
|
-
if __name__ == '__main__':
|
|
548
|
-
test_function()
|
skilleter_thingy/s3_sync.py
DELETED
|
@@ -1,383 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
|
|
3
|
-
"""Selectively synchronise an S3 bucket to a local destination.
|
|
4
|
-
Similar to the aws s3 sync CLI command, but faster, has better
|
|
5
|
-
options to filter files, only downloads from S3 to local and
|
|
6
|
-
doesn't support the huge range of command line options."""
|
|
7
|
-
|
|
8
|
-
import os
|
|
9
|
-
import argparse
|
|
10
|
-
import sys
|
|
11
|
-
import fnmatch
|
|
12
|
-
import datetime
|
|
13
|
-
import threading
|
|
14
|
-
import queue
|
|
15
|
-
|
|
16
|
-
from pathlib import Path
|
|
17
|
-
|
|
18
|
-
import boto3
|
|
19
|
-
|
|
20
|
-
from botocore.exceptions import ClientError
|
|
21
|
-
|
|
22
|
-
################################################################################
|
|
23
|
-
|
|
24
|
-
# Number of download threads to run - doing the downloads in threads is about
# six times faster than doing so sequentially. This is the default value of
# the --threads command line option.

NUM_THREADS = 12

# Translate our environment names to AWS ARNs - get_client() looks up the
# profile name here and assumes the corresponding role via STS.

AWS_ACCOUNT_ARNS = {
    'prod': 'arn:aws:iam::459580378985:role/ERSReadOnlyRole',
    'test': 'arn:aws:iam::094438481629:role/ERSReadOnlyRole',
    'dev': 'arn:aws:iam::402653103803:role/ERSReadOnlyRole',
    'mgmt': 'arn:aws:iam::125943076446:role/ERSReadOnlyRole',
    'audit': 'arn:aws:iam::229627323276:role/ERSReadOnlyRole',
}
|
|
38
|
-
|
|
39
|
-
################################################################################
|
|
40
|
-
|
|
41
|
-
def error(msg, status=1):
    """Print msg prefixed with 'ERROR: ' and terminate with the given exit status"""

    print(f'ERROR: {msg}')

    # Equivalent to sys.exit(status), which raises exactly this exception
    raise SystemExit(status)
|
|
46
|
-
|
|
47
|
-
################################################################################
|
|
48
|
-
|
|
49
|
-
def verbose(args, msg):
    """Print msg if verbose mode is enabled.

    The message is also printed when args is falsy (e.g. None), so the
    function can be used before the command line has been parsed."""

    if args and not args.verbose:
        return

    print(msg)
|
|
54
|
-
|
|
55
|
-
################################################################################
|
|
56
|
-
|
|
57
|
-
def splitlist(lists, deliminator):
    """Flatten a list of deliminated strings into a single list of items.

    Each string in lists is split on deliminator and the pieces are
    concatenated in order. A value of None (or an empty list) gives []."""

    return [piece for entry in (lists or []) for piece in entry.split(deliminator)]
|
|
66
|
-
|
|
67
|
-
################################################################################
|
|
68
|
-
|
|
69
|
-
def configure(argv=None):
    """Parse the command line.

    argv is an optional list of argument strings; when it is None (the
    default) argparse reads the arguments from sys.argv as before.

    Returns the argparse namespace with:
      * source and destination as plain strings
      * include, exclude, include_type and exclude_type as flat lists
        (each option may be repeated and/or contain comma-separated values;
        an option that was not given becomes an empty list)"""

    parser = argparse.ArgumentParser(description='Selectively sync an S3 bucket to a local directory')

    parser.add_argument('--verbose', '-v', action='store_true', help='Report verbose results (explains why each skipped object was skipped)')

    parser.add_argument('--profile', '-p', action='store', help='Specify the AWS profile')

    parser.add_argument('--include', '-i', action='append', help='Comma-separated list of wildcards to sync - if specified, only files matching one or more of these are synced')
    parser.add_argument('--exclude', '-x', action='append', help='Comma-separated list of wildcards NOT to sync - if specified, only files NOT matching any of these are synced')

    parser.add_argument('--include-type', '-I', action='append',
                        help='Comma-separated list of file types to sync - if specified, only files matching one or more of these are synced')
    parser.add_argument('--exclude-type', '-X', action='append',
                        help='Comma-separated list of file types NOT to sync - if specified, only files NOT matching any of these are synced')

    # TODO: parser.add_argument('--delete', '-d', action='store_true', help='Delete local files that don\'t exist in the bucket')
    parser.add_argument('--force', '-f', action='store_true', help='Always overwrite local files (by default files are only overwritten if they are older or a different size)')

    parser.add_argument('--max-objects', '-m', action='store', type=int, help='Limit the number of objects to download')
    parser.add_argument('--threads', '-t', action='store', type=int, default=NUM_THREADS, help='Number of parallel threads to run')

    # Plain positionals (no nargs=1) so that the values are single strings,
    # not 1-entry lists

    parser.add_argument('source', action='store', type=str, help='Name of the S3 bucket, optionally including path within the bucket')
    parser.add_argument('destination', action='store', type=str, help='Name of the local directory to sync into')

    args = parser.parse_args(argv)

    # Convert the include/exclude parameters to flat lists

    args.include = splitlist(args.include, ',')
    args.exclude = splitlist(args.exclude, ',')

    args.include_type = splitlist(args.include_type, ',')
    args.exclude_type = splitlist(args.exclude_type, ',')

    return args
|
|
110
|
-
|
|
111
|
-
################################################################################
|
|
112
|
-
|
|
113
|
-
def get_client(args):
    """Create an S3 client for the specified profile.

    The profile comes from the --profile option (only the text before the
    first '-' is used) or, failing that, from the AWS_PROFILE environment
    variable. It is mapped to a role ARN via AWS_ACCOUNT_ARNS, the role is
    assumed via STS and an S3 client using the temporary credentials is
    returned. Any failure is reported via error(), which exits."""

    if args.profile:
        profile = args.profile.split('-')[0]
    else:
        profile = os.environ.get('AWS_PROFILE')

        if profile is None:
            error('The AWS profile must be specified via the AWS_PROFILE environment variable or the --profile command line option')

    arn = AWS_ACCOUNT_ARNS.get(profile)

    if arn is None:
        error(f'Invalid AWS profile "{profile}"')

    sts_connection = boto3.client("sts")

    try:
        assumed_role = sts_connection.assume_role(RoleArn=arn, RoleSessionName='s3-selective-sync')
    except ClientError as exc:
        error(f'{exc.response["Error"]["Message"]}')

    # Build a session from the temporary credentials of the assumed role

    credentials = assumed_role["Credentials"]

    session = boto3.Session(
        aws_access_key_id=credentials["AccessKeyId"],
        aws_secret_access_key=credentials["SecretAccessKey"],
        aws_session_token=credentials["SessionToken"])

    return session.client('s3')
|
|
146
|
-
|
|
147
|
-
################################################################################
|
|
148
|
-
|
|
149
|
-
def _wildcard_matches(key, name, wildcard):
    """Wildcards containing '/' are matched against the whole key, others against the file name only"""

    return fnmatch.fnmatch(key if '/' in wildcard else name, wildcard)


def download_filter(args, s3_client, s3_bucket, s3_object):
    """Decide whether to download an object from S3.

    args is the parsed command line (include/exclude wildcard lists,
    include/exclude content type lists, force flag and destination),
    s3_client and s3_bucket are used to query the object's content type
    (only when a type filter is in use) and s3_object is an entry from
    the bucket listing (uses 'Key', 'LastModified' and, if present, 'Size').

    Returns True if the object should be downloaded, or False if it should be skipped."""

    key = s3_object['Key']

    # Ignore directories (endswith() also copes with an empty key)

    if key.endswith('/'):
        verbose(args, f'{key} is a prefix, so will be skipped')
        return False

    # Handle the object as a Path for simplicity

    object_path = Path(key)

    # Filter according to wildcard

    if args.include and not any(_wildcard_matches(key, object_path.name, wildcard) for wildcard in args.include):
        verbose(args, f'"{key}" does not match any include wildcards, so will be skipped')
        return False

    # Both kinds of exclude wildcard (with and without a '/') must cause the
    # object to be skipped

    if args.exclude and any(_wildcard_matches(key, object_path.name, wildcard) for wildcard in args.exclude):
        verbose(args, f'"{key}" matches one or more exclude wildcards, so will be skipped')
        return False

    # Filter according to content type - this needs an extra request per
    # object, so is only done if a type filter has been specified

    if args.include_type or args.exclude_type:
        object_type = s3_client.head_object(Bucket=s3_bucket, Key=key)['ContentType']

        if args.include_type and object_type not in args.include_type:
            verbose(args, f'"{key}" is of type "{object_type}" which does not match any entries in the the type include list, so will be skipped')
            return False

        if args.exclude_type and object_type in args.exclude_type:
            verbose(args, f'"{key}" is of type "{object_type}" which matches one of the entries in the type exclude list, so will be skipped')
            return False

    # Unless we are in force-download mode, check if the destination file already exists and see if it needs to be overwritten

    if not args.force:
        dest_file = Path(args.destination) / object_path

        if dest_file.exists():
            # Overwrite if destination is older or a different size

            dest_stat = dest_file.stat()
            dest_timestamp = datetime.datetime.fromtimestamp(dest_stat.st_mtime, tz=datetime.timezone.utc)

            # If the listing entry has no size then only the age is compared
            same_size = s3_object.get('Size') in (None, dest_stat.st_size)

            if same_size and dest_timestamp >= s3_object['LastModified']:
                verbose(args, f'Destination file already exists and is same age or newer, so "{key}" will be skipped')
                return False

    return True
|
|
220
|
-
|
|
221
|
-
################################################################################
|
|
222
|
-
|
|
223
|
-
def download(args, s3_client, mkdir_lock, bucket, s3_object):
    """Attempt to download an object from S3 to an equivalent local location.

    The object is written to <args.destination>/<key>, creating the parent
    directories if necessary, and the file's access and modification times
    are set to the object's LastModified time.

    mkdir_lock serialises the directory check/creation between download
    threads. Exceptions from s3_client.download_file() are not caught here."""

    local_path = Path(args.destination) / s3_object['Key']
    parent_dir = local_path.parent

    with mkdir_lock:
        if parent_dir.exists():
            if not parent_dir.is_dir():
                error(f'Unable to download "{s3_object["Key"]}" as the destination path is not a directory')
        else:
            parent_dir.mkdir(parents=True)

    # Download the object and then set the file timestamp to the same as the object.
    # The file name is passed as a string since not all boto3 versions accept
    # path-like objects.

    object_timestamp = s3_object['LastModified'].timestamp()
    s3_client.download_file(bucket, s3_object['Key'], str(local_path))
    os.utime(local_path, (object_timestamp, object_timestamp))
|
|
240
|
-
|
|
241
|
-
################################################################################
|
|
242
|
-
|
|
243
|
-
def downloader(args, s3_client, mkdir_lock, bucket, object_queue, error_queue, sem_counter, real_thread=True):
    """Download worker - consumes objects from object_queue and downloads
    those that pass download_filter().

    Download failures (ClientError) are reported as messages on error_queue.
    sem_counter, if not None, is a semaphore that limits the total number of
    downloads; once it can no longer be acquired the worker stops downloading.

    When real_thread is True the function runs as a (daemon) thread and
    blocks waiting for objects; when False it is being called directly with
    a pre-filled queue and returns once the queue is empty."""

    finished = False
    while not finished:
        # Get the next object to download. A real thread waits for one to be
        # added to the queue; a direct call must not block on an empty
        # queue as nothing else is going to add to it.

        if real_thread:
            s3_object = object_queue.get()
        else:
            try:
                s3_object = object_queue.get_nowait()
            except queue.Empty:
                return

        try:
            # If it is a candidate for downloading (meeting the criteria specified on the command
            # line and, unless force-downloading, hasn't already been downloaded) then attempt to
            # download it.

            # If the semaphore is being used to limit the number of downloads, attempt to acquire it
            # If we couldn't, then we've reached the download limit so we'll finish.

            if download_filter(args, s3_client, bucket, s3_object):

                if not sem_counter or sem_counter.acquire(blocking=False):
                    print(f'Downloading "{s3_object["Key"]}"')
                    try:
                        download(args, s3_client, mkdir_lock, bucket, s3_object)
                    except ClientError as exc:
                        error_queue.put(f'Failed to download "{s3_object["Key"]}" - {exc.response["Error"]["Message"]}')

                        # A failed download doesn't count towards the limit

                        if sem_counter:
                            sem_counter.release()
                    else:
                        print(f' Done "{s3_object["Key"]}"')

                else:
                    finished = True
        finally:
            # Indicate the queued item has been consumed - done even if an
            # unexpected exception ends this worker, so that anything waiting
            # in object_queue.join() isn't left waiting for this item.

            object_queue.task_done()

    # If we were using a download semaphore then drain the queue (this will happen in all
    # threads and will never terminate, but we're running as a daemon so it doesn't matter too much).

    if sem_counter and real_thread:
        while True:
            object_queue.get()
            object_queue.task_done()
|
|
287
|
-
|
|
288
|
-
################################################################################
|
|
289
|
-
|
|
290
|
-
def thread_exception_handler(args):
    """Brute-force thread exception handler - ignores the exception
    details passed in args and exits with status 1"""

    del args

    # Equivalent to sys.exit(1), which raises exactly this exception
    raise SystemExit(1)
|
|
295
|
-
|
|
296
|
-
################################################################################
|
|
297
|
-
|
|
298
|
-
def main():
    """Entry point - parse the command line, list the objects in the bucket
    and download those that match the command line criteria, then report
    any download errors on stderr."""

    args = configure()

    s3_client = get_client(args)

    bucket = args.source

    # Remove the 's3://' prefix, if present so that we can split bucket and folder
    # if specified

    if bucket.startswith('s3://'):
        bucket = bucket[5:]

    # Everything after the first '/' (if any) is the prefix within the bucket

    bucket, _, prefix = bucket.partition('/')

    # Semaphore to protect download counter

    sem_counter = threading.Semaphore(value=args.max_objects) if args.max_objects else None

    # Create the download queue and the worker threads

    object_queue = queue.Queue()

    # Create the queue for reporting errors back from the threads

    error_queue = queue.Queue()

    # Lock to prevent race conditions around directory creation

    mkdir_lock = threading.Lock()

    if args.threads > 1:
        # Create the number of threads requested on the command line

        threading.excepthook = thread_exception_handler

        for _ in range(args.threads):
            thread = threading.Thread(target=downloader, daemon=True, args=(args, s3_client, mkdir_lock, bucket, object_queue, error_queue, sem_counter))
            thread.start()

    # Read all the objects in the bucket and queue them for consideration by the download workers
    # (a page has no 'Contents' entry if nothing matches the prefix)

    for page in s3_client.get_paginator('list_objects_v2').paginate(Bucket=bucket, Prefix=prefix):
        for s3_object in page.get('Contents', []):
            object_queue.put(s3_object)

    print('Finished queuing objects')

    if args.threads > 1:
        # Wait for the queues to drain

        object_queue.join()
    else:
        # Single-threaded - process the queued objects directly

        downloader(args, s3_client, mkdir_lock, bucket, object_queue, error_queue, sem_counter, real_thread=False)

    # Report any errors:

    if not error_queue.empty():
        sys.stderr.write('\nErrors were encountered downloading some of the objects:\n\n\n')

        while not error_queue.empty():
            error_msg = error_queue.get()
            sys.stderr.write(f'{error_msg}\n')
            error_queue.task_done()
|
|
367
|
-
|
|
368
|
-
################################################################################
|
|
369
|
-
|
|
370
|
-
def s3_sync():
    """Command entry point - runs main(), exiting with status 1 if
    interrupted from the keyboard or status 2 on a broken pipe"""

    try:
        main()
    except (KeyboardInterrupt, BrokenPipeError) as exc:
        sys.exit(1 if isinstance(exc, KeyboardInterrupt) else 2)
|
|
379
|
-
|
|
380
|
-
################################################################################
|
|
381
|
-
|
|
382
|
-
# Run the sync when executed directly as a script
if __name__ == '__main__':
    s3_sync()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|