skilleter-thingy 0.0.24__py3-none-any.whl → 0.0.26__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.

This version of skilleter-thingy might be problematic.

skilleter_thingy/git_review.py CHANGED
@@ -514,6 +514,12 @@ class GitReview():
514
514
  def show_file_list(self):
515
515
  """ Draw the current page of the file list """
516
516
 
517
+ def format_change(prefix, value):
518
+ """If value is 0 just return it as a string, otherwise apply the prefix and
519
+ (e.g. '+' or '-') and return the result."""
520
+
521
+ return f'{prefix}{value}' if value else '0'
522
+
517
523
  for ypos in range(0, self.file_list_h):
518
524
 
519
525
  normal_colour = curses.color_pair(COLOUR_NORMAL)
@@ -535,13 +541,13 @@ class GitReview():
535
541
  # Diff stats, with or without non-whitespace changes
536
542
 
537
543
  if self.show_none_whitespace_stats:
538
- added = f'+{current_file["non-ws added"]}'
539
- deleted = f'-{current_file["non-ws deleted"]}'
544
+ added = format_change('+', current_file["non-ws added"])
545
+ deleted = format_change('-', current_file["non-ws deleted"])
540
546
  else:
541
- added = f'+{current_file["added"]}'
542
- deleted = f'-{current_file["deleted"]}'
547
+ added = format_change('+', current_file["added"])
548
+ deleted = format_change('-', current_file["deleted"])
543
549
 
544
- status = f'{current_file["status"]} {added:>4}/{deleted:>4}'
550
+ status = f'{current_file["status"]} {deleted:>4}/{added:>4}'
545
551
 
546
552
  abspath = os.path.join(self.working_tree_dir, filename)
547
553
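The net effect of the git_review.py hunks: zero counts now render as a bare '0' instead of '+0' or '-0', and the status column shows deletions before additions. A minimal sketch of the new behaviour, using a hypothetical current_file entry:

def format_change(prefix, value):
    """Return '0' for a zero count, otherwise apply the prefix, e.g. '+3' or '-7'."""
    return f'{prefix}{value}' if value else '0'

# Hypothetical stats for one file in the review list
current_file = {'status': 'M', 'added': 3, 'deleted': 0}

added = format_change('+', current_file['added'])
deleted = format_change('-', current_file['deleted'])

# 0.0.24 printed added/deleted; 0.0.26 prints deletions first and
# drops the sign on zero counts
status = f'{current_file["status"]} {deleted:>4}/{added:>4}'
print(status)  # M    0/  +3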
 
skilleter_thingy/photodupe.py CHANGED
@@ -5,6 +5,7 @@ import sys
5
5
  import os
6
6
  import sys
7
7
  import pickle
8
+ import argparse
8
9
 
9
10
  import PIL
10
11
 
@@ -15,28 +16,42 @@ import imagehash
15
16
 
16
17
  ################################################################################
17
18
 
18
- def read_image_hashes():
19
+ def read_image_hashes(directories):
19
20
  """Read all the specfied directories and hash every picture therein"""
20
21
 
21
22
  hashes = defaultdict(list)
22
23
 
23
- for directory in sys.argv[1:]:
24
+ # Walk each directory tree
25
+
26
+ for directory in directories:
27
+ print(f'Scanning directory tree {directory}')
28
+
24
29
  for root, _, files in os.walk(directory):
30
+ print(f'Scanning directory {root}')
31
+
25
32
  for file in files:
26
33
  filepath = os.path.join(root, file)
27
-
28
- try:
29
- with Image.open(filepath) as image:
30
- hash_value = imagehash.average_hash(image, hash_size=12)
31
-
32
- size = os.stat(filepath).st_size
33
- hashes[hash_value].append({'path': filepath, 'width': image.width, 'height': image.height, 'size': size})
34
-
35
- except PIL.UnidentifiedImageError:
36
- sys.stderr.write(f'ERROR: Unrecognized format {filepath}\n')
37
-
38
- except OSError:
39
- sys.stderr.write(f'ERROR: Unable to read {filepath}\n')
34
+
35
+ fileext = os.path.splitext(file)[1]
36
+
37
+ if fileext.lower() not in ('.jbf', '.ini', '.xml', '.ffs_db'):
38
+ # Calculate the hash and store path, dimensions and file size under the hash entry in the hashes table
39
+
40
+ # Stat the file first so that 'size' is defined for the error
+ # messages below even if the image cannot be opened
+ size = os.stat(filepath).st_size
+
+ try:
41
+ with Image.open(filepath) as image:
42
+ hash_value = imagehash.average_hash(image, hash_size=12)
43
+
45
+ hashes[hash_value].append({'path': filepath, 'width': image.width, 'height': image.height, 'size': size})
46
+
47
+ except PIL.UnidentifiedImageError:
48
+ sys.stderr.write(f'ERROR: Unrecognized format {filepath} (size={size})\n')
49
+
50
+ except OSError:
51
+ sys.stderr.write(f'ERROR: Unable to read {filepath} (size={size})\n')
52
+
53
+ # Return the hash table
54
+
40
55
  return hashes
41
56
 
42
57
  ################################################################################
@@ -44,6 +59,15 @@ def read_image_hashes():
44
59
  def main():
45
60
  """Read the hashes and report duplicates in a vaguely civilised way"""
46
61
 
62
+ parser = argparse.ArgumentParser(description='Search for similar images')
63
+ parser.add_argument('directories', nargs='*', action='store', help='Directories to search')
64
+
65
+ args = parser.parse_args()
66
+
67
+ if not args.directories:
68
+ print('You must specify at least one directory')
69
+ sys.exit(1)
70
+
47
71
  try:
48
72
  print('Loading cached data')
49
73
 
@@ -52,8 +76,10 @@ def main():
52
76
  except (FileNotFoundError, EOFError):
53
77
  print('Scanning directories')
54
78
 
55
- hashes = read_image_hashes()
79
+ hashes = read_image_hashes(args.directories)
56
80
 
81
+ # Sort the list of hashes so that we can easily find close matches
82
+
57
83
  print('Sorting hashes')
58
84
 
59
85
  hash_values = sorted([str(hashval) for hashval in hashes])
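For context, photodupe's core technique is perceptual hashing: imagehash.average_hash() reduces each image to a small luminance grid, so visually identical files land under the same key. A self-contained sketch of the idea, with hypothetical file paths:

from collections import defaultdict
import sys

import PIL
from PIL import Image
import imagehash

hashes = defaultdict(list)

# Hypothetical candidate files
for filepath in ('photos/img_001.jpg', 'photos/img_001_copy.jpg'):
    try:
        with Image.open(filepath) as image:
            # 12x12 average hash, matching the hash_size used above
            hashes[imagehash.average_hash(image, hash_size=12)].append(filepath)
    except (PIL.UnidentifiedImageError, OSError):
        sys.stderr.write(f'ERROR: Unable to read {filepath}\n')

for hash_value, paths in hashes.items():
    if len(paths) > 1:
        print(f'Possible duplicates: {", ".join(paths)}')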
skilleter_thingy-0.0.24.dist-info/METADATA → skilleter_thingy-0.0.26.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skilleter_thingy
3
- Version: 0.0.24
3
+ Version: 0.0.26
4
4
  Summary: A collection of useful utilities, mainly aimed at making Git more friendly
5
5
  Author-email: John Skilleter <john@skilleter.org.uk>
6
6
  Project-URL: Home, https://skilleter.org.uk
@@ -10,7 +10,6 @@ Classifier: Operating System :: OS Independent
10
10
  Requires-Python: >=3.6
11
11
  Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
- Requires-Dist: boto3
14
13
  Requires-Dist: imagehash
15
14
  Requires-Dist: inotify
16
15
  Requires-Dist: pillow
skilleter_thingy-0.0.24.dist-info/RECORD → skilleter_thingy-0.0.26.dist-info/RECORD CHANGED
@@ -1,6 +1,5 @@
1
1
  skilleter_thingy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  skilleter_thingy/addpath.py,sha256=4Yhhgjjz1XDI98j0dAiQpNA2ejLefeWUTeSg3nIXQq0,3842
3
- skilleter_thingy/aws.py,sha256=k08MT1866KuMjgG7ECr7LCuqcgWx78iPDcS9CmemIoA,18614
4
3
  skilleter_thingy/borger.py,sha256=AQX7OHeGXcUjkgyXEWE2h_oOey9eczZBbKjrreXvRAs,7832
5
4
  skilleter_thingy/colour.py,sha256=D-RTYsND6Xm6m3xl0mOe9QSrTNYsyY0K_a8x3id2gvg,7031
6
5
  skilleter_thingy/console_colours.py,sha256=dT5qc_B62VJaWs92yrFt1izoB7hs3a22t3lfrZFybG4,1786
@@ -25,7 +24,7 @@ skilleter_thingy/git_common.py,sha256=sjCRCNfyDhLPtxajjiq1l0SCEqB7d1L2uAgnuxLqc1
25
24
  skilleter_thingy/git_hold.py,sha256=i-mbY6wBZ-lRx6rS2kGyWlTCWqK6suhKape7Dnc4OAo,4630
26
25
  skilleter_thingy/git_mr.py,sha256=ik3uVecNos_73aYANgnaHutPdnW3PlGmWiAw9EJ9IGk,3100
27
26
  skilleter_thingy/git_parent.py,sha256=1VHbh_iH4tnw6vpafqHWEeZwHZVL_sZOIVZMojKTwlk,2696
28
- skilleter_thingy/git_review.py,sha256=LDCILatY8wNqySTSK2PsvSiR4Dd-PbtoH-GXzvHfSS0,51276
27
+ skilleter_thingy/git_review.py,sha256=4pNf8DSVu3V2l9CDo97OIsEHaBIGxfINREbAYU9thg4,51566
29
28
  skilleter_thingy/git_update.py,sha256=comv8CR1j9W8yuCIeoksAKxH8fPTiROkIXVrHdpXWl4,13985
30
29
  skilleter_thingy/git_wt.py,sha256=VAcOgEAqVUUdmmkZ-zxaHi0Xp6LAt9NvZAZzUoJM3AU,3143
31
30
  skilleter_thingy/gitcmp_helper.py,sha256=rFCZiivZfYmd1_-0_3YwmYSfGDLWu6e6rK0I9hFXCUY,11279
@@ -37,7 +36,7 @@ skilleter_thingy/linecount.py,sha256=lw3vuXUUnMwrUihY6bHfZJsRKe6ZMCRz3952Z9N-ogI
37
36
  skilleter_thingy/logger.py,sha256=xKgPAq8KGXmtaXIFjFs1AmZJXtYrXJn2sqL3oxHZjfQ,3107
38
37
  skilleter_thingy/moviemover.py,sha256=j_Xb9_jFdgpFBAXcF4tEqbnKH_FonlnUU39LiCK980k,4470
39
38
  skilleter_thingy/path.py,sha256=3ba_e-QwYpAs-jFVWoV8sfjVjs_59uc5JZt-87Hqn6g,4737
40
- skilleter_thingy/photodupe.py,sha256=EiWTDLI4tFZp8ruwkbCayMTiwRfMI7O_kmbWgXwZGVQ,3178
39
+ skilleter_thingy/photodupe.py,sha256=FssLgbLnqHPuSvMGtRyOa7bRdowufJOQaJJ56f9ybxk,4195
41
40
  skilleter_thingy/phototidier.py,sha256=1uj1XbUemJOalNC3PwTG2-_yUQp4WMAcmwRr7QXPn1w,7823
42
41
  skilleter_thingy/popup.py,sha256=jW-nbpdeswqEMTli7OmBv1J8XQsvFoMI0J33O6dOeu8,2529
43
42
  skilleter_thingy/process.py,sha256=RmXj2RrzwqP_mugeVejtjgF_T3PD7M10XsWuAnc99t0,3565
@@ -47,7 +46,6 @@ skilleter_thingy/remdir.py,sha256=zp5Nr0IMGXQ-b5iT48O5arqWoSjW65Xnr-SpKuav1Ac,46
47
46
  skilleter_thingy/rmdupe.py,sha256=tcX3w8XvliGwBMdSt9BUu07kuDtQEc0IiU8sCxmgzHA,17117
48
47
  skilleter_thingy/rpylint.py,sha256=na39x0yNXDwDkG9yP48BoM5FeTut-OS4AVsYixE0YZU,2639
49
48
  skilleter_thingy/run.py,sha256=EGYJSuMcOmUca6dpfVUFE41vG9C6ZNK8hzZlJCJE6Rs,12619
50
- skilleter_thingy/s3_sync.py,sha256=TITptjua_B-iwPlgTniuoxPvuEnQjyTKfs6l9CKHbXc,13849
51
49
  skilleter_thingy/splitpics.py,sha256=qRlJrqet7TEI6SodS4bkuKXQUpOdMaqmjE4c1CR7ouo,3266
52
50
  skilleter_thingy/strreplace.py,sha256=xsIWw0hc452rYEBtNEQFKIzmV03xjm_Taz-eDTmFFKI,2539
53
51
  skilleter_thingy/sysmon.py,sha256=XRZG6EVSzoVYan_N16qVB1l1RaU51uvLWlRA0CDjC54,11348
@@ -59,9 +57,9 @@ skilleter_thingy/trimpath.py,sha256=SAfOB75_dTldQHjam4kQy1J42209NYPYi8vVAaNn1e8,
59
57
  skilleter_thingy/window_rename.py,sha256=dCBgZqih_3YKHt35hsOAhARFp3QxOi8w8huC63sqJK8,3128
60
58
  skilleter_thingy/xchmod.py,sha256=F9_lxKuLqVlHHr3oBI3dkMoFOuwRzYDlpQMTmDcjpBI,4590
61
59
  skilleter_thingy/yamlcheck.py,sha256=FXylZ5NtHirDlPVhVEUZUZkTugVR-g51BbjaN06akAc,2868
62
- skilleter_thingy-0.0.24.dist-info/LICENSE,sha256=ljOS4DjXvqEo5VzGfdaRwgRZPbNScGBmfwyC8PChvmQ,32422
63
- skilleter_thingy-0.0.24.dist-info/METADATA,sha256=bdAQ2Q7iW4Y0-Otcl_StalppVtk2pCtP26dtAxzaNWA,5231
64
- skilleter_thingy-0.0.24.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
65
- skilleter_thingy-0.0.24.dist-info/entry_points.txt,sha256=jf7hVfH0dojLxk0D4TjIazKoqdToditCGdCfyhIotME,1971
66
- skilleter_thingy-0.0.24.dist-info/top_level.txt,sha256=8-JhgToBBiWURunmvfpSxEvNkDHQQ7r25-aBXtZv61g,17
67
- skilleter_thingy-0.0.24.dist-info/RECORD,,
60
+ skilleter_thingy-0.0.26.dist-info/LICENSE,sha256=ljOS4DjXvqEo5VzGfdaRwgRZPbNScGBmfwyC8PChvmQ,32422
61
+ skilleter_thingy-0.0.26.dist-info/METADATA,sha256=tn3FjCTFSjHBom9bV7v__U8UFJ9QdVDCwtO9B7vhvy8,5210
62
+ skilleter_thingy-0.0.26.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
63
+ skilleter_thingy-0.0.26.dist-info/entry_points.txt,sha256=jf7hVfH0dojLxk0D4TjIazKoqdToditCGdCfyhIotME,1971
64
+ skilleter_thingy-0.0.26.dist-info/top_level.txt,sha256=8-JhgToBBiWURunmvfpSxEvNkDHQQ7r25-aBXtZv61g,17
65
+ skilleter_thingy-0.0.26.dist-info/RECORD,,
skilleter_thingy/aws.py DELETED
@@ -1,548 +0,0 @@
1
- #! /usr/bin/env python3
2
-
3
- ################################################################################
4
- """ Simple AWS module - provides an abstraction layer on top of the boto3 client
5
- for simple operations such as reading/writing S3 buckets, sending emails
6
- etc.
7
-
8
- Currently this is a random collection of functions extracted from the S3
9
- bucket ACL checking code but it is hoped it will evolve over time into
10
- something more comprehensive.
11
-
12
- Author: John Skilleter
13
- """
14
- ################################################################################
15
-
16
- # System modules
17
-
18
- import sys
19
- import logging
20
- import json
21
-
22
- # AWS modules
23
-
24
- import boto3
25
- import botocore
26
-
27
- ################################################################################
28
-
29
- class GenericAWS:
30
- """ Class providing generic S3 functionality from which the
31
- service-specific classes are derived """
32
-
33
- ################################################################################
34
-
35
- def __init__(self, service, session=None, profile=None, createresource=True, createclient=True, region=None):
36
- """ Initialisation - just create a client handle, optionally using
37
- the specified profile """
38
-
39
- if session:
40
- self.session = session
41
- elif profile:
42
- self.session = boto3.session.Session(profile_name=profile)
43
- else:
44
- self.session = boto3
45
-
46
- args = {}
47
- if region:
48
- args['region_name'] = region
49
-
50
- if createresource:
51
- self.resource = self.session.resource(service, **args)
52
-
53
- if createclient:
54
- self.client = self.session.client(service, **args)
55
-
56
- self.log = logging.getLogger('%s:%s' % (__name__, service))
57
-
58
- ################################################################################
59
-
60
- def set_log_level(self, level):
61
- """ Set logging for the module """
62
-
63
- self.log.setLevel(level)
64
-
65
- ################################################################################
66
-
67
- class SES(GenericAWS):
68
- """ Class for AWS Simple Email Service """
69
-
70
- ################################################################################
71
-
72
- def __init__(self, session=None, profile=None):
73
- """ Initialisation """
74
-
75
- super().__init__('ses', session, profile, createresource=False)
76
-
77
- ################################################################################
78
-
79
- def send(self, sender, recipient, subject, body):
80
- """ Send an email """
81
-
82
- destination = \
83
- {
84
- 'ToAddresses': [recipient]
85
- }
86
-
87
- message = \
88
- {
89
- 'Subject':
90
- {
91
- 'Data': subject
92
- },
93
- 'Body':
94
- {
95
- 'Text':
96
- {
97
- 'Data': body
98
- }
99
- }
100
- }
101
-
102
- # Attempt to send the email - just report the error and quit on failure
103
-
104
- try:
105
- self.log.info('Sending email from %s to %s', sender, recipient)
106
-
107
- self.client.send_email(Source=sender,
108
- Destination=destination,
109
- Message=message)
110
-
111
- except (botocore.exceptions.EndpointConnectionError,
112
- self.client.exceptions.MessageRejected) as err:
113
- print('')
114
- print('Error sending email: %s' % err)
115
- print(' Sender: %s' % sender)
116
- print(' Recipient: %s' % recipient)
117
- print(' Subject: %s' % subject)
118
- print(' Body:')
119
-
120
- for txt in body.split('\n'):
121
- print(' %s' % txt)
122
-
123
- print('')
124
-
125
- sys.exit(1)
126
-
127
- ################################################################################
128
-
129
- class S3Bucket(GenericAWS):
130
- """ Class providing access to S3 buckets """
131
-
132
- ################################################################################
133
-
134
- def __init__(self, session=None, profile=None):
135
- """ Initialisation - just create a client handle, optionally using
136
- the specified profile """
137
-
138
- super().__init__('s3', session, profile)
139
-
140
- ################################################################################
141
-
142
- def read(self, bucket, key):
143
- """ Read the specified data from the specified bucket.
144
- Returns the data, or raises a boto3 exception on error. """
145
-
146
- self.log.info('Get object from key %s in bucket %s', key, bucket)
147
-
148
- response = self.client.get_object(Bucket=bucket, Key=key)
149
-
150
- self.log.debug('Get object: %s', response)
151
-
152
- return response['Body'].read()
153
-
154
- ################################################################################
155
-
156
- def write(self, bucket, key, data):
157
- """ Write data into the specified key of the specified bucket
158
- Raises a boto3 exception on error. """
159
-
160
- self.log.info('Writing %d bytes of data to key %s in bucket %s', len(data), key, bucket)
161
-
162
- self.client.put_object(Bucket=bucket, Key=key, Body=data)
163
-
164
- ################################################################################
165
-
166
- def get_tags(self, bucket):
167
- """ Read the tags from a bucket. Returns a dictionary of tags, which will be
168
- empty if the bucket has no tags. """
169
-
170
- self.log.info('Reading tags from bucket %s', bucket)
171
-
172
- try:
173
- tags = self.client.get_bucket_tagging(Bucket=bucket)['TagSet']
174
-
175
- except botocore.exceptions.ClientError as err:
176
- # Any exception except NoSuchTagSet gets raised.
177
- # NoSuchTagSet simply means that the bucket has no tags
178
- # and isn't really an error, as such, so we just return
179
- # an empty dictionary.
180
-
181
- if err.response['Error']['Code'] != 'NoSuchTagSet':
182
- self.log.error('Error reading tags for bucket %s: %s', bucket, err.response['Error']['Code'])
183
- raise
184
-
185
- tags = {}
186
-
187
- self.log.info('Tags: %s', tags)
188
-
189
- return tags
190
-
191
- ################################################################################
192
-
193
- def get_location(self, bucket):
194
- """ Return the location of a bucket - note that the Boto3 returns the location
195
- set to None, rather than us-east-1, if you query a bucket located in that
196
- region. """
197
-
198
- try:
199
- location = self.client.get_bucket_location(Bucket=bucket)['LocationConstraint']
200
- except botocore.exceptions.ClientError as err:
201
- self.log.info('Error getting location of bucket %s: %s', bucket, err.response['Error']['Code'])
202
- raise
203
-
204
- if not location:
205
- location = 'us-east-1'
206
-
207
- return location
208
-
209
- ################################################################################
210
-
211
- def get_acl(self, bucket):
212
- """ Return the bucket ACLs """
213
-
214
- try:
215
- return self.client.get_bucket_acl(Bucket=bucket)['Grants']
216
- except botocore.exceptions.ClientError as err:
217
- self.log.error('Error getting ACL for bucket %s: %s', bucket, err.response['Error']['Code'])
218
- raise
219
-
220
- ################################################################################
221
-
222
- def get_policy(self, bucket):
223
- """ Return bucket policy information as a list of policy dictionaries
224
- Returns an empty list if the bucket has no policies (similar to
225
- get_tags() above). """
226
-
227
- try:
228
- policy_data = self.client.get_bucket_policy(Bucket=bucket)
229
-
230
- except botocore.exceptions.ClientError as err:
231
- # Any exception which *isn't* a no-policy exception gets raised
232
- # to the caller, the no-policy one causes the function to return
233
- # an empty policy list.
234
-
235
- if err.response['Error']['Code'] != 'NoSuchBucketPolicy':
236
- self.log.error('Error reading policy for bucket %s: %s', bucket, err.response['Error']['Code'])
237
- raise
238
-
239
- return []
240
-
241
- return json.loads(policy_data['Policy'])
242
-
243
- ################################################################################
244
-
245
- def get_buckets(self):
246
- """ Return a list of all the available buckets """
247
-
248
- return [bucket.name for bucket in self.resource.buckets.all()]
249
-
250
- ################################################################################
251
-
252
- def get_website(self, bucket):
253
- """ Return the web site configuration for the bucket, or None if the bucket
254
- is not configured for hosting """
255
-
256
- try:
257
- web = self.client.get_bucket_website(Bucket=bucket)
258
-
259
- except botocore.exceptions.ClientError as err:
260
- if err.response['Error']['Code'] != 'NoSuchWebsiteConfiguration':
261
- print('>>>%s' % err.response)
262
- raise
263
-
264
- web = None
265
-
266
- return web
267
-
268
- ################################################################################
269
-
270
- def get_objects(self, bucket, max_objects=None):
271
- """ Yield a list of the details of the objects in a bucket, stopping after
272
- returning max_objects (if specified). """
273
-
274
- paginator = self.client.get_paginator('list_objects_v2')
275
-
276
- pagesize = min(25, max_objects) if max_objects else 25
277
-
278
- objects = paginator.paginate(Bucket=bucket, PaginationConfig={'PageSize': pagesize})
279
-
280
- count = 0
281
-
282
- for data in objects:
283
- if 'Contents' in data:
284
- for obj in data['Contents']:
285
-
286
- if max_objects:
287
- count += 1
288
- if count > max_objects:
289
- break
290
-
291
- yield obj
292
-
293
- ################################################################################
294
-
295
- def get_object_acl(self, bucket, obj):
296
- """ Return the ACL data for an object in a bucket """
297
-
298
- try:
299
- return self.client.get_object_acl(Bucket=bucket, Key=obj)['Grants']
300
- except botocore.exceptions.ClientError as err:
301
- self.log.error('Error getting ACL for object %s in bucket %s: %s', obj, bucket, err.response['Error']['Code'])
302
- raise
303
-
304
- ################################################################################
305
-
306
- def get_lifecycle(self, bucket):
307
- """ Return the bucket lifecycle data """
308
-
309
- try:
310
- lifecycle = self.client.get_bucket_lifecycle_configuration(Bucket=bucket)
311
- except botocore.exceptions.ClientError as err:
312
- if err.response['Error']['Code'] == 'NoSuchLifecycleConfiguration':
313
- lifecycle = None
314
- else:
315
- raise
316
-
317
- return lifecycle
318
-
319
- ################################################################################
320
-
321
- class STS(GenericAWS):
322
- """ Class providing access to STS functionality """
323
-
324
- def __init__(self, session=None, profile=None):
325
- """ Initialise the STS client (there is no STS resource in boto3) """
326
-
327
- super().__init__('sts', session, profile, createresource=False)
328
-
329
- ################################################################################
330
-
331
- def account(self):
332
- """ Return the name of the current AWS account """
333
-
334
- return self.client.get_caller_identity()['Account']
335
-
336
- ################################################################################
337
-
338
- class IAM(GenericAWS):
339
- """ Class providing access to IAM """
340
-
341
- def __init__(self, session=None, profile=None):
342
- """ Initialise the IAM client/resource """
343
-
344
- super().__init__('iam', session, profile, createresource=False)
345
-
346
- def role_exists(self, role):
347
- """ Return True if the role exists """
348
-
349
- try:
350
- self.client.get_role(RoleName=role)
351
- except botocore.exceptions.ClientError as err:
352
- if err.response['Error']['Code'] != 'NoSuchEntity':
353
- raise
354
-
355
- return False
356
- else:
357
- return True
358
-
359
- def create_role(self, role, description, policy):
360
- """ Create a new role """
361
-
362
- response = self.client.create_role(RoleName=role, Description=description, AssumeRolePolicyDocument=policy)
363
-
364
- return response
365
-
366
- def put_role_policy(self, role, policy_name, policy):
367
- """ Update the policy in an existing role """
368
-
369
- response = self.client.put_role_policy(RoleName=role, PolicyName=policy_name, PolicyDocument=policy)
370
-
371
- return response
372
-
373
- def get_role_policy(self, role, policy_name):
374
- """ Read the policy in an existing role """
375
-
376
- response = self.client.get_role_policy(RoleName=role, PolicyName=policy_name)
377
-
378
- return response
379
-
380
- ################################################################################
381
-
382
- class Events(GenericAWS):
383
- """ Class providing access to CloudWatch Events """
384
-
385
- def __init__(self, session=None, profile=None):
386
- """ Initialise the CloudWatch Events client/resource """
387
-
388
- super().__init__('events', session, profile, createresource=False)
389
-
390
- def put_rule(self, name, schedule):
391
- """ Create or update the specified rule """
392
-
393
- self.client.put_rule(Name=name, ScheduleExpression=schedule)
394
-
395
- def put_target(self, name, targets):
396
- """ Add the specified target(s) to the specified rule, or update
397
- existing targets """
398
-
399
- self.client.put_targets(Rule=name, Targets=targets)
400
-
401
- ################################################################################
402
-
403
- DEFAULT_LAMBDA_TIMEOUT = 3
404
- DEFAULT_LAMBDA_MEMORY = 128
405
- DEFAULT_LAMBDA_HANDLER = 'main.main'
406
- DEFAULT_LAMBDA_RUNTIME = 'python3.6'
407
-
408
- class Lambda(GenericAWS):
409
- """ Class providing access to Lambda functions """
410
-
411
- def __init__(self, session=None, profile=None, region=None):
412
- """ Initialise the Lambda client/resource """
413
-
414
- super().__init__('lambda', session, profile, createresource=False, region=region)
415
-
416
- def exists(self, name):
417
- """ Return True if the specified Lambda function exists """
418
-
419
- try:
420
- self.client.get_function(FunctionName=name)
421
- except botocore.exceptions.ClientError as err:
422
- if err.response['Error']['Code'] != 'ResourceNotFoundException':
423
- raise
424
- return False
425
- else:
426
- return True
427
-
428
- def update_function(self, name, zipfile):
429
- """ Update the specified Lambda function given a zip file """
430
-
431
- with open(zipfile, 'rb') as zipper:
432
- zipdata = zipper.read()
433
-
434
- response = self.client.update_function_code(FunctionName=name, ZipFile=zipdata)
435
-
436
- return response
437
-
438
- def update_function_configuration(self, name,
439
- handler=None,
440
- environment=None):
441
- """ Update the handler associated with a Lambda fucntion """
442
-
443
- update_args = {}
444
- update_args['FunctionName'] = name
445
-
446
- if handler:
447
- update_args['Handler'] = handler
448
-
449
- if environment:
450
- update_args['Environment'] = {'Variables': environment}
451
-
452
- self.client.update_function_configuration(**update_args)
453
-
454
- def create_function(self, name, role, description, zipfile,
455
- runtime=DEFAULT_LAMBDA_RUNTIME,
456
- handler=DEFAULT_LAMBDA_HANDLER,
457
- timeout=DEFAULT_LAMBDA_TIMEOUT,
458
- memory=DEFAULT_LAMBDA_MEMORY):
459
- """ Create the specified Lambda function given a zip file"""
460
-
461
- with open(zipfile, 'rb') as zipper:
462
- zipdata = zipper.read()
463
-
464
- response = self.client.create_function(FunctionName=name,
465
- Runtime=runtime,
466
- Role=role,
467
- Handler=handler,
468
- Code={'ZipFile': zipdata},
469
- Description=description,
470
- Timeout=timeout,
471
- MemorySize=memory)
472
-
473
- return response
474
-
475
- ################################################################################
476
-
477
- def get_session(profile_name=None):
478
- """ Wrapper for boto3.session.Session """
479
-
480
- return boto3.session.Session(profile_name=profile_name)
481
-
482
- ################################################################################
483
-
484
- def set_stream_logger(name='botocore', level=10, format_string=None):
485
- """ Wrapper for boto3.set_stream_logger """
486
-
487
- return boto3.set_stream_logger(name=name, level=level, format_string=format_string)
488
-
489
- ################################################################################
490
-
491
- def set_default_region(region):
492
- """ Set the default region for the module """
493
-
494
- boto3.setup_default_session(region_name=region)
495
-
496
- ################################################################################
497
-
498
- def get_regions(servicename):
499
- """ Generate a list of regions where a service is supported """
500
-
501
- return boto3.session.Session().get_available_regions(servicename)
502
-
503
- ################################################################################
504
-
505
- def get_resources():
506
- """ Devious method of getting the list of clients supported by boto3
507
- Note that I'm somewhat embarrassed by this, but can't find a better
508
- way of doing it! """
509
-
510
- try:
511
- boto3.resource('XXX-NOT-A-RESOURCE-XXX')
512
- except boto3.exceptions.ResourceNotExistsError as err:
513
- return str(err).replace(' - ', '').split('\n')[2:-1]
514
-
515
- ################################################################################
516
-
517
- def get_clients():
518
- """ Similarly devious way of getting the list of supported clients.
519
- This is just as yucky as the get_resources() function. """
520
-
521
- try:
522
- boto3.client('XXX-NOT-A-CLIENT-XXX')
523
- except botocore.exceptions.UnknownServiceError as err:
524
- return repr(err)[:-3].split(': ')[2].split(', ')
525
-
526
- ################################################################################
527
-
528
- def test_function():
529
- """ Module test function """
530
-
531
- # Test STS
532
-
533
- sts_client = STS()
534
-
535
- print('Current account: %s' % sts_client.account())
536
-
537
- # TODO: Test SES
538
- # TODO: Test S3
539
- # TODO: Test IAM
540
- # TODO: Test Cloudwatch Events
541
- # TODO: Test Lambda
542
- # TODO: Test set_default_region(), get_session(), get_resources(), get_regions(), get_clients()
543
-
544
- ################################################################################
545
- # Test code
546
-
547
- if __name__ == '__main__':
548
- test_function()
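With aws.py removed in 0.0.26 (along with the boto3 dependency in METADATA above), callers of these wrappers need to pin to 0.0.24 or port to plain boto3. A sketch of how the removed classes were used, based on the deleted source; the bucket and key names are hypothetical:

from skilleter_thingy import aws  # only present in 0.0.24 and earlier

s3 = aws.S3Bucket(profile='default')

# read() and write() raise boto3/botocore exceptions on failure
s3.write('example-bucket', 'path/to/key.txt', b'hello')
data = s3.read('example-bucket', 'path/to/key.txt')

# get_tags() and get_policy() return empty containers, rather than
# raising, when a bucket simply has no tags or no policy attached
tags = s3.get_tags('example-bucket')
policy = s3.get_policy('example-bucket')

print(aws.STS().account())  # account ID from get_caller_identity()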
skilleter_thingy/s3_sync.py DELETED
@@ -1,383 +0,0 @@
1
- #!/usr/bin/env python3
2
-
3
- """Selectively synchronise an S3 bucket to a local destination.
4
- Similar to the aws s3 sync CLI command, but faster, has better
5
- options to filter files, only downloads from S3 to local and
6
- doesn't support the huge range of command line options."""
7
-
8
- import os
9
- import argparse
10
- import sys
11
- import fnmatch
12
- import datetime
13
- import threading
14
- import queue
15
-
16
- from pathlib import Path
17
-
18
- import boto3
19
-
20
- from botocore.exceptions import ClientError
21
-
22
- ################################################################################
23
-
24
- # Number of download threads to run - doing the downloads in threads is about
25
- # six times faster than doing so sequentially.
26
-
27
- NUM_THREADS = 12
28
-
29
- # Translate our environment names to AWS ARNs
30
-
31
- AWS_ACCOUNT_ARNS = {
32
- 'prod': 'arn:aws:iam::459580378985:role/ERSReadOnlyRole',
33
- 'test': 'arn:aws:iam::094438481629:role/ERSReadOnlyRole',
34
- 'dev': 'arn:aws:iam::402653103803:role/ERSReadOnlyRole',
35
- 'mgmt': 'arn:aws:iam::125943076446:role/ERSReadOnlyRole',
36
- 'audit': 'arn:aws:iam::229627323276:role/ERSReadOnlyRole',
37
- }
38
-
39
- ################################################################################
40
-
41
- def error(msg, status=1):
42
- """Report an error message and exit"""
43
-
44
- print(f'ERROR: {msg}')
45
- sys.exit(status)
46
-
47
- ################################################################################
48
-
49
- def verbose(args, msg):
50
- """Report a message in verbose mode"""
51
-
52
- if not args or args.verbose:
53
- print(msg)
54
-
55
- ################################################################################
56
-
57
- def splitlist(lists, delimiter):
58
- """Create a list from a list of deliminated strings"""
59
-
60
- result = []
61
-
62
- for item in lists or []:
63
- result += item.split(delimiter)
64
-
65
- return result
66
-
67
- ################################################################################
68
-
69
- def configure():
70
- """Parse the command line"""
71
-
72
- parser = argparse.ArgumentParser(description='Selectively sync an S3 bucket to a local directory')
73
-
74
- parser.add_argument('--verbose', '-v', action='store_true', help='Report verbose results')
75
-
76
- parser.add_argument('--profile', '-p', action='store', help='Specify the AWS profile')
77
-
78
- parser.add_argument('--include', '-i', action='append', help='Comma-separated list of wildcards to sync - if specified, only files matching one or more of these are synced')
79
- parser.add_argument('--exclude', '-x', action='append', help='Comma-separated list of wildcards NOT to sync - if specified, only files NOT matching any of these are synced')
80
-
81
- parser.add_argument('--include-type', '-I', action='append',
82
- help='Comma-separated list of file types to sync - if specified, only files matching one or more of these are synced')
83
- parser.add_argument('--exclude-type', '-X', action='append',
84
- help='Comma-separated list of file types NOT to sync - if specified, only files NOT matching any of these are synced')
85
-
86
- # TODO: parser.add_argument('--delete', '-d', action='store_true', help='Delete local files that don\'t exist in the bucket')
87
- parser.add_argument('--force', '-f', action='store_true', help='Always overwrite local files (by default files are only overwritten if they are older or a different size)')
88
-
89
- parser.add_argument('--max-objects', '-m', action='store', type=int, help='Limit the number of objects to download')
90
- parser.add_argument('--threads', '-t', action='store', type=int, default=NUM_THREADS, help='Number of parallel threads to run')
91
- parser.add_argument('source', action='store', nargs=1, type=str, help='Name of the S3 bucket, optionally including path within the bucket')
92
- parser.add_argument('destination', action='store', nargs=1, type=str, help='Name of the local directory to sync into')
93
-
94
- args = parser.parse_args()
95
-
96
- # Convert the 1-entry list arguments to single items
97
-
98
- args.source = args.source[0]
99
- args.destination = args.destination[0]
100
-
101
- # Convert the include/exclude parameters to lists
102
-
103
- args.include = splitlist(args.include, ',')
104
- args.exclude = splitlist(args.exclude, ',')
105
-
106
- args.include_type = splitlist(args.include_type, ',')
107
- args.exclude_type = splitlist(args.exclude_type, ',')
108
-
109
- return args
110
-
111
- ################################################################################
112
-
113
- def get_client(args):
114
- """Create an S3 client for the specified profile"""
115
-
116
- if args.profile:
117
- profile = args.profile.split('-')[0]
118
- else:
119
- try:
120
- profile = os.environ['AWS_PROFILE']
121
- except KeyError:
122
- error('The AWS profile must be specified via the AWS_PROFILE environment variable or the --profile command line option')
123
-
124
- try:
125
- arn = AWS_ACCOUNT_ARNS[profile]
126
- except KeyError:
127
- error(f'Invalid AWS profile "{profile}"')
128
-
129
- sts_connection = boto3.client("sts")
130
-
131
- try:
132
- acct_b = sts_connection.assume_role(RoleArn=arn, RoleSessionName='s3-selective-sync')
133
- except ClientError as exc:
134
- error(f'{exc.response["Error"]["Message"]}')
135
-
136
- access_key = acct_b["Credentials"]["AccessKeyId"]
137
- secret_key = acct_b["Credentials"]["SecretAccessKey"]
138
- session_token = acct_b["Credentials"]["SessionToken"]
139
-
140
- session = boto3.Session(
141
- aws_access_key_id=access_key,
142
- aws_secret_access_key=secret_key,
143
- aws_session_token=session_token)
144
-
145
- return session.client('s3')
146
-
147
- ################################################################################
148
-
149
- def download_filter(args, s3_client, s3_bucket, s3_object):
150
- """Decide whether to download an object from S3
151
- Returns True if the object should be downloaded, or False if it should be skipped."""
152
-
153
- # Ignore directories
154
-
155
- if s3_object['Key'][-1] == '/':
156
- verbose(args, f'{s3_object["Key"]} is a prefix, so will be skipped')
157
- return False
158
-
159
- # Handle the object as a Path for simplicity
160
-
161
- object_path = Path(s3_object['Key'])
162
-
163
- # Filter according to wildcard
164
-
165
- if args.include:
166
- for wildcard in args.include:
167
- if '/' in wildcard:
168
- if fnmatch.fnmatch(s3_object['Key'], wildcard):
169
- break
170
- elif fnmatch.fnmatch(object_path.name, wildcard):
171
- break
172
- else:
173
- verbose(args, f'"{s3_object["Key"]}" does not match any include wildcards, so will be skipped')
174
- return False
175
-
176
- if args.exclude:
177
- for wildcard in args.exclude:
178
- if '/' in wildcard:
179
- if fnmatch.fnmatch(s3_object['Key'], wildcard):
180
- verbose(args, f'"{s3_object["Key"]}" matches one or more exclude wildcards, so will be skipped')
181
- elif fnmatch.fnmatch(object_path.name, wildcard):
182
- verbose(args, f'"{s3_object["Key"]}" matches one or more exclude wildcards, so will be skipped')
183
- return False
184
-
185
- # Filter according to content type
186
-
187
- if args.include_type or args.exclude_type:
188
- object_type = s3_client.head_object(Bucket=s3_bucket, Key=s3_object["Key"])['ContentType']
189
-
190
- if args.include_type:
191
- for include_type in args.include_type:
192
- if object_type == include_type:
193
- break
194
- else:
195
- verbose(args, f'"{s3_object["Key"]}" is of type "{object_type}" which does not match any entries in the the type include list, so will be skipped')
196
- return False
197
-
198
- if args.exclude_type:
199
- for exclude_type in args.exclude_type:
200
- if object_type == exclude_type:
201
- verbose(args, f'"{s3_object["Key"]}" is of type "{object_type}" which matches one of the entries in the type exclude list, so will be skipped')
202
- return False
203
-
204
- # Unless we are in force-download mode, check if the destination file already exists and see if it needs to be overwritten
205
-
206
- if not args.force:
207
- dest_file = args.destination / object_path
208
-
209
- if dest_file.exists():
210
- # Overwrite if destination is older or a different size
211
-
212
- dest_stat = dest_file.stat()
213
- dest_timestamp = datetime.datetime.fromtimestamp(dest_stat.st_mtime, tz=datetime.timezone.utc)
214
-
215
- if dest_timestamp >= s3_object['LastModified']:
216
- verbose(args, f'Destination file already exists and is same age or newer, so "{s3_object["Key"]}" will be skipped')
217
- return False
218
-
219
- return True
220
-
221
- ################################################################################
222
-
223
- def download(args, s3_client, mkdir_lock, bucket, s3_object):
224
- """Attempt to download an object from S3 to an equivalent local location"""
225
-
226
- local_path = Path(args.destination) / s3_object['Key']
227
-
228
- with mkdir_lock:
229
- if local_path.parent.exists():
230
- if not local_path.parent.is_dir():
231
- error(f'Unable to download "{s3_object["Key"]}" as the destination path is not a directory')
232
- else:
233
- local_path.parent.mkdir(parents=True)
234
-
235
- # Download the object and the set the file timestamp to the same as the object
236
-
237
- object_timestamp = s3_object['LastModified'].timestamp()
238
- s3_client.download_file(bucket, s3_object['Key'], local_path)
239
- os.utime(local_path, (object_timestamp, object_timestamp))
240
-
241
- ################################################################################
242
-
243
- def downloader(args, s3_client, mkdir_lock, bucket, object_queue, error_queue, sem_counter, real_thread=True):
244
- """Download thread"""
245
-
246
- finished = False
247
- while not finished:
248
- # Get the next object to download (waiting for one to be added to the queue)
249
-
250
- s3_object = object_queue.get()
251
-
252
- # If it is a candidate for downloading (meets the criteria specified on the command
253
- # line and, unless force-downloading, hasn't already been downloaded) then attempt to
254
- # download it.
255
-
256
- # If the semaphore is being used to limit the number of downloads, attempt to acquire it
257
- # If we couldn't, then we've reached the download limit so we'll finish.
258
-
259
- if download_filter(args, s3_client, bucket, s3_object):
260
-
261
- if not sem_counter or sem_counter.acquire(blocking=False):
262
- print(f'Downloading "{s3_object["Key"]}"')
263
- try:
264
- download(args, s3_client, mkdir_lock, bucket, s3_object)
265
- except ClientError as exc:
266
- error_queue.put(f'Failed to download "{s3_object["Key"]}" - {exc.response["Error"]["Message"]}')
267
-
268
- if sem_counter:
269
- sem_counter.release()
270
- else:
271
- print(f' Done "{s3_object["Key"]}"')
272
-
273
- else:
274
- finished = True
275
-
276
- # Indicate the queued item has been consumed
277
-
278
- object_queue.task_done()
279
-
280
- # If we were using a download semaphore then drain the queue (this will happen in all
281
- # threads and will never terminate, but we're running as a daemon so it doesn't matter too much).
282
-
283
- if sem_counter and real_thread:
284
- while True:
285
- object_queue.get()
286
- object_queue.task_done()
287
-
288
- ################################################################################
289
-
290
- def thread_exception_handler(args):
291
- """Brute-force thread exception handler"""
292
-
293
- _ = args
294
- sys.exit(1)
295
-
296
- ################################################################################
297
-
298
- def main():
299
- """Entry point"""
300
-
301
- args = configure()
302
-
303
- s3_client = get_client(args)
304
-
305
- bucket = args.source
306
-
307
- # Remove the 's3://' prefix, if present, so that we can split the bucket and folder
308
- # if specified
309
-
310
- if bucket.startswith('s3://'):
311
- bucket = bucket[5:]
312
-
313
- if '/' in bucket:
314
- bucket, prefix = bucket.split('/', 1)
315
- else:
316
- prefix = ''
317
-
318
- # Semaphore to protect download counter
319
-
320
- sem_counter = threading.Semaphore(value=args.max_objects) if args.max_objects else None
321
-
322
- # Create the download queue and the worker threads
323
-
324
- object_queue = queue.Queue()
325
-
326
- # Create the queue for reporting errors back from the threads
327
-
328
- error_queue = queue.Queue()
329
-
330
- # Lock to prevent race conditions around directory creation
331
-
332
- mkdir_lock = threading.Lock()
333
-
334
- if args.threads > 1:
335
- # Create threads
336
-
337
- threading.excepthook = thread_exception_handler
338
-
339
- for _ in range(args.threads):
340
- thread = threading.Thread(target=downloader, daemon=True, args=(args, s3_client, mkdir_lock, bucket, object_queue, error_queue, sem_counter))
341
- thread.start()
342
-
343
- # Read all the objects in the bucket and queue them for consideration by the download workers
344
-
345
- for page in s3_client.get_paginator('list_objects_v2').paginate(Bucket=bucket, Prefix=prefix):
346
- for s3_object in page['Contents']:
347
- object_queue.put(s3_object)
348
-
349
- print('Finished queuing objects')
350
-
351
- if args.threads > 1:
352
- # Wait for the queues to drain
353
-
354
- object_queue.join()
355
- else:
356
- downloader(args, s3_client, mkdir_lock, bucket, object_queue, error_queue, sem_counter, real_thread=False)
357
-
358
- # Report any errors:
359
-
360
- if not error_queue.empty():
361
- sys.stderr.write('\nErrors were encountered downloading some of the objects:\n\n\n')
362
-
363
- while not error_queue.empty():
364
- error_msg = error_queue.get()
365
- sys.stderr.write(f'{error_msg}\n')
366
- error_queue.task_done()
367
-
368
- ################################################################################
369
-
370
- def s3_sync():
371
- """Entry point"""
372
-
373
- try:
374
- main()
375
- except KeyboardInterrupt:
376
- sys.exit(1)
377
- except BrokenPipeError:
378
- sys.exit(2)
379
-
380
- ################################################################################
381
-
382
- if __name__ == '__main__':
383
- s3_sync()
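The deleted s3_sync.py is a textbook producer/consumer design: the main thread queues every S3 object, daemon worker threads filter and download them, and queue.join() serves as the completion barrier. A stripped-down, self-contained sketch of that pattern, with the download replaced by a stub:

import queue
import threading

NUM_THREADS = 12

def worker(work_queue, error_queue):
    """Consume items forever; errors are reported via a second queue."""
    while True:
        item = work_queue.get()
        try:
            print(f'Processing {item}')  # stand-in for s3_client.download_file()
        except Exception as exc:
            error_queue.put(f'Failed to process {item}: {exc}')
        finally:
            work_queue.task_done()

work_queue = queue.Queue()
error_queue = queue.Queue()

# Daemon threads exit with the main thread, so no explicit shutdown is needed
for _ in range(NUM_THREADS):
    threading.Thread(target=worker, daemon=True,
                     args=(work_queue, error_queue)).start()

for item in ('objects/a.txt', 'objects/b.txt'):  # hypothetical keys
    work_queue.put(item)

work_queue.join()  # returns once every queued item has been task_done()'d

while not error_queue.empty():
    print(error_queue.get())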