skilleter-thingy 0.1.13__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skilleter-thingy might be problematic. Click here for more details.

@@ -0,0 +1,471 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Sync a directory tree full of photos into a tree organised by year, month and date
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import datetime
10
+ import logging
11
+ import argparse
12
+ import glob
13
+ import re
14
+ import shutil
15
+ import PIL
16
+ import imagehash
17
+
18
+ from collections import defaultdict
19
+ from enum import Enum
20
+
21
+ from PIL import Image, ExifTags
22
+
23
+ import thingy.colour as colour
24
+
25
+ ################################################################################
26
+
27
# Default locations for local storage of photos and videos
# ('~' is expanded to the current user's home directory at import time)

DEFAULT_PHOTO_DIR = os.path.expanduser('~/Pictures')
DEFAULT_VIDEO_DIR = os.path.expanduser('~/Videos')

# File extensions (case-insensitive)
# get_filetype() lower-cases a file's extension before looking it up in these tuples

IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', )
VIDEO_EXTENSIONS = ('.mp4', '.mov', )
IGNORE_EXTENSIONS = ('.ini', )
37
+
38
+ # Enum of filetypes
39
+
40
class FileType(Enum):
    """Classification of a file, as returned by get_filetype()."""

    IMAGE = 0    # Extension is listed in IMAGE_EXTENSIONS
    VIDEO = 1    # Extension is listed in VIDEO_EXTENSIONS
    UNKNOWN = 2  # Extension is in none of the extension lists
    IGNORE = 3   # Extension is listed in IGNORE_EXTENSIONS
45
+
46
# Regexes for matching date strings

# Leading 'YYYY:MM:DD', applied to the EXIF DateTimeOriginal value
YYYY_MM_DD_re = re.compile(r'^(\d{4}):(\d{2}):(\d{2})')
# File names starting 'IMG' or 'VID', a '-' or '_', then YYYYMMDD
IMG_DATE_re = re.compile(r'(?:IMG|VID)[-_](\d{4})(\d{2})(\d{2})[-_.].*')

# Year, month and day separated by '-', '_' or ' ', anywhere in a file name
GENERAL_DATE_re = re.compile(r'(\d{4})[-_ ](\d{2})[-_ ](\d{2})')

# '/YYYY/MM/' directory components within a path
YEAR_MONTH_PATH_re = re.compile(r'/(\d{4})/(\d{2})/')

# 'YYYY-MM', as parsed by parse_yyyymm()
YYYY_MM_re = re.compile(r'(\d{4})-(\d{2})')

# Names of copies: '<prefix> {aalq_f...}<suffix>'; remove_duplicates() applies
# this to lower-cased names and rebuilds the original name as prefix + suffix
DUP_RE = re.compile(r'(.*) \{aalq_f.*\}(.*)')

# Date format for YYYY-MM

DATE_FORMAT = '%Y-%m'

# If two pictures with the same name prefix have hash values that differ by less
# than this, remove_duplicates() treats the second one as a (near-)duplicate

MIN_HASH_DIFF = 15
67
+
68
+ ################################################################################
69
+
70
def parse_yyyymm(datestr):
    """Turn a 'YYYY-MM' string into a datetime.date for the first day of that month.

    A string that is not exactly of the form YYYY-MM is reported via colour.error().
    """

    parsed = YYYY_MM_re.fullmatch(datestr)

    if parsed is None:
        # NOTE(review): presumably colour.error() terminates the program - if it
        # returns, the unpacking below fails on None. Confirm against thingy.colour.
        colour.error(f'ERROR: Invalid date: {datestr}')

    year, month = (int(field) for field in parsed.groups())

    return datetime.date(year, month, day=1)
79
+
80
+ ################################################################################
81
+
82
def parse_command_line():
    """Parse and validate the command line options.

    Returns the argparse namespace with one extra attribute, ``local_dir``,
    which maps 'photo' and 'video' to the picture and video directories.

    A missing --path is reported via colour.error(). Logging is configured
    here as a side effect: DEBUG level with --verbose, INFO otherwise.
    """

    parser = argparse.ArgumentParser(description='Sync photos from Google Photos')

    parser.add_argument('--verbose', '-v', action='store_true', help='Output verbose status information')
    parser.add_argument('--dryrun', '--dry-run', '-D', action='store_true', help='Just list files to be copied, without actually copying them')
    parser.add_argument('--picturedir', '-P', action='store', default=DEFAULT_PHOTO_DIR, help=f'Location of local picture storage directory (defaults to {DEFAULT_PHOTO_DIR})')
    parser.add_argument('--videodir', '-V', action='store', default=DEFAULT_VIDEO_DIR, help=f'Location of local video storage directory (defaults to {DEFAULT_VIDEO_DIR})')
    parser.add_argument('--skip-no-day', '-z', action='store_true', help='Don\'t sync files where the day of the month could not be determined')
    parser.add_argument('--path', '-p', action='store', default=None, help='Path to sync from')
    parser.add_argument('action', nargs='*', help='Actions to perform (report or sync)')

    args = parser.parse_args()

    if not args.path:
        colour.error('You must specify a source directory')

    # Configure debugging

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    # Report parameters if verbose

    logging.debug('Source: %s', args.path)
    logging.debug('Pictures: %s', args.picturedir)
    logging.debug('Videos: %s', args.videodir)
    logging.debug('Dry run: %d', args.dryrun)

    # Destination directory for each media type, keyed by media type name

    args.local_dir = {'photo': args.picturedir, 'video': args.videodir}

    return args
116
+
117
+ ################################################################################
118
+
119
def get_exif_data(image):
    """Return EXIF data for the image as a dictionary.

    Keys are the tag names from ExifTags.TAGS where the tag is known, otherwise
    the raw tag number. If the file cannot be read as an image the failure is
    logged and an empty dictionary is returned.
    """

    # Tag number of the pointer to the Exif sub-IFD
    exif_ifd_pointer = 0x8769

    raw_tags = {}

    try:
        # The context manager closes the underlying file; without it every
        # image scanned in a tree walk would leave a file handle open
        with Image.open(image) as img:
            img_exif = img.getexif()

            if img_exif is not None:
                raw_tags.update(img_exif.items())

                # getexif() only exposes the primary IFD, while the capture
                # date (DateTimeOriginal) is stored in the Exif sub-IFD, so
                # merge that in too where the installed Pillow supports it
                if hasattr(img_exif, 'get_ifd'):
                    raw_tags.update(img_exif.get_ifd(exif_ifd_pointer).items())
    except OSError as exc:
        logging.info('Error reading EXIF data for %s - %s', image, exc)
        return {}

    # Translate tag numbers to names, keeping the number for unknown tags

    return {ExifTags.TAGS.get(key, key): val for key, val in raw_tags.items()}
142
+
143
+ ################################################################################
144
+
145
def get_filetype(filename):
    """Classify a file as image, video, ignorable or unknown from its extension.

    The extension is compared case-insensitively.
    """

    extension = os.path.splitext(filename)[1].lower()

    # Checked in order; the first list containing the extension wins

    classification = (
        (IMAGE_EXTENSIONS, FileType.IMAGE),
        (VIDEO_EXTENSIONS, FileType.VIDEO),
        (IGNORE_EXTENSIONS, FileType.IGNORE),
    )

    for known_extensions, file_type in classification:
        if extension in known_extensions:
            return file_type

    return FileType.UNKNOWN
162
+
163
+ ################################################################################
164
+
165
def find_files(directory_wildcards):
    """Scan every directory tree matching the given wildcards and classify the files.

    Returns a tuple (images, videos, unknown):
      * images maps each image path to its EXIF dictionary (possibly empty)
      * videos maps each video path to an empty dictionary
      * unknown is a list of paths whose type is FileType.UNKNOWN

    Files classified as FileType.IGNORE are not returned.
    """

    images = {}
    videos = {}
    unknown = []

    logging.info('Reading files in the directory tree(s) at %s', ', '.join(directory_wildcards))

    for pattern in directory_wildcards:
        for top_dir in glob.glob(pattern):
            for current_dir, _, filenames in os.walk(top_dir):
                logging.debug('Reading %s', current_dir)

                for filename in filenames:
                    path = os.path.join(current_dir, filename)
                    kind = get_filetype(path)

                    if kind == FileType.UNKNOWN:
                        unknown.append(path)

                    elif kind == FileType.VIDEO:
                        # TODO: Is there a way of getting EXIF-type data from video files? (https://thepythoncode.com/article/extract-media-metadata-in-python but does it include date info?)
                        videos[path] = {}

                    elif kind == FileType.IMAGE:
                        try:
                            images[path] = get_exif_data(path)
                        except PIL.UnidentifiedImageError:
                            colour.write(f'[BOLD:WARNING:] Unable to get EXIF data from [BLUE:{path}]')
                            images[path] = {}

    for description, found in (('image', images), ('video', videos), ('unknown', unknown)):
        logging.info(f'Read %s {description} files', len(found))

    return images, videos, unknown
208
+
209
+ ################################################################################
210
+
211
def get_media_date(name, info):
    """Work out the date of a media file.

    name is the path of the file and info its EXIF dictionary. The sources
    are tried in order: the EXIF DateTimeOriginal value, a date in the file
    name, then /YYYY/MM/ components in the path.

    Returns (year, month, day) as strings, with a day of '00' when only the
    path supplied a year and month, or (None, None, None) if nothing matched.
    """

    # Prefer the date recorded in the EXIF data

    if 'DateTimeOriginal' in info:
        exif_match = YYYY_MM_DD_re.match(info['DateTimeOriginal'])
        if exif_match:
            return exif_match.group(1), exif_match.group(2), exif_match.group(3)

    # Next, look for a date embedded in the file name itself

    basename = os.path.basename(name)

    name_match = IMG_DATE_re.match(basename) or GENERAL_DATE_re.search(basename)
    if name_match:
        return name_match.group(1), name_match.group(2), name_match.group(3)

    # Finally, fall back to year and month directories, where the day is unknown

    path_match = YEAR_MONTH_PATH_re.search(name)
    if path_match:
        return path_match.group(1), path_match.group(2), '00'

    # Nothing identified a date

    return None, None, None
252
+
253
+ ################################################################################
254
+
255
def sync_media_local(dryrun, skip_no_day, media_files, destination_dir):
    """Copy media files into <destination_dir>/<year>/<year>-<month>-<day>/.

    media_files maps each source path to its EXIF dictionary. Files whose date
    cannot be determined are reported and skipped, as are files whose only
    known day is '00' when skip_no_day is set. An existing destination file is
    never overwritten. With dryrun set, the copies are reported but not made.
    """

    for source, info in media_files.items():
        year, month, day = get_media_date(source, info)

        # A day of '00' means only the year and month could be found

        day_rejected = skip_no_day and day == '00'

        if day_rejected or not (year and month and day):
            colour.write(f'[RED:ERROR]: Unable to determine where to copy [BLUE:{source}]')
            continue

        target = os.path.join(destination_dir, year, f'{year}-{month}-{day}', os.path.basename(source))

        if os.path.exists(target):
            colour.write(f'[RED:WARNING]: Destination [BLUE:{target}] already exists - file will not be overwritten!')
            continue

        target_dir = os.path.dirname(target)

        colour.write(f'Copying [BLUE:{source}] to [BLUE:{target_dir}]')

        if dryrun:
            continue

        os.makedirs(target_dir, exist_ok=True)

        shutil.copyfile(source, target)
285
+
286
+ ################################################################################
287
+
288
def local_directory(args, mediatype, year, month):
    """Build the path <local dir for mediatype>/<year>/<year>-<month>.

    The month is zero-padded to two digits; the base directory comes from
    args.local_dir[mediatype].
    """

    base_dir = args.local_dir[mediatype]
    month_dir = f'{year}-{month:02}'

    return os.path.join(base_dir, str(year), month_dir)
292
+
293
+ ################################################################################
294
+
295
def media_sync(dryrun, skip_no_day, media, media_files, local_dir):
    """Report how far local and remote files of one media type are in sync and
    copy the remote files that have no local counterpart.

    media is the media type name used in the messages. media_files holds
    'local' and 'remote' dictionaries keyed by path; files are paired up by
    base name and every paired file is removed from both dictionaries, so
    media_files is modified in place. Whatever is left under 'remote' is
    passed to sync_media_local() with local_dir as the destination.
    """

    # Index both sides by base name
    # TODO: Could be a problem if we have multiple files with the same name (e.g. in different months)

    by_basename = {
        side: {os.path.basename(path): path for path in media_files[side]}
        for side in ('local', 'remote')
    }

    # Drop every file that is present on both sides

    in_sync = [base for base in by_basename['local'] if base in by_basename['remote']]

    for base in in_sync:
        del media_files['remote'][by_basename['remote'][base]]
        del media_files['local'][by_basename['local'][base]]

    matching = len(in_sync)

    if not matching:
        colour.write(f' [BOLD:No {media} files are in sync]')
    else:
        colour.write(f' [BOLD:{matching} {media} files are in sync]')

    if not media_files['local']:
        colour.write(f' [BOLD:No local {media} files are out of sync]')
    else:
        colour.write(f' [BOLD:{len(media_files["local"])} local {media} files are out of sync]')

    if not media_files['remote']:
        colour.write(f' [BOLD:No remote {media} files are out of sync]')
    else:
        colour.write(f' [BOLD:{len(media_files["remote"])} remote {media} files are out of sync]')
        sync_media_local(dryrun, skip_no_day, media_files['remote'], local_dir)

    colour.write('')
337
+
338
+ ################################################################################
339
+
340
+ # TODO: Tidy this up!
341
def _average_hash_hex(filename):
    """Return the imagehash average hash of an image file as a hex string.

    Raises OSError if the file cannot be opened or decoded as an image.
    """

    # The context manager closes the file as soon as the hash is computed
    with Image.open(filename) as img:
        return str(imagehash.average_hash(img))


def remove_duplicates(media_files):
    """Remove (near-)duplicate copies from media_files, in place.

    media_files is a dictionary keyed by file path. A file is a candidate
    copy if its lower-cased path matches DUP_RE; candidates are grouped by
    the name of the original they were copied from. Within each group the
    imagehash average hash is used to decide which files really are
    duplicates, and those are deleted from media_files.
    """

    print('Checking for duplicate files')

    # Originals can have upper or lower case extensions, copies only tend to have lower
    # case, so build a lower case to original lookup table

    names = {name.lower(): name for name in media_files}

    # Build a list of candidate copies for each original - i.e. files with the same
    # prefix and a suffix matching DUP_RE, indexed by the lower-cased original name

    candidates = defaultdict(list)

    for lower_name in names:
        orig_match = DUP_RE.fullmatch(lower_name)
        if orig_match:
            candidates[orig_match.group(1) + orig_match.group(2)].append(lower_name)

    # Now use the imagehash library to check each list of maybe-duplicate files
    # to build a list of actual duplicates (or at least nearly-indistinguishable images)
    # TODO: Better to build list of all hashes, then find near-duplicates

    actual_duplicates = set()

    for original, copies in candidates.items():
        files_by_hash = defaultdict(list)

        # Start with the original file, if it exists, so that it is the one
        # that is kept (we can have copies and no original)

        if original in names:
            try:
                files_by_hash[_average_hash_hex(names[original])].append(names[original])
            except OSError as exc:
                # Best effort: an unreadable original just takes no part in the comparison
                logging.debug('Unable to hash %s - %s', names[original], exc)

        # Calculate the hash of each of the candidate copies

        for copy_name in copies:
            filename = names[copy_name]
            try:
                files_by_hash[_average_hash_hex(filename)].append(filename)
            except OSError:
                colour.write(f'[BOLD:WARNING]: Unable to read {filename}')

        # Files with identical hash values: keep the first, the rest are duplicates

        kept = {}
        for hash_value, files in files_by_hash.items():
            kept[hash_value] = files[0]
            actual_duplicates.update(files[1:])

        # Look for adjacent entries in the sorted list of hash values that differ by
        # less than the minimum and treat the second of each pair as a duplicate
        # NOTE(review): this is the numeric difference between hash values, not the
        # bit-wise (Hamming) distance that subtracting imagehash objects gives -
        # confirm that this is the intended similarity measure

        hash_values = sorted(kept)
        logging.debug('Hash values for duplicates: %s', hash_values)

        for lower, upper in zip(hash_values, hash_values[1:]):
            if int(upper, 16) - int(lower, 16) < MIN_HASH_DIFF:
                actual_duplicates.add(kept[upper])

    # Remove all the entries in the real duplicates list

    for duplicate in actual_duplicates:
        logging.info('Removing %s as a (near-)duplicate', os.path.basename(duplicate))
        del media_files[duplicate]
422
+
423
+ ################################################################################
424
+
425
def photo_sync(args):
    """Synchronise the photos and videos.

    args is the namespace from parse_command_line(). Files are read from
    args.path (the 'remote' side) and from args.picturedir and args.videodir
    (the 'local' side). Duplicate copies are dropped from the remote side,
    then each media type is synced to its own local directory.
    """

    # Parenthesised so that the 80 dashes sit inside one set of colour markup;
    # without the parentheses '%' binds before '*' and the whole markup is repeated
    separator = '[GREEN:%s]' % ('-' * 80)

    colour.write(separator)

    # Read the pictures and their EXIF data to get the dates

    media_files = {'photo': {}, 'video': {}}
    unknown_files = {}

    media_files['photo']['remote'], media_files['video']['remote'], unknown_files['remote'] = find_files([args.path])
    media_files['photo']['local'], media_files['video']['local'], unknown_files['local'] = find_files([args.picturedir, args.videodir])

    for media in ('photo', 'video'):
        remove_duplicates(media_files[media]['remote'])

    colour.write(separator)

    # Pass each media type name explicitly - reusing the loop variable from
    # above would label both syncs as 'video' in the messages

    destinations = (('photo', args.picturedir), ('video', args.videodir))

    for media, local_dir in destinations:
        media_sync(args.dryrun, args.skip_no_day, media, media_files[media], local_dir)
445
+
446
+ ################################################################################
447
+
448
def main():
    """Parse the command line, then run the sync with the resulting options"""

    photo_sync(parse_command_line())
456
+
457
+ ################################################################################
458
+
459
def photosync():
    """Entry point - run main(), exiting quietly if interrupted.

    Exits with status 1 on Ctrl-C and status 2 if the output pipe is closed.
    """
    try:
        main()
    except KeyboardInterrupt:
        sys.exit(1)
    except BrokenPipeError:
        sys.exit(2)


# The package's entry_points.txt registers the console script as
# 'skilleter_thingy:localphotosync.localphotosync', so the entry point must also
# be reachable under that name or the installed command cannot be loaded.
# NOTE(review): assumes this module is skilleter_thingy/localphotosync.py - confirm

localphotosync = photosync
467
+
468
+ ################################################################################
469
+
470
# Allow the module to be run directly as a script as well as via its entry point
if __name__ == '__main__':
    photosync()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skilleter_thingy
3
- Version: 0.1.13
3
+ Version: 0.1.14
4
4
  Summary: A collection of useful utilities, mainly aimed at making Git more friendly
5
5
  Author-email: John Skilleter <john@skilleter.org.uk>
6
6
  Project-URL: Home, https://skilleter.org.uk
@@ -602,9 +602,9 @@ Check how much free space is available on all filesystems, ignoring read-only fi
602
602
 
603
603
  Issue a warning if any are above 90% used.
604
604
 
605
- ## gphotosync
605
+ ## gphotosync & localphotosync
606
606
 
607
- Utility for syncing photos from Google Photos to local storage
607
+ Utilities for syncing photos from Google Photos or a local directory to local storage
608
608
 
609
609
  ## moviemover
610
610
 
@@ -23,6 +23,7 @@ skilleter_thingy/gitprompt.py,sha256=SzSMd0EGI7ftPko80Q2PipwbVA-qjU1jsmdpmTCM5GI
23
23
  skilleter_thingy/gl.py,sha256=9zbGpKxw6lX9RghLkdy-Q5sZlqtbB3uGFO04qTu1dH8,5954
24
24
  skilleter_thingy/gphotosync.py,sha256=M0yzt5IxCHezkCOQwz3Chn8oudY_M7PRffboIUFLedk,22477
25
25
  skilleter_thingy/linecount.py,sha256=ehTN6VD76i4U5k6dXuYoiqSRHI67_BP-bziklNAJSKY,4309
26
+ skilleter_thingy/localphotosync.py,sha256=EtcEMx71fEpMqGYvNdCEsp_9bOZ950HebiT_tCj5hV4,15912
26
27
  skilleter_thingy/moviemover.py,sha256=QzUAWQzQ1AWWREIhl-VMaLo2h8MMhOekBnao5jGWV1s,4470
27
28
  skilleter_thingy/multigit.py,sha256=TMfu6PCx0alxmv-qNwNnbG4rl4_eW3OrBZtvtGnuCWE,28708
28
29
  skilleter_thingy/photodupe.py,sha256=l0hbzSLb2Vk2ceteg-x9fHXCEE1uUuFo84hz5rsZUPA,4184
@@ -60,9 +61,9 @@ skilleter_thingy/thingy/run.py,sha256=6SNKWF01fSxzB10GMU9ajraXYZqAL1w0PXkqjJdr1U
60
61
  skilleter_thingy/thingy/tfm_pane.py,sha256=oqy5zBzKwfbjbGqetbbhpKi4x5He7sl4qkmhUeqtdZc,19789
61
62
  skilleter_thingy/thingy/tidy.py,sha256=UWpBWuIMCE1UonLJErb41yW3RtpXrK_bt4Z4cZR-eDU,5910
62
63
  skilleter_thingy/thingy/venv_template.py,sha256=SsVNvSwojd8NnFeQaZPCRQYTNdwJRplpZpygbUEXRnY,1015
63
- skilleter_thingy-0.1.13.dist-info/licenses/LICENSE,sha256=ljOS4DjXvqEo5VzGfdaRwgRZPbNScGBmfwyC8PChvmQ,32422
64
- skilleter_thingy-0.1.13.dist-info/METADATA,sha256=9dEfBsv7z_CRo7g_t0A8V5MmFNWWn3NXbz8TpiF8Ixc,29874
65
- skilleter_thingy-0.1.13.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
66
- skilleter_thingy-0.1.13.dist-info/entry_points.txt,sha256=u5ymS-KPljIGTnprV5yJsAjz7qgeT2BZ-Qo_Con_PFM,2145
67
- skilleter_thingy-0.1.13.dist-info/top_level.txt,sha256=8-JhgToBBiWURunmvfpSxEvNkDHQQ7r25-aBXtZv61g,17
68
- skilleter_thingy-0.1.13.dist-info/RECORD,,
64
+ skilleter_thingy-0.1.14.dist-info/licenses/LICENSE,sha256=ljOS4DjXvqEo5VzGfdaRwgRZPbNScGBmfwyC8PChvmQ,32422
65
+ skilleter_thingy-0.1.14.dist-info/METADATA,sha256=jcn6GLmUqMJgE3jETi37VlQv4yNw0_JmOuoE2Vid6hA,29914
66
+ skilleter_thingy-0.1.14.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
67
+ skilleter_thingy-0.1.14.dist-info/entry_points.txt,sha256=eGQNF-s-RGnOAyJqz_f9EPS0ExGsBmAmzwb3FlSmZzM,2209
68
+ skilleter_thingy-0.1.14.dist-info/top_level.txt,sha256=8-JhgToBBiWURunmvfpSxEvNkDHQQ7r25-aBXtZv61g,17
69
+ skilleter_thingy-0.1.14.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.3.1)
2
+ Generator: setuptools (80.4.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -23,6 +23,7 @@ gitprompt = skilleter_thingy:gitprompt.gitprompt
23
23
  gl = skilleter_thingy:gl.gl
24
24
  gphotosync = skilleter_thingy:gphotosync.gphotosync
25
25
  linecount = skilleter_thingy:linecount.linecount
26
+ localphotosync = skilleter_thingy:localphotosync.localphotosync
26
27
  mg = skilleter_thingy:mg.mg
27
28
  moviemover = skilleter_thingy:moviemover.moviemover
28
29
  multigit = skilleter_thingy:multigit.multigit