skilleter-thingy 0.2.15__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skilleter-thingy might be problematic.

@@ -1,553 +0,0 @@
- #! /usr/bin/env python3
-
- ################################################################################
- """ Find duplicate files and do things with them.
-
-     Uses the 'jdupes' utility
-
-     TODO: Option to ignore by filetype
-     TODO: Ignore folder.jpg files
-
-     NOTE: The option to ignore directories in jdupes doesn't work (at least in the version in Ubuntu 18.04) so we do this after searching for duplicates
- """
- ################################################################################
-
- import os
- import argparse
- import logging
- import subprocess
- import sys
- import re
- import pickle
- import copy
- import fnmatch
-
- ################################################################################
-
- ALWAYS_IGNORE_DIRS = ['.git']
-
- ################################################################################
-
- def error(msg):
-     """ Report an error and exit """
-
-     sys.stderr.write('%s\n' % msg)
-     sys.exit(1)
-
- ################################################################################
-
- def parse_command_line():
-     """ Parse the command line """
-
-     parser = argparse.ArgumentParser(description='Find duplicate files created by SyncThing or in temporary directories in a given path')
-     parser.add_argument('--debug', action='store_true', help='Debug output')
-     parser.add_argument('--save', action='store', help='Save duplicate file list')
-     parser.add_argument('--load', action='store', help='Load duplicate file list')
-     parser.add_argument('--script', action='store', help='Generate a shell script to delete the duplicates')
-     parser.add_argument('--exclude', action='append', help='Directories to skip when looking for duplicates')
-     parser.add_argument('--ignore', action='append', help='Wildcards to ignore when looking for duplicates')
-     parser.add_argument('path', nargs='?', default='.', help='Path(s) to search for duplicates')
-
-     args = parser.parse_args()
-
-     logging.basicConfig(level=logging.DEBUG if args.debug else logging.ERROR)
-
-     if args.save and args.load:
-         error('The save and load options are mutually exclusive')
-
-     return args
-
- ################################################################################
-
- def jdupes(path,
-            one_file_system=False,
-            no_hidden=False,
-            check_permissions=False,
-            quick=False,
-            recurse=True,
-            follow_symlinks=False,
-            exclude=None,
-            zero_match=False):
-     """ Run jdupes with the specified options """
-
-     cmd = ['jdupes', '--quiet']
-
-     if one_file_system:
-         cmd.append('--one-file-system')
-
-     if no_hidden:
-         cmd.append('--nohidden')
-
-     if check_permissions:
-         cmd.append('--permissions')
-
-     if quick:
-         cmd.append('--quick')
-
-     if recurse:
-         cmd += ['--recurse', path]
-     else:
-         cmd.append(path)
-
-     if follow_symlinks:
-         cmd.append('--symlinks')
-
-     if exclude:
-         cmd += ['--exclude', exclude]
-
-     if zero_match:
-         cmd.append('--zeromatch')
-
-     logging.debug('Running %s', ' '.join(cmd))
-
-     try:
-         result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
-     except FileNotFoundError:
-         error('The jdupes utility is not installed')
-
-     results = [[]]
-     for output in result.stdout.split('\n'):
-         output = output.strip()
-
-         logging.debug(output)
-
-         if output:
-             results[-1].append(output)
-         else:
-             results.append([])
-
-     while results and results[-1] == []:
-         results = results[:-1]
-
-     logging.debug('Found %d duplicated files', len(results))
-     for entry in results:
-         logging.debug(' %s', ', '.join(entry))
-
-     return results
-
- ################################################################################
-
- def remove_excluded_entries(args, duplicates):
-     """ Now filter out entries in the duplicates lists that are in the
-         directories that we are supposed to be ignoring """
-
-     # Build the list of directories to ignore - start with a copy of the default list
-
-     ignore_dirs = list(ALWAYS_IGNORE_DIRS)
-     if args.exclude:
-         ignore_dirs += args.exclude
-
-     # Build the list of absolute and relative paths to ignore
-     # both are in the form '/path/'
-
-     ignore_prefixes = []
-     ignore_subdirs = []
-
-     for ignore in ignore_dirs:
-         if ignore[-1] != '/':
-             ignore = '%s/' % ignore
-
-         if ignore[0] == '/':
-             ignore_prefixes.append(ignore)
-         else:
-             ignore_subdirs.append('/%s' % ignore)
-
-     # Now remove entries from the duplicate list that are within the ignored
-     # directories. If the resultant duplicate record is empty or only contains
-     # one entry, then remove it.
-
-     filtered_duplicates = []
-
-     for duplicate in duplicates:
-         # Set of entries in the record to remove
-
-         remove_entries = set()
-
-         for entry in duplicate:
-             # If the entry is in an excluded directory tree, remove it
-
-             for ignore in ignore_prefixes:
-                 if entry.startswith(ignore):
-                     remove_entries.add(entry)
-
-             # If the entry is in an excluded subdirectory tree, remove it
-
-             for ignore in ignore_subdirs:
-                 if ignore in entry:
-                     remove_entries.add(entry)
-
-             # If we have a list of files to ignore then check each entry against the list
-             # and remove any matches.
-
-             for ignore in args.ignore or []:
-                 if fnmatch.fnmatch(os.path.basename(entry), ignore):
-                     remove_entries.add(entry)
-
-             # If we loaded a saved list and the entry doesn't exist, remove it
-
-             if args.load and entry not in remove_entries and not os.path.isfile(entry):
-                 remove_entries.add(entry)
-
-         # If we have entries to remove from the record, then remove them
-
-         if remove_entries:
-             for entry in remove_entries:
-                 duplicate.remove(entry)
-
-         # Only add to the filtered duplicate list if we have more than one duplicate in the entry
-
-         if len(duplicate) >= 2:
-             filtered_duplicates.append(duplicate)
-
-     return filtered_duplicates
-
- ################################################################################
-
- def find_duplicates(args):
-     """ Find duplicates, or load them from a saved status file """
-
-     if args.load:
-         logging.debug('Loading duplicate file data from %s', args.load)
-
-         with open(args.load, 'rb') as infile:
-             duplicates = pickle.load(infile)
-
-         logging.debug('Data loaded, %d duplicates', len(duplicates))
-     else:
-         duplicates = jdupes(args.path)
-
-     if args.save:
-         logging.debug('Saving duplicate file data to %s', args.save)
-
-         with open(args.save, 'wb') as outfile:
-             pickle.dump(duplicates, outfile)
-
-         print('Duplicate file data saved')
-         sys.exit(0)
-
-     return remove_excluded_entries(args, duplicates)
-
- ################################################################################
-
- def check_duplicates(duplicate):
-     """ Given a list of duplicate files work out what to do with them.
-         Returns:
-             List of files (if any) to keep
-             List of files (if any) to be deleted
-             Name of a file that is similar to the duplicates (both in name and content)
-             True if the files being removed are .ini files with mangled names
-             Any error/warning message associated with processing the duplicates
-     """
-
-     keep = set()
-     remove = set()
-     similar = None
-     error_msg = None
-
-     # We can just delete entries that are conflicting picasa.ini files
-
-     for entry in duplicate:
-         if re.fullmatch(r'.*/\.?[pP]icasa.sync-conflict-.*\.ini', entry):
-             logging.debug('Remove picasa.ini sync conflict: %s', entry)
-
-             remove.add(entry)
-
-     if remove:
-         for item in remove:
-             duplicate.remove(item)
-
-     ini_file_purge = (len(remove) > 0)
-
-     # If all of the files are called 'picasa.ini' then we skip them as it is valid to have multiple picasa.ini files
-
-     if duplicate:
-         for entry in duplicate:
-             if os.path.basename(entry).lower() not in ('picasa.ini', '.picasa.ini'):
-                 break
-         else:
-             print('Keeping picasa.ini files: %s' % (', '.join(duplicate)))
-             duplicate = []
-
-     # Skip other checks if we don't have any files that aren't conflicting picasa.ini files
-
-     if duplicate:
-         # Look for entries that are in known temporary directories
-
-         for entry in duplicate:
-             if re.match(r'.*/(\$RECYCLE\.BIN|.Picasa3Temp|.Picasa3Temp_[0-9]+|.picasaoriginals)/.*', entry):
-                 logging.debug('Removing temporary directory item: %s', entry)
-                 remove.add(entry)
-             else:
-                 keep.add(entry)
-
-         # Look for lists of copies where some are marked as copies with _X appended to the file name
-
-         if len(keep) > 1:
-             copies = set()
-             originals = set()
-
-             for entry in keep:
-                 if re.fullmatch(r'.*_[1-9][0-9]{0,2}\.[^/]+', entry):
-                     copies.add(entry)
-                 else:
-                     originals.add(entry)
-
-             # If we have at least one original, then we can remove the copies
-
-             if originals:
-                 if copies:
-                     logging.debug('Removing copies: %s', list(copies))
-                     logging.debug('Keeping originals: %s', originals)
-
-                     remove |= copies
-                     keep = originals
-             else:
-                 error_msg = 'No originals found in %s' % (', '.join(keep))
-
-             # Looks for lists of copies where some are marked as copies with (N) appended to the file name
-
-             copies = set()
-             originals = set()
-
-             for entry in keep:
-                 if re.fullmatch(r'.*\([0-9]+\)\.[^/]+', entry):
-                     copies.add(entry)
-                 else:
-                     originals.add(entry)
-
-             # If we have at least one original, then we can remove the copies
-
-             if originals:
-                 if copies:
-                     logging.debug('Removing copies: %s', list(copies))
-                     logging.debug('Keeping originals: %s', originals)
-
-                     remove |= copies
-                     keep = originals
-             else:
-                 error_msg = 'No originals found in %s' % (', '.join(keep))
-
-         # Now look for sync conflicts
-
-         if len(keep) > 1:
-             conflicts = set()
-
-             for entry in keep:
-                 if re.fullmatch(r'.*(\.sync-conflict-|/.stversions/).*', entry):
-                     conflicts.add(entry)
-
-             if conflicts:
-                 keep = keep.difference(conflicts)
-
-                 if keep:
-                     logging.debug('Removing sync conflicts: %s', conflicts)
-                     logging.debug('Keeping: %s', keep)
-
-                     remove |= conflicts
-                 else:
-                     logging.debug('No non-conflicting files found in %s', (', '.join(conflicts)))
-
-                     originals = set()
-
-                     for entry in conflicts:
-                         originals.add(re.sub(r'(\.sync-conflict-[0-9]{8}-[0-9]{6}-[A-Z]{7}|/.stversions/)', '', entry))
-
-                     if len(originals) == 1:
-                         original = originals.pop()
-                         if os.path.isfile(original):
-
-                             similar = original
-                             remove = conflicts
-
-         # Now look for files that differ only by case
-
-         if len(keep) > 1:
-             # Take a copy of the set, then compare the lower case versions of the entries
-             # and remove any that match
-             # TODO: We only check for a match against a lower case version of the first entry
-
-             keep_c = copy.copy(keep)
-             name_lc = keep_c.pop().lower()
-
-             for entry in keep_c:
-                 if entry.lower() == name_lc:
-                     logging.debug('Removing duplicate mixed-case entry: %s', entry)
-
-                     remove.add(entry)
-
-             keep = keep.difference(remove)
-
-         # Now look for files with '~' in the name
-
-         if len(keep) > 1:
-             tilde = set()
-
-             for k in keep:
-                 if '~' in k:
-                     tilde.add(k)
-
-             if tilde != keep:
-                 remove |= tilde
-                 keep = keep.difference(tilde)
-
-         # Now remove entries with the shorter subdirectory names
-
-         if len(keep) > 1:
-             longest = ""
-             longest_name = None
-
-             for k in sorted(list(keep)):
-                 subdir = os.path.split(os.path.dirname(k))[1]
-
-                 if len(subdir) > len(longest):
-                     longest = subdir
-                     longest_name = k
-
-             if longest_name:
-                 for k in keep:
-                     if k != longest_name:
-                         remove.add(k)
-
-             keep = keep.difference(remove)
-
-         # Now remove entries with the shorter file names
-
-         if len(keep) > 1:
-             longest = ""
-             longest_name = None
-
-             for k in sorted(list(keep)):
-                 filename = os.path.basename(k)
-
-                 if len(filename) > len(longest):
-                     longest = filename
-                     longest_name = k
-
-             if longest_name:
-                 for k in keep:
-                     if k != longest_name:
-                         remove.add(k)
-
-             keep = keep.difference(remove)
-
-     # Don't allow files called 'folder.jpg' to be removed - multiple directories can
-     # have the same cover art.
-
-     if remove:
-         for r in remove:
-             if os.path.basename(r) in ('folder.jpg', 'Folder.jpg', 'cover.jpg', 'Cover.jpg'):
-                 keep.add(r)
-
-         remove = remove.difference(keep)
-
-     return sorted(list(keep)), sorted(list(remove)), similar, ini_file_purge, error_msg
-
- ################################################################################
-
- def process_duplicates(args, duplicates):
-     """ Process the duplicate file records """
-
-     # Optionally generate the shell script
-
-     if args.script:
-         script = open(args.script, 'wt')
-
-         script.write('#! /usr/bin/env bash\n\n'
-                      '# Auto-generated shell script to delete duplicate files\n\n'
-                      'set -o pipefail\n'
-                      'set -o errexit\n'
-                      'set -o nounset\n\n')
-
-     # List of errors - we report everything that doesn't work at the end
-
-     errors = []
-
-     # Decide what to do with each duplication record
-
-     for duplicate in duplicates:
-         keep, remove, similar, ini_file_purge, error_msg = check_duplicates(duplicate)
-
-         if error_msg:
-             errors.append(error_msg)
-
-         # Report what we'd do
-
-         if args.script and (remove or keep):
-             script.write('\n')
-
-             for k in keep:
-                 script.write('# Keep %s\n' % k)
-
-             if ini_file_purge:
-                 script.write('# Remove conflicting, renamed picasa.ini files\n')
-
-             if similar:
-                 script.write('# Similar file: %s\n' % similar)
-
-             for r in remove:
-                 r = r.replace('$', '\\$')
-                 script.write('rm -- "%s"\n' % r)
-
-         if remove:
-             print('Duplicates found:')
-
-             if keep:
-                 print(' Keep: %s' % (', '.join(keep)))
-
-             if similar:
-                 print(' Similar: %s' % similar)
-
-             print(' Delete: %s' % (', '.join(remove)))
-
-         elif keep and not remove:
-             errors.append('Keeping all copies of %s' % (', '.join(keep)))
-
-         elif len(keep) > 1:
-             print('Keeping %d copies of %s' % (len(keep), ', '.join(keep)))
-             print(' Whilst removing %s' % (', '.join(remove)))
-
-         elif duplicate and remove and not keep:
-             errors.append('All entries classified for removal: %s' % (', '.join(remove)))
-
-     if errors:
-         errors.sort()
-
-         print('-' * 80)
-         print('Problems:')
-
-         for error in errors:
-             print(error)
-
-         if args.script:
-             script.write('\n'
-                          '# %s\n'
-                          '# There are a number of duplicates where it is not clear which one should be kept,\n'
-                          '# or whether all copies should be kept. These are listed below.\n'
-                          '# %s\n\n' % ('-' * 80, '-' * 80))
-
-             for error in errors:
-                 script.write('# %s\n' % error)
-
- ################################################################################
-
- def rmdupe():
-     """ Main function """
-
-     try:
-         args = parse_command_line()
-
-         duplicates = find_duplicates(args)
-
-         process_duplicates(args, duplicates)
-
-     except KeyboardInterrupt:
-         sys.exit(1)
-
-     except BrokenPipeError:
-         sys.exit(2)
-
- ################################################################################
- # Entry point
-
- if __name__ == '__main__':
-     rmdupe()
@@ -1,97 +0,0 @@
- #! /usr/bin/env python3
-
- ################################################################################
- """ Docker interface for Thingy
-
-     Copyright (C) 2017 John Skilleter
-
-     Note that this:
-     * Only implements functions required by docker-purge
-     * Only has basic error checking, in that it raises DockerError
-       for any error returned by the external docker command.
- """
- ################################################################################
-
- import thingy.run as run
-
- ################################################################################
-
- class DockerError(Exception):
-     """ Exception for dockery things """
-
-     pass
-
- ################################################################################
-
- def instances(all=False):
-     """ Return a list of all current Docker instances """
-
-     cmd = ['docker', 'ps', '-q']
-
-     if all:
-         cmd.append('-a')
-
-     instances_list = []
-     try:
-         for result in run.run(cmd):
-             instances_list.append(result)
-     except run.RunError as exc:
-         raise DockerError(exc)
-
-     return instances_list
-
- ################################################################################
-
- def stop(instance, force=False):
-     """ Stop the specified Docker instance """
-
-     # TODO: force option not implemented
-
-     try:
-         run.run(['docker', 'stop', instance], output=True)
-     except run.RunError as exc:
-         raise DockerError(exc)
-
- ################################################################################
-
- def rm(instance, force=False):
-     """ Remove the specified instance """
-
-     cmd = ['docker', 'rm']
-
-     if force:
-         cmd.append('--force')
-
-     cmd.append(instance)
-
-     try:
-         run.run(cmd, output=True)
-     except run.RunError as exc:
-         raise DockerError(exc)
-
- ################################################################################
-
- def images():
-     """ Return a list of all current Docker images """
-
-     try:
-         for result in run.run(['docker', 'images', '-q']):
-             yield result
-     except run.RunError as exc:
-         raise DockerError(exc)
-
- ################################################################################
-
- def rmi(image, force=False):
-     """ Remove the specified image """
-
-     cmd = ['docker', 'rmi']
-     if force:
-         cmd.append('--force')
-
-     cmd.append(image)
-
-     try:
-         run.run(cmd, foreground=True)
-     except run.RunError as exc:
-         raise DockerError(exc)
skilleter_thingy/x.py DELETED
@@ -1,3 +0,0 @@
- import subprocess
-
- subprocess.check_call(['ls'])