megadetector 5.0.24__py3-none-any.whl → 5.0.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/data_management/cct_json_utils.py +15 -2
- megadetector/data_management/coco_to_yolo.py +53 -31
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
- megadetector/data_management/databases/integrity_check_json_db.py +2 -2
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
- megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
- megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
- megadetector/data_management/remap_coco_categories.py +60 -11
- megadetector/data_management/{wi_to_md.py → speciesnet_to_md.py} +2 -2
- megadetector/data_management/yolo_to_coco.py +45 -15
- megadetector/detection/run_detector.py +1 -0
- megadetector/detection/run_detector_batch.py +5 -4
- megadetector/postprocessing/classification_postprocessing.py +788 -524
- megadetector/postprocessing/compare_batch_results.py +176 -9
- megadetector/postprocessing/create_crop_folder.py +420 -0
- megadetector/postprocessing/load_api_results.py +4 -1
- megadetector/postprocessing/md_to_coco.py +1 -1
- megadetector/postprocessing/postprocess_batch_results.py +158 -44
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
- megadetector/postprocessing/separate_detections_into_folders.py +20 -4
- megadetector/postprocessing/subset_json_detector_output.py +180 -15
- megadetector/postprocessing/validate_batch_results.py +13 -5
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
- megadetector/taxonomy_mapping/species_lookup.py +45 -2
- megadetector/utils/ct_utils.py +76 -3
- megadetector/utils/directory_listing.py +4 -4
- megadetector/utils/gpu_test.py +21 -3
- megadetector/utils/md_tests.py +142 -49
- megadetector/utils/path_utils.py +342 -19
- megadetector/utils/wi_utils.py +1286 -212
- megadetector/visualization/visualization_utils.py +16 -4
- megadetector/visualization/visualize_db.py +1 -1
- megadetector/visualization/visualize_detector_output.py +1 -4
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/METADATA +6 -3
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/RECORD +41 -40
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/WHEEL +1 -1
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info/licenses}/LICENSE +0 -0
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/top_level.txt +0 -0
megadetector/utils/path_utils.py
CHANGED
|
@@ -27,12 +27,14 @@ import re
|
|
|
27
27
|
|
|
28
28
|
from zipfile import ZipFile
|
|
29
29
|
from datetime import datetime
|
|
30
|
+
from collections import defaultdict
|
|
30
31
|
from multiprocessing.pool import Pool, ThreadPool
|
|
31
32
|
from functools import partial
|
|
32
33
|
from shutil import which
|
|
33
34
|
from tqdm import tqdm
|
|
34
35
|
|
|
35
36
|
from megadetector.utils.ct_utils import is_iterable
|
|
37
|
+
from megadetector.utils.ct_utils import sort_dictionary_by_value
|
|
36
38
|
|
|
37
39
|
# Should all be lower-case
|
|
38
40
|
IMG_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.tif', '.tiff', '.bmp')
|
|
@@ -51,8 +53,7 @@ def recursive_file_list(base_dir,
|
|
|
51
53
|
sort_files=True,
|
|
52
54
|
recursive=True):
|
|
53
55
|
r"""
|
|
54
|
-
Enumerates files (not directories) in [base_dir]
|
|
55
|
-
backslahes to slashes
|
|
56
|
+
Enumerates files (not directories) in [base_dir].
|
|
56
57
|
|
|
57
58
|
Args:
|
|
58
59
|
base_dir (str): folder to enumerate
|
|
@@ -94,12 +95,15 @@ def recursive_file_list(base_dir,
|
|
|
94
95
|
return all_files
|
|
95
96
|
|
|
96
97
|
|
|
97
|
-
def file_list(base_dir,
|
|
98
|
+
def file_list(base_dir,
|
|
99
|
+
convert_slashes=True,
|
|
100
|
+
return_relative_paths=False,
|
|
101
|
+
sort_files=True,
|
|
98
102
|
recursive=False):
|
|
99
103
|
"""
|
|
100
|
-
Trivial wrapper for recursive_file_list, which was a poor function name choice
|
|
101
|
-
|
|
102
|
-
"recursive_file_list".
|
|
104
|
+
Trivial wrapper for recursive_file_list, which was a poor function name choice
|
|
105
|
+
at the time, since I later wanted to add non-recursive lists, but it doesn't
|
|
106
|
+
make sense to have a "recursive" option in a function called "recursive_file_list".
|
|
103
107
|
|
|
104
108
|
Args:
|
|
105
109
|
base_dir (str): folder to enumerate
|
|
@@ -119,6 +123,99 @@ def file_list(base_dir, convert_slashes=True, return_relative_paths=False, sort_
|
|
|
119
123
|
recursive=recursive)
|
|
120
124
|
|
|
121
125
|
|
|
126
|
+
def folder_list(base_dir,
                convert_slashes=True,
                return_relative_paths=False,
                sort_folders=True,
                recursive=False):
    """
    Enumerates folders (not files) in [base_dir].

    Args:
        base_dir (str): folder to enumerate
        convert_slashes (bool, optional): force forward slashes; if this is False, will use
            the native path separator
        return_relative_paths (bool, optional): return paths that are relative to [base_dir],
            rather than paths that start with [base_dir]
        sort_folders (bool, optional): force folders to be sorted, otherwise uses the order
            provided by os.walk() (recursive) or os.listdir() (non-recursive)
        recursive (bool, optional): enumerate recursively

    Returns:
        list: list of folder names
    """

    assert os.path.isdir(base_dir), '{} is not a folder'.format(base_dir)

    if recursive:
        folders = []
        for root, dirs, _ in os.walk(base_dir):
            for d in dirs:
                folders.append(os.path.join(root, d))
    else:
        # os.listdir() returns files and folders; keep only the folders
        folders = [os.path.join(base_dir,fn) for fn in os.listdir(base_dir)]
        folders = [fn for fn in folders if os.path.isdir(fn)]

    if return_relative_paths:
        folders = [os.path.relpath(fn,base_dir) for fn in folders]

    if convert_slashes:
        folders = [fn.replace('\\', '/') for fn in folders]

    if sort_folders:
        folders = sorted(folders)

    return folders
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def folder_summary(folder,print_summary=True):
    """
    Summarizes the contents of [folder] (recursively), and optionally prints
    that summary to the console.  The summary includes:

    * The total number of files
    * The total number of folders
    * The number of files for each extension

    Args:
        folder (str): folder to summarize
        print_summary (bool, optional): whether to print the summary

    Returns:
        dict: with fields "n_files", "n_folders", and "extension_to_count"
    """

    assert os.path.isdir(folder), '{} is not a folder'.format(folder)

    folders_relative = folder_list(folder,return_relative_paths=True,recursive=True)
    files_relative = file_list(folder,return_relative_paths=True,recursive=True)

    n_files = len(files_relative)
    n_folders = len(folders_relative)

    # Tally files by extension; files without an extension are counted under ''
    counts = defaultdict(int)
    for relative_path in files_relative:
        _, extension = os.path.splitext(relative_path)
        counts[extension] += 1

    extension_to_count = sort_dictionary_by_value(counts,reverse=True)

    if print_summary:
        for extension, count in extension_to_count.items():
            print('{}: {}'.format(extension,count))
        print('')
        print('Total files: {}'.format(n_files))
        print('Total folders: {}'.format(n_folders))

    return {
        'n_files': n_files,
        'n_folders': n_folders,
        'extension_to_count': extension_to_count
    }
|
|
217
|
+
|
|
218
|
+
|
|
122
219
|
def fileparts(path):
|
|
123
220
|
r"""
|
|
124
221
|
Breaks down a path into the directory path, filename, and extension.
|
|
@@ -263,6 +360,56 @@ def safe_create_link(link_exists,link_new):
|
|
|
263
360
|
os.symlink(link_exists,link_new)
|
|
264
361
|
|
|
265
362
|
|
|
363
|
+
def remove_empty_folders(path, remove_root=False):
    """
    Recursively removes empty folders within the specified path.

    Symbolic links are never followed: a link to a folder is treated as ordinary
    content of its parent, so nothing outside [path] is ever removed.

    Args:
        path (str): the folder from which we should recursively remove
            empty folders.
        remove_root (bool, optional): whether to remove the root directory if
            it's empty after removing all empty subdirectories.  This will always
            be True during recursive calls.

    Returns:
        bool: True if the directory is empty after processing, False otherwise
        (including when [path] is not a directory, or could not be removed)
    """

    # Verify that [path] is a directory
    if not os.path.isdir(path):
        return False

    # Track whether the current directory is empty
    is_empty = True

    # Iterate through all items in the directory
    for item in os.listdir(path):

        item_path = os.path.join(path, item)

        # Only recurse into real directories.  os.path.isdir() follows symlinks, so
        # without the islink() check we would clean up the link's target (which may
        # live outside [path]) and then fail to rmdir() the link itself.
        if os.path.isdir(item_path) and (not os.path.islink(item_path)):
            # If the subdirectory is empty after processing, it will be removed
            if not remove_empty_folders(item_path, True):
                # If the subdirectory is not empty, the current directory isn't empty either
                is_empty = False
        else:
            # If there's a file (or a link), the directory is not empty
            is_empty = False

    # If the directory is empty and we're supposed to remove it
    if is_empty and remove_root:
        try:
            os.rmdir(path)
        except OSError as e:
            print('Error removing directory {}: {}'.format(path,str(e)))
            is_empty = False

    return is_empty

# ...def remove_empty_folders(...)
|
|
411
|
+
|
|
412
|
+
|
|
266
413
|
def top_level_folder(p):
|
|
267
414
|
r"""
|
|
268
415
|
Gets the top-level folder from the path *p*.
|
|
@@ -547,9 +694,161 @@ def wsl_path_to_windows_path(filename):
|
|
|
547
694
|
if result.returncode != 0:
|
|
548
695
|
print('Could not convert path {} from WSL to Windows'.format(filename))
|
|
549
696
|
return None
|
|
697
|
+
|
|
550
698
|
return result.stdout.strip()
|
|
551
699
|
|
|
700
|
+
|
|
701
|
+
def windows_path_to_wsl_path(filename):
    r"""
    Converts a Windows path to a WSL path, or returns None if that's not possible.  E.g.
    converts:

    e:\a\b\c

    ...to:

    /mnt/e/a/b/c

    The conversion is delegated to the "wslpath" tool, invoked directly when running
    inside WSL and via "wsl" when running on Windows.

    Args:
        filename (str): filename to convert

    Returns:
        str: WSL equivalent to the Windows path [filename], or [filename] if the current
        environment is neither Windows nor WSL.  Returns None if the conversion tool
        cannot be launched or reports an error.
    """

    running_in_wsl = environment_is_wsl()

    if (not running_in_wsl) and (os.name != 'nt'):
        return filename

    if running_in_wsl:
        cmd = ['wslpath', '-u', filename]
    else:
        cmd = ['wsl', 'wslpath', '-u', filename]

    try:
        result = subprocess.run(cmd, text=True, capture_output=True)
    except OSError:
        # The conversion tool isn't available (e.g. WSL is not installed); treat
        # this the same way as a failed conversion.
        print('Could not convert path {} from Windows to WSL'.format(filename))
        return None

    if result.returncode != 0:
        print('Could not convert path {} from Windows to WSL'.format(filename))
        return None

    return result.stdout.strip()
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
def open_file_in_chrome(filename):
    """
    Open a file in chrome, regardless of file type.  I typically use this to open
    .md files in Chrome.

    Several launch strategies are tried in order for the current platform; the
    first one that appears to succeed wins.

    Args:
        filename (str): file to open

    Returns:
        bool: whether the operation was successful
    """

    from pathlib import Path

    # Create URL.  as_uri() percent-encodes characters like spaces and '#' that
    # would otherwise be misread as URL syntax.
    abs_path = os.path.abspath(filename)
    url = Path(abs_path).as_uri()

    system = platform.system()

    # Determine the Chrome path
    if system == 'Windows':

        # This is a native Python module, but it only exists on Windows
        import winreg

        chrome_paths = [
            os.path.expanduser("~") + r"\AppData\Local\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
        ]

        # Default approach: run from a typical chrome location
        for path in chrome_paths:
            if os.path.exists(path):
                subprocess.run([path, url])
                return True

        # Method 2: Check registry for Chrome path
        try:
            with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                                r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe") as key:
                chrome_path = winreg.QueryValue(key, None)
                if chrome_path and os.path.exists(chrome_path):
                    subprocess.run([chrome_path, url])
                    return True
        except Exception:
            pass

        # Method 3: Try alternate registry location
        #
        # NOTE(review): this assumes the "version" value is a path whose folder
        # contains chrome.exe; if it is a plain version string this never matches.
        # TODO confirm.
        try:
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                                r"Software\Google\Chrome\BLBeacon") as key:
                chrome_path = os.path.join(os.path.dirname(winreg.QueryValueEx(key, "version")[0]), "chrome.exe")
                if os.path.exists(chrome_path):
                    subprocess.run([chrome_path, url])
                    return True
        except Exception:
            pass

        # Method 4: Try system path or command
        #
        # With shell=True a missing command does not raise, it just produces a
        # non-zero exit code, so the exit code is the only failure signal here.
        for chrome_cmd in ["chrome", "chrome.exe", "googlechrome", "google-chrome"]:
            try:
                result = subprocess.run([chrome_cmd, url], shell=True)
            except Exception:
                continue
            if result.returncode == 0:
                return True

        # Method 5: Use Windows URL protocol handler
        try:
            os.startfile(url)
            return True
        except Exception:
            pass

        # Method 6: Use rundll32
        try:
            subprocess.run(['rundll32', 'url.dll,FileProtocolHandler', url])
            return True
        except Exception:
            pass

    elif system == 'Darwin':

        chrome_paths = [
            '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
            os.path.expanduser('~/Applications/Google Chrome.app/Contents/MacOS/Google Chrome')
        ]

        for path in chrome_paths:
            if os.path.exists(path):
                subprocess.run([path, url])
                return True

        # Fallback to 'open' command with Chrome as the app; 'open' reports a
        # missing application through its exit code rather than an exception.
        try:
            result = subprocess.run(['open', '-a', 'Google Chrome', url])
            if result.returncode == 0:
                return True
        except Exception:
            pass

    elif system == 'Linux':

        chrome_commands = ['google-chrome', 'chrome', 'chromium', 'chromium-browser']

        for cmd in chrome_commands:
            try:
                subprocess.run([cmd, url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
                return True
            except Exception:
                # Typically FileNotFoundError: this command isn't installed
                continue

    print(f"Could not open {filename} in Chrome on {system}.")
    return False
|
|
850
|
+
|
|
851
|
+
|
|
553
852
|
def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
|
|
554
853
|
"""
|
|
555
854
|
Opens [filename] in the default OS file handler for this file type.
|
|
@@ -611,7 +910,7 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
|
|
|
611
910
|
# ...def open_file(...)
|
|
612
911
|
|
|
613
912
|
|
|
614
|
-
#%% File list functions
|
|
913
|
+
#%% File list functions (as in, files that are lists of other filenames)
|
|
615
914
|
|
|
616
915
|
def write_list_to_file(output_file,strings):
|
|
617
916
|
"""
|
|
@@ -650,7 +949,9 @@ def read_list_from_file(filename):
|
|
|
650
949
|
return file_list
|
|
651
950
|
|
|
652
951
|
|
|
653
|
-
|
|
952
|
+
#%% File copying functions
|
|
953
|
+
|
|
954
|
+
def _copy_file(input_output_tuple,overwrite=True,verbose=False,move=False):
|
|
654
955
|
"""
|
|
655
956
|
Internal function for copying files from within parallel_copy_files.
|
|
656
957
|
"""
|
|
@@ -663,17 +964,29 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False):
|
|
|
663
964
|
print('Skipping existing target file {}'.format(target_fn))
|
|
664
965
|
return
|
|
665
966
|
|
|
967
|
+
if move:
|
|
968
|
+
action_string = 'Moving'
|
|
969
|
+
else:
|
|
970
|
+
action_string = 'Copying'
|
|
971
|
+
|
|
666
972
|
if verbose:
|
|
667
|
-
print('
|
|
973
|
+
print('{} to {}'.format(action_string,target_fn))
|
|
668
974
|
|
|
669
975
|
os.makedirs(os.path.dirname(target_fn),exist_ok=True)
|
|
670
|
-
|
|
671
|
-
|
|
976
|
+
if move:
|
|
977
|
+
shutil.move(source_fn, target_fn)
|
|
978
|
+
else:
|
|
979
|
+
shutil.copyfile(source_fn,target_fn)
|
|
980
|
+
|
|
672
981
|
|
|
673
|
-
def parallel_copy_files(input_file_to_output_file,
|
|
674
|
-
|
|
982
|
+
def parallel_copy_files(input_file_to_output_file,
|
|
983
|
+
max_workers=16,
|
|
984
|
+
use_threads=True,
|
|
985
|
+
overwrite=False,
|
|
986
|
+
verbose=False,
|
|
987
|
+
move=False):
|
|
675
988
|
"""
|
|
676
|
-
|
|
989
|
+
Copy (or move) files from source to target according to the dict input_file_to_output_file.
|
|
677
990
|
|
|
678
991
|
Args:
|
|
679
992
|
input_file_to_output_file (dict): dictionary mapping source files to the target files
|
|
@@ -682,7 +995,8 @@ def parallel_copy_files(input_file_to_output_file, max_workers=16,
|
|
|
682
995
|
use_threads (bool, optional): whether to use threads (True) or processes (False) for
|
|
683
996
|
parallel copying; ignored if max_workers <= 1
|
|
684
997
|
overwrite (bool, optional): whether to overwrite existing destination files
|
|
685
|
-
verbose (bool, optional): enable additional debug output
|
|
998
|
+
verbose (bool, optional): enable additional debug output
|
|
999
|
+
move (bool, optional): move instead of copying
|
|
686
1000
|
"""
|
|
687
1001
|
|
|
688
1002
|
n_workers = min(max_workers,len(input_file_to_output_file))
|
|
@@ -698,13 +1012,18 @@ def parallel_copy_files(input_file_to_output_file, max_workers=16,
|
|
|
698
1012
|
pool = Pool(n_workers)
|
|
699
1013
|
|
|
700
1014
|
with tqdm(total=len(input_output_tuples)) as pbar:
|
|
701
|
-
for i,_ in enumerate(pool.imap_unordered(partial(_copy_file,
|
|
1015
|
+
for i,_ in enumerate(pool.imap_unordered(partial(_copy_file,
|
|
1016
|
+
overwrite=overwrite,
|
|
1017
|
+
verbose=verbose,
|
|
1018
|
+
move=move),
|
|
702
1019
|
input_output_tuples)):
|
|
703
1020
|
pbar.update()
|
|
704
1021
|
|
|
705
1022
|
# ...def parallel_copy_files(...)
|
|
706
1023
|
|
|
707
1024
|
|
|
1025
|
+
#%% File size functions
|
|
1026
|
+
|
|
708
1027
|
def get_file_sizes(base_dir, convert_slashes=True):
|
|
709
1028
|
"""
|
|
710
1029
|
Gets sizes recursively for all files in base_dir, returning a dict mapping
|
|
@@ -827,7 +1146,7 @@ def parallel_get_file_sizes(filenames,
|
|
|
827
1146
|
# ...def parallel_get_file_sizes(...)
|
|
828
1147
|
|
|
829
1148
|
|
|
830
|
-
#%%
|
|
1149
|
+
#%% Compression (zip/tar) functions
|
|
831
1150
|
|
|
832
1151
|
def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
|
|
833
1152
|
"""
|
|
@@ -985,8 +1304,12 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
|
|
|
985
1304
|
return output_fn
|
|
986
1305
|
|
|
987
1306
|
|
|
988
|
-
def parallel_zip_files(input_files,
|
|
989
|
-
|
|
1307
|
+
def parallel_zip_files(input_files,
|
|
1308
|
+
max_workers=16,
|
|
1309
|
+
use_threads=True,
|
|
1310
|
+
compresslevel=9,
|
|
1311
|
+
overwrite=False,
|
|
1312
|
+
verbose=False):
|
|
990
1313
|
"""
|
|
991
1314
|
Zips one or more files to separate output files in parallel, leaving the
|
|
992
1315
|
original files in place. Each file is zipped to [filename].zip.
|