megadetector 5.0.19__py3-none-any.whl → 5.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/data_management/importers/bellevue_to_json.py +0 -1
- megadetector/data_management/importers/osu-small-animals-to-json.py +364 -0
- megadetector/data_management/lila/generate_lila_per_image_labels.py +1 -1
- megadetector/data_management/lila/get_lila_annotation_counts.py +2 -0
- megadetector/data_management/lila/lila_common.py +28 -12
- megadetector/data_management/lila/test_lila_metadata_urls.py +17 -8
- megadetector/data_management/read_exif.py +73 -0
- megadetector/data_management/yolo_output_to_md_output.py +18 -5
- megadetector/detection/process_video.py +84 -16
- megadetector/detection/run_detector.py +36 -13
- megadetector/detection/run_detector_batch.py +104 -15
- megadetector/detection/run_inference_with_yolov5_val.py +20 -23
- megadetector/detection/video_utils.py +79 -44
- megadetector/postprocessing/combine_api_outputs.py +1 -1
- megadetector/postprocessing/detector_calibration.py +367 -0
- megadetector/postprocessing/md_to_coco.py +2 -1
- megadetector/postprocessing/postprocess_batch_results.py +32 -20
- megadetector/postprocessing/validate_batch_results.py +118 -58
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +8 -3
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +3 -2
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -1
- megadetector/utils/ct_utils.py +20 -0
- megadetector/utils/md_tests.py +63 -17
- megadetector/utils/path_utils.py +139 -30
- megadetector/utils/write_html_image_list.py +16 -5
- megadetector/visualization/visualization_utils.py +126 -23
- megadetector/visualization/visualize_db.py +104 -63
- {megadetector-5.0.19.dist-info → megadetector-5.0.21.dist-info}/METADATA +2 -2
- {megadetector-5.0.19.dist-info → megadetector-5.0.21.dist-info}/RECORD +32 -32
- {megadetector-5.0.19.dist-info → megadetector-5.0.21.dist-info}/WHEEL +1 -1
- megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
- megadetector/data_management/importers/snapshot_safari_importer_reprise.py +0 -677
- {megadetector-5.0.19.dist-info → megadetector-5.0.21.dist-info}/LICENSE +0 -0
- {megadetector-5.0.19.dist-info → megadetector-5.0.21.dist-info}/top_level.txt +0 -0
megadetector/utils/path_utils.py
CHANGED
|
@@ -17,6 +17,7 @@ import platform
|
|
|
17
17
|
import string
|
|
18
18
|
import json
|
|
19
19
|
import shutil
|
|
20
|
+
import hashlib
|
|
20
21
|
import unicodedata
|
|
21
22
|
import zipfile
|
|
22
23
|
import tarfile
|
|
@@ -236,6 +237,30 @@ def path_is_abs(p):
|
|
|
236
237
|
return (len(p) > 1) and (p[0] == '/' or p[1] == ':' or p[0] == '\\')
|
|
237
238
|
|
|
238
239
|
|
|
240
|
+
def safe_create_link(link_exists,link_new):
    """
    Creates a symlink at [link_new] pointing to [link_exists].

    If [link_new] already exists, makes sure it's a link (not a regular file or folder).
    If it already points to [link_exists], does nothing; if it points anywhere else,
    removes and re-creates it.

    Errors if [link_new] already exists but it's not a link.

    Args:
        link_exists (str): the path the symlink should point to; this string is stored
            verbatim as the link target, and compared verbatim against the target of
            any link that already exists at [link_new]
        link_new (str): the path at which the symlink itself is created

    Raises:
        AssertionError: if [link_new] exists and is not a symlink
    """

    # os.path.exists() follows links, so it is False for a dangling link; the islink()
    # test makes sure a dangling link is still treated as "already exists".
    if os.path.exists(link_new) or os.path.islink(link_new):

        assert os.path.islink(link_new), \
            'Cannot create link {}: a non-link file or folder already exists there'.format(
                link_new)

        # Nothing to do if the link already points where we want it to point
        if os.readlink(link_new) == link_exists:
            return

        os.remove(link_new)

    os.symlink(link_exists,link_new)
|
|
262
|
+
|
|
263
|
+
|
|
239
264
|
def top_level_folder(p):
|
|
240
265
|
r"""
|
|
241
266
|
Gets the top-level folder from the path *p*.
|
|
@@ -296,31 +321,6 @@ if False:
|
|
|
296
321
|
p = r'c:/foo'; s = top_level_folder(p); print(s); assert s == 'c:/foo'
|
|
297
322
|
p = r'c:\foo/bar'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
|
|
298
323
|
|
|
299
|
-
#%%
|
|
300
|
-
|
|
301
|
-
def safe_create_link(link_exists,link_new):
|
|
302
|
-
"""
|
|
303
|
-
Creates a symlink at [link_new] pointing to [link_exists].
|
|
304
|
-
|
|
305
|
-
If [link_new] already exists, make sure it's a link (not a file),
|
|
306
|
-
and if it has a different target than [link_exists], removes and re-creates
|
|
307
|
-
it.
|
|
308
|
-
|
|
309
|
-
Errors if [link_new] already exists but it's not a link.
|
|
310
|
-
|
|
311
|
-
Args:
|
|
312
|
-
link_exists (str): the source of the (possibly-new) symlink
|
|
313
|
-
link_new (str): the target of the (possibly-new) symlink
|
|
314
|
-
"""
|
|
315
|
-
|
|
316
|
-
if os.path.exists(link_new) or os.path.islink(link_new):
|
|
317
|
-
assert os.path.islink(link_new)
|
|
318
|
-
if not os.readlink(link_new) == link_exists:
|
|
319
|
-
os.remove(link_new)
|
|
320
|
-
os.symlink(link_exists,link_new)
|
|
321
|
-
else:
|
|
322
|
-
os.symlink(link_exists,link_new)
|
|
323
|
-
|
|
324
324
|
|
|
325
325
|
#%% Image-related path functions
|
|
326
326
|
|
|
@@ -598,7 +598,9 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
|
|
|
598
598
|
|
|
599
599
|
opener = 'xdg-open'
|
|
600
600
|
subprocess.call([opener, filename])
|
|
601
|
-
|
|
601
|
+
|
|
602
|
+
# ...def open_file(...)
|
|
603
|
+
|
|
602
604
|
|
|
603
605
|
#%% File list functions
|
|
604
606
|
|
|
@@ -649,8 +651,12 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False):
|
|
|
649
651
|
target_fn = input_output_tuple[1]
|
|
650
652
|
if (not overwrite) and (os.path.isfile(target_fn)):
|
|
651
653
|
if verbose:
|
|
652
|
-
print('Skipping existing file {}'.format(target_fn))
|
|
653
|
-
return
|
|
654
|
+
print('Skipping existing target file {}'.format(target_fn))
|
|
655
|
+
return
|
|
656
|
+
|
|
657
|
+
if verbose:
|
|
658
|
+
print('Copying to target file {}'.format(target_fn))
|
|
659
|
+
|
|
654
660
|
os.makedirs(os.path.dirname(target_fn),exist_ok=True)
|
|
655
661
|
shutil.copyfile(source_fn,target_fn)
|
|
656
662
|
|
|
@@ -667,7 +673,7 @@ def parallel_copy_files(input_file_to_output_file, max_workers=16,
|
|
|
667
673
|
use_threads (bool, optional): whether to use threads (True) or processes (False) for
|
|
668
674
|
parallel copying; ignored if max_workers <= 1
|
|
669
675
|
overwrite (bool, optional): whether to overwrite existing destination files
|
|
670
|
-
verbose (bool, optional): enable
|
|
676
|
+
verbose (bool, optional): enable additional debug output
|
|
671
677
|
"""
|
|
672
678
|
|
|
673
679
|
n_workers = min(max_workers,len(input_file_to_output_file))
|
|
@@ -750,7 +756,7 @@ def parallel_get_file_sizes(filenames,
|
|
|
750
756
|
max_workers (int, optional): number of concurrent workers; set to <=1 to disable parallelism
|
|
751
757
|
use_threads (bool, optional): whether to use threads (True) or processes (False) for
|
|
752
758
|
parallel copying; ignored if max_workers <= 1
|
|
753
|
-
verbose (bool, optional): enable
|
|
759
|
+
verbose (bool, optional): enable additional debug output
|
|
754
760
|
recursive (bool, optional): enumerate recursively, only relevant if [filenames] is a folder.
|
|
755
761
|
convert_slashes (bool, optional): convert backslashes to forward slashes
|
|
756
762
|
return_relative_paths (bool, optional): return relative paths; only relevant if [filenames]
|
|
@@ -804,6 +810,8 @@ def parallel_get_file_sizes(filenames,
|
|
|
804
810
|
|
|
805
811
|
return to_return
|
|
806
812
|
|
|
813
|
+
# ...def parallel_get_file_sizes(...)
|
|
814
|
+
|
|
807
815
|
|
|
808
816
|
#%% Zip functions
|
|
809
817
|
|
|
@@ -1075,3 +1083,104 @@ def unzip_file(input_file, output_folder=None):
|
|
|
1075
1083
|
|
|
1076
1084
|
with zipfile.ZipFile(input_file, 'r') as zf:
|
|
1077
1085
|
zf.extractall(output_folder)
|
|
1086
|
+
|
|
1087
|
+
|
|
1088
|
+
#%% File hashing functions
|
|
1089
|
+
|
|
1090
|
+
def compute_file_hash(file_path, algorithm='sha256', allow_failures=True):
    """
    Compute the hash of a file.

    Adapted from:

    https://www.geeksforgeeks.org/python-program-to-find-hash-of-file/

    Args:
        file_path (str): the file to hash
        algorithm (str, optional): the hashing algorithm to use (e.g. md5, sha256); passed
            directly to hashlib.new()
        allow_failures (bool, optional): if True, return None when the file can't be
            opened or read; if False, let the underlying exception propagate

    Returns:
        str: the hex digest for this file, or None if the file could not be read and
        [allow_failures] is True

    Raises:
        ValueError: if [algorithm] is not supported by hashlib, regardless of
            [allow_failures]
    """

    # An unsupported algorithm is a caller error rather than a per-file failure, so the
    # hash object is created outside the try block: we never want a typo in [algorithm]
    # to be silently reported as "None" for every file.
    hash_func = hashlib.new(algorithm)

    try:

        with open(file_path, 'rb') as file:
            # Read the file in chunks of 8192 bytes, so large files are never loaded
            # into memory all at once
            while chunk := file.read(8192):
                hash_func.update(chunk)

        return str(hash_func.hexdigest())

    except Exception:

        if allow_failures:
            return None
        else:
            raise

# ...def compute_file_hash(...)
|
|
1124
|
+
|
|
1125
|
+
|
|
1126
|
+
def parallel_compute_file_hashes(filenames,
                                 max_workers=16,
                                 use_threads=True,
                                 recursive=True,
                                 algorithm='sha256',
                                 verbose=False):
    """
    Compute file hashes for a list or folder of files.

    Args:
        filenames (list or str): a list of filenames or a folder
        max_workers (int, optional): the number of parallel workers to use; set to <=1 to disable
            parallelization
        use_threads (bool, optional): whether to use threads (True) or processes (False) for
            parallelization
        recursive (bool, optional): if [filenames] is a folder, whether to enumerate recursively.
            Ignored if [filenames] is a list.
        algorithm (str, optional): the hashing algorithm to use (e.g. md5, sha256)
        verbose (bool, optional): enable additional debug output

    Returns:
        dict: a dict mapping filenames to hash values; values will be None for files that fail
        to load.
    """

    if isinstance(filenames,str) and os.path.isdir(filenames):
        if verbose:
            print('Enumerating files in {}'.format(filenames))
        filenames = recursive_file_list(filenames,recursive=recursive,return_relative_paths=False)

    # Never start more workers than there are files; an empty list yields zero workers,
    # which takes the serial path below and returns an empty dict.
    n_workers = min(max_workers,len(filenames))

    if verbose:
        print('Computing hashes for {} files on {} workers'.format(len(filenames),n_workers))

    if n_workers <= 1:

        results = [compute_file_hash(filename,algorithm=algorithm,allow_failures=True)
                   for filename in filenames]

    else:

        if use_threads:
            pool = ThreadPool(n_workers)
        else:
            pool = Pool(n_workers)

        try:
            # imap preserves input order, so results[i] corresponds to filenames[i]
            results = list(tqdm(pool.imap(
                partial(compute_file_hash,algorithm=algorithm,allow_failures=True),
                filenames), total=len(filenames)))
        finally:
            # Release the worker threads/processes even if hashing raised
            pool.close()
            pool.join()

    assert len(filenames) == len(results), 'Internal error in parallel_compute_file_hashes'

    return dict(zip(filenames,results))

# ...def parallel_compute_file_hashes(...)
|
|
megadetector/utils/write_html_image_list.py
CHANGED
|
@@ -42,6 +42,7 @@ def write_html_image_list(filename=None,images=None,options=None):
|
|
|
42
42
|
options (dict, optional): a dict with one or more of the following fields:
|
|
43
43
|
|
|
44
44
|
- fHtml (file pointer to write to, used for splitting write operations over multiple calls)
|
|
45
|
+
- pageTitle (HTML page title)
|
|
45
46
|
- headerHtml (html text to include before the image list)
|
|
46
47
|
- trailerHtml (html text to include after the image list)
|
|
47
48
|
- defaultImageStyle (default css style for images)
|
|
@@ -60,11 +61,14 @@ def write_html_image_list(filename=None,images=None,options=None):
|
|
|
60
61
|
if 'fHtml' not in options:
|
|
61
62
|
options['fHtml'] = -1
|
|
62
63
|
|
|
64
|
+
if 'pageTitle' not in options or options['pageTitle'] is None:
|
|
65
|
+
options['pageTitle'] = ''
|
|
66
|
+
|
|
63
67
|
if 'headerHtml' not in options or options['headerHtml'] is None:
|
|
64
|
-
options['headerHtml'] = ''
|
|
68
|
+
options['headerHtml'] = ''
|
|
65
69
|
|
|
66
70
|
if 'trailerHtml' not in options or options['trailerHtml'] is None:
|
|
67
|
-
options['trailerHtml'] = ''
|
|
71
|
+
options['trailerHtml'] = ''
|
|
68
72
|
|
|
69
73
|
if 'defaultTextStyle' not in options or options['defaultTextStyle'] is None:
|
|
70
74
|
options['defaultTextStyle'] = \
|
|
@@ -114,7 +118,7 @@ def write_html_image_list(filename=None,images=None,options=None):
|
|
|
114
118
|
# You can't supply your own file handle in this case
|
|
115
119
|
if options['fHtml'] != -1:
|
|
116
120
|
raise ValueError(
|
|
117
|
-
|
|
121
|
+
"You can't supply your own file handle if we have to page the image set")
|
|
118
122
|
|
|
119
123
|
figureFileStartingIndices = list(range(0,nImages,options['maxFiguresPerHtmlFile']))
|
|
120
124
|
|
|
@@ -124,7 +128,10 @@ def write_html_image_list(filename=None,images=None,options=None):
|
|
|
124
128
|
fMeta = open(filename,'w')
|
|
125
129
|
|
|
126
130
|
# Write header stuff
|
|
127
|
-
|
|
131
|
+
titleString = '<title>Index page</title>'
|
|
132
|
+
if len(options['pageTitle']) > 0:
|
|
133
|
+
titleString = '<title>Index page for: {}</title>'.format(options['pageTitle'])
|
|
134
|
+
fMeta.write('<html><head>{}</head><body>\n'.format(titleString))
|
|
128
135
|
fMeta.write(options['headerHtml'])
|
|
129
136
|
fMeta.write('<table border = 0 cellpadding = 2>\n')
|
|
130
137
|
|
|
@@ -170,7 +177,11 @@ def write_html_image_list(filename=None,images=None,options=None):
|
|
|
170
177
|
else:
|
|
171
178
|
fHtml = options['fHtml']
|
|
172
179
|
|
|
173
|
-
|
|
180
|
+
titleString = ''
|
|
181
|
+
if len(options['pageTitle']) > 0:
|
|
182
|
+
titleString = '<title>{}</title>'.format(options['pageTitle'])
|
|
183
|
+
|
|
184
|
+
fHtml.write('<html>{}<body>\n'.format(titleString))
|
|
174
185
|
|
|
175
186
|
fHtml.write(options['headerHtml'])
|
|
176
187
|
|
|
megadetector/visualization/visualization_utils.py
CHANGED
|
@@ -672,6 +672,36 @@ def draw_bounding_boxes_on_image(image,
|
|
|
672
672
|
# ...draw_bounding_boxes_on_image(...)
|
|
673
673
|
|
|
674
674
|
|
|
675
|
+
def get_text_size(font,s):
    """
    Get the expected width and height when rendering the string [s] in the font
    [font].

    Args:
        font (PIL.ImageFont): the font whose size we should query
        s (str): the string whose size we should query

    Returns:
        tuple: (w,h), in pixel coordinates
    """

    # With Pillow 9 this was just:
    #
    # w,h = font.getsize(s)
    #
    # The obvious Pillow 10 equivalent would be the extent of the bounding box
    # (right-left, bottom-top), but using the right and bottom edges directly
    # produces the results most similar to Pillow 9, so that's what we do here.
    try:
        _,_,w,h = font.getbbox(s)
    except Exception:
        # Fall back to the legacy API for fonts where getbbox() is not usable
        w,h = font.getsize(s)

    return w,h
|
|
703
|
+
|
|
704
|
+
|
|
675
705
|
def draw_bounding_box_on_image(image,
|
|
676
706
|
ymin,
|
|
677
707
|
xmin,
|
|
@@ -773,24 +803,6 @@ def draw_bounding_box_on_image(image,
|
|
|
773
803
|
except IOError:
|
|
774
804
|
font = ImageFont.load_default()
|
|
775
805
|
|
|
776
|
-
def get_text_size(font,s):
|
|
777
|
-
|
|
778
|
-
# This is what we did w/Pillow 9
|
|
779
|
-
# w,h = font.getsize(s)
|
|
780
|
-
|
|
781
|
-
# I would *think* this would be the equivalent for Pillow 10
|
|
782
|
-
# l,t,r,b = font.getbbox(s); w = r-l; h=b-t
|
|
783
|
-
|
|
784
|
-
# ...but this actually produces the most similar results to Pillow 9
|
|
785
|
-
# l,t,r,b = font.getbbox(s); w = r; h=b
|
|
786
|
-
|
|
787
|
-
try:
|
|
788
|
-
l,t,r,b = font.getbbox(s); w = r; h=b
|
|
789
|
-
except Exception:
|
|
790
|
-
w,h = font.getsize(s)
|
|
791
|
-
|
|
792
|
-
return w,h
|
|
793
|
-
|
|
794
806
|
# If the total height of the display strings added to the top of the bounding
|
|
795
807
|
# box exceeds the top of the image, stack the strings below the bounding box
|
|
796
808
|
# instead of above.
|
|
@@ -972,7 +984,7 @@ def draw_bounding_boxes_on_file(input_file,
|
|
|
972
984
|
boxes are length-four arrays formatted as [x,y,w,h], normalized,
|
|
973
985
|
upper-left origin (this is the standard MD detection format)
|
|
974
986
|
detector_label_map (dict, optional): a dict mapping category IDs to strings. If this
|
|
975
|
-
is None, no confidence values or identifiers are shown If this is {}, just category
|
|
987
|
+
is None, no confidence values or identifiers are shown. If this is {}, just category
|
|
976
988
|
indices and confidence values are shown.
|
|
977
989
|
thickness (int, optional): line width in pixels for box rendering
|
|
978
990
|
expansion (int, optional): box expansion in pixels
|
|
@@ -1043,7 +1055,7 @@ def draw_db_boxes_on_file(input_file,
|
|
|
1043
1055
|
classes = [0] * len(boxes)
|
|
1044
1056
|
|
|
1045
1057
|
render_db_bounding_boxes(boxes, classes, image, original_size=None,
|
|
1046
|
-
|
|
1058
|
+
label_map=label_map, thickness=thickness, expansion=expansion)
|
|
1047
1059
|
|
|
1048
1060
|
image.save(output_file)
|
|
1049
1061
|
|
|
@@ -1125,7 +1137,6 @@ def gray_scale_fraction(image,crop_size=(0.1,0.1)):
|
|
|
1125
1137
|
if r == g and r == b and g == b:
|
|
1126
1138
|
n_gray_pixels += 1
|
|
1127
1139
|
|
|
1128
|
-
|
|
1129
1140
|
# ...def gray_scale_fraction(...)
|
|
1130
1141
|
|
|
1131
1142
|
|
|
@@ -1376,6 +1387,98 @@ def resize_image_folder(input_folder,
|
|
|
1376
1387
|
# ...def resize_image_folder(...)
|
|
1377
1388
|
|
|
1378
1389
|
|
|
1390
|
+
def get_image_size(im,verbose=False):
    """
    Retrieve the size of an image.  Returns None if the image fails to load.

    Args:
        im (str or PIL.Image): filename or PIL image
        verbose (bool, optional): print a diagnostic message when the size can't be
            determined

    Returns:
        tuple (w,h), or None if the image fails to load or reports a non-positive
        width or height.
    """

    # Only used for diagnostic output; replaced with the filename if we're given one
    image_name = '[in memory]'

    try:

        if isinstance(im,str):
            image_name = im
            im = load_image(im)

        w = im.width
        h = im.height

        # Treat a degenerate (zero- or negative-sized) image as a failure
        if w <= 0 or h <= 0:
            if verbose:
                print('Error reading size from image {}: {},{}'.format(
                    image_name,w,h))
            return None

        return (w,h)

    except Exception as e:

        # Best-effort by design: any failure to load or query the image maps to None
        if verbose:
            print('Error reading size from image {}: {}'.format(
                image_name,str(e)))
        return None

# ...def get_image_size(...)
|
|
1422
|
+
|
|
1423
|
+
|
|
1424
|
+
def parallel_get_image_sizes(filenames,
                             max_workers=16,
                             use_threads=True,
                             recursive=True,
                             verbose=False):
    """
    Retrieve image sizes for a list or folder of images.

    Args:
        filenames (list or str): a list of image filenames or a folder
        max_workers (int, optional): the number of parallel workers to use; set to <=1 to disable
            parallelization
        use_threads (bool, optional): whether to use threads (True) or processes (False) for
            parallelization
        recursive (bool, optional): if [filenames] is a folder, whether to search recursively for images.
            Ignored if [filenames] is a list.
        verbose (bool, optional): enable additional debug output

    Returns:
        dict: a dict mapping filenames to (w,h) tuples; values will be None for images that fail
        to load.
    """

    if isinstance(filenames,str) and os.path.isdir(filenames):
        if verbose:
            print('Enumerating images in {}'.format(filenames))
        filenames = find_images(filenames,recursive=recursive,return_relative_paths=False)

    # Never start more workers than there are images; an empty list yields zero workers,
    # which takes the serial path below and returns an empty dict.
    n_workers = min(max_workers,len(filenames))

    if verbose:
        print('Getting image sizes for {} images'.format(len(filenames)))

    if n_workers <= 1:

        results = [get_image_size(filename,verbose=verbose) for filename in filenames]

    else:

        if use_threads:
            pool = ThreadPool(n_workers)
        else:
            pool = Pool(n_workers)

        try:
            # imap preserves input order, so results[i] corresponds to filenames[i]
            results = list(tqdm(pool.imap(
                partial(get_image_size,verbose=verbose),filenames), total=len(filenames)))
        finally:
            # Release the worker threads/processes even if a worker raised
            pool.close()
            pool.join()

    assert len(filenames) == len(results), 'Internal error in parallel_get_image_sizes'

    return dict(zip(filenames,results))

# ...def parallel_get_image_sizes(...)
|
|
1480
|
+
|
|
1481
|
+
|
|
1379
1482
|
#%% Image integrity checking functions
|
|
1380
1483
|
|
|
1381
1484
|
def check_image_integrity(filename,modes=None):
|
|
@@ -1494,13 +1597,13 @@ def parallel_check_image_integrity(filenames,
|
|
|
1494
1597
|
with either 'success' or 'error').
|
|
1495
1598
|
"""
|
|
1496
1599
|
|
|
1497
|
-
n_workers = min(max_workers,len(filenames))
|
|
1498
|
-
|
|
1499
1600
|
if isinstance(filenames,str) and os.path.isdir(filenames):
|
|
1500
1601
|
if verbose:
|
|
1501
1602
|
print('Enumerating images in {}'.format(filenames))
|
|
1502
1603
|
filenames = find_images(filenames,recursive=recursive,return_relative_paths=False)
|
|
1503
1604
|
|
|
1605
|
+
n_workers = min(max_workers,len(filenames))
|
|
1606
|
+
|
|
1504
1607
|
if verbose:
|
|
1505
1608
|
print('Checking image integrity for {} filenames'.format(len(filenames)))
|
|
1506
1609
|
|