megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/batch_processing/data_preparation/manage_local_batch.py +297 -202
- api/batch_processing/data_preparation/manage_video_batch.py +7 -2
- api/batch_processing/postprocessing/add_max_conf.py +1 -0
- api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
- api/batch_processing/postprocessing/compare_batch_results.py +111 -61
- api/batch_processing/postprocessing/convert_output_format.py +24 -6
- api/batch_processing/postprocessing/load_api_results.py +56 -72
- api/batch_processing/postprocessing/md_to_labelme.py +119 -51
- api/batch_processing/postprocessing/merge_detections.py +30 -5
- api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
- api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
- api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
- api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
- classification/prepare_classification_script.py +191 -191
- data_management/cct_json_utils.py +7 -2
- data_management/coco_to_labelme.py +263 -0
- data_management/coco_to_yolo.py +72 -48
- data_management/databases/integrity_check_json_db.py +75 -64
- data_management/databases/subset_json_db.py +1 -1
- data_management/generate_crops_from_cct.py +1 -1
- data_management/get_image_sizes.py +44 -26
- data_management/importers/animl_results_to_md_results.py +3 -5
- data_management/importers/noaa_seals_2019.py +2 -2
- data_management/importers/zamba_results_to_md_results.py +2 -2
- data_management/labelme_to_coco.py +264 -127
- data_management/labelme_to_yolo.py +96 -53
- data_management/lila/create_lila_blank_set.py +557 -0
- data_management/lila/create_lila_test_set.py +2 -1
- data_management/lila/create_links_to_md_results_files.py +1 -1
- data_management/lila/download_lila_subset.py +138 -45
- data_management/lila/generate_lila_per_image_labels.py +23 -14
- data_management/lila/get_lila_annotation_counts.py +16 -10
- data_management/lila/lila_common.py +15 -42
- data_management/lila/test_lila_metadata_urls.py +116 -0
- data_management/read_exif.py +65 -16
- data_management/remap_coco_categories.py +84 -0
- data_management/resize_coco_dataset.py +14 -31
- data_management/wi_download_csv_to_coco.py +239 -0
- data_management/yolo_output_to_md_output.py +40 -13
- data_management/yolo_to_coco.py +313 -100
- detection/process_video.py +36 -14
- detection/pytorch_detector.py +1 -1
- detection/run_detector.py +73 -18
- detection/run_detector_batch.py +116 -27
- detection/run_inference_with_yolov5_val.py +135 -27
- detection/run_tiled_inference.py +153 -43
- detection/tf_detector.py +2 -1
- detection/video_utils.py +4 -2
- md_utils/ct_utils.py +101 -6
- md_utils/md_tests.py +264 -17
- md_utils/path_utils.py +326 -47
- md_utils/process_utils.py +26 -7
- md_utils/split_locations_into_train_val.py +215 -0
- md_utils/string_utils.py +10 -0
- md_utils/url_utils.py +66 -3
- md_utils/write_html_image_list.py +12 -2
- md_visualization/visualization_utils.py +380 -74
- md_visualization/visualize_db.py +41 -10
- md_visualization/visualize_detector_output.py +185 -104
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
- taxonomy_mapping/map_new_lila_datasets.py +43 -39
- taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
- taxonomy_mapping/preview_lila_taxonomy.py +27 -27
- taxonomy_mapping/species_lookup.py +33 -13
- taxonomy_mapping/taxonomy_csv_checker.py +7 -5
- md_visualization/visualize_megadb.py +0 -183
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
data_management/read_exif.py
CHANGED
@@ -48,9 +48,18 @@ class ReadExifOptions:
     #
     # Not relevant if n_workers is 1.
     use_threads = True
-
+
+    # "File" and "ExifTool" are tag types used by ExifTool to report data that
+    # doesn't come from EXIF, rather from the file (e.g. file size).
     tag_types_to_ignore = set(['File','ExifTool'])
 
+    # Include/exclude specific tags (mutually incompatible)
+    tags_to_include = None
+    tags_to_exclude = None
+
+    # A useful set of tags one might want to limit queries for
+    # options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
+
     exiftool_command_name = 'exiftool'
 
     # How should we handle byte-formatted EXIF tags?
@@ -62,16 +71,17 @@
 
     # Should we use exiftool or pil?
     processing_library = 'pil' # 'exiftool','pil'
-
+
+
 
 #%% Functions
 
-def enumerate_files(input_folder):
+def enumerate_files(input_folder,recursive=True):
     """
     Enumerates all image files in input_folder, returning relative paths
     """
 
-    image_files = find_images(input_folder,recursive=True)
+    image_files = find_images(input_folder,recursive=recursive)
     image_files = [os.path.relpath(s,input_folder) for s in image_files]
     image_files = [s.replace('\\','/') for s in image_files]
     print('Enumerated {} files'.format(len(image_files)))
@@ -99,7 +109,7 @@ def get_exif_ifd(exif):
 def read_pil_exif(im,options=None):
     """
     Read all the EXIF data we know how to read from [im] (path or PIL Image), whether it's
-    in the PIL default EXIF data or not.
+    in the PIL default EXIF data or not. Returns a dict.
     """
 
     if options is None:
@@ -192,6 +202,32 @@ def parse_exif_datetime_string(s,verbose=False):
     return dt
 
 
+def _filter_tags(tags,options):
+    """
+    Internal function used to include/exclude specific tags from the exif_tags
+    dict.
+    """
+
+    if options is None:
+        return tags
+    if options.tags_to_include is None and options.tags_to_exclude is None:
+        return tags
+    if options.tags_to_include is not None:
+        assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
+        tags_to_return = {}
+        for tag_name in tags.keys():
+            if tag_name in options.tags_to_include:
+                tags_to_return[tag_name] = tags[tag_name]
+        return tags_to_return
+    if options.tags_to_exclude is not None:
+        assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
+        tags_to_return = {}
+        for tag_name in tags.keys():
+            if tag_name not in options.tags_to_exclude:
+                tags_to_return[tag_name] = tags[tag_name]
+        return tags_to_return
+
+
 def read_exif_tags_for_image(file_path,options=None):
     """
     Get relevant fields from EXIF data for an image
@@ -227,8 +263,8 @@ def read_exif_tags_for_image(file_path,options=None):
             result['status'] = 'empty_read'
         else:
             result['status'] = 'success'
-            result['tags'] = exif_tags
-
+            result['tags'] = _filter_tags(exif_tags,options)
+
         return result
 
     elif options.processing_library == 'exiftool':
@@ -283,9 +319,12 @@ def read_exif_tags_for_image(file_path,options=None):
                     print('Ignoring tag with type {}'.format(field_type))
                 continue
 
-
-
-
+            field_name = field_name_type_tokens[1].strip()
+            if options.tags_to_exclude is not None and field_name in options.tags_to_exclude:
+                continue
+            if options.tags_to_include is not None and field_name not in options.tags_to_include:
+                continue
+            tag = [field_type,field_name,field_value]
 
             exif_tags.append(tag)
 
@@ -350,20 +389,22 @@ def populate_exif_data(im, image_base, options=None):
 # ...populate_exif_data()
 
 
-def create_image_objects(image_files):
+def create_image_objects(image_files,recursive=True):
     """
     Create empty image objects for every image in [image_files], which can be a
     list of relative paths (which will get stored without processing, so the base
     path doesn't matter here), or a folder name.
 
     Returns a list of dicts with field 'file_name' (a relative path).
+
+    "recursive" is ignored if "image_files" is a list.
     """
 
     # Enumerate *relative* paths
     if isinstance(image_files,str):
         print('Enumerating image files in {}'.format(image_files))
         assert os.path.isdir(image_files), 'Invalid image folder {}'.format(image_files)
-        image_files = enumerate_files(image_files)
+        image_files = enumerate_files(image_files,recursive=recursive)
 
     images = []
     for fn in image_files:
@@ -499,7 +540,7 @@ def is_executable(name):
     return which(name) is not None
 
 
-def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None):
+def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
     """
     Read EXIF data for all images in input_folder.
 
@@ -516,6 +557,12 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None):
     if options is None:
         options = ReadExifOptions()
 
+    # Validate options
+    if options.tags_to_include is not None:
+        assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
+    if options.tags_to_exclude is not None:
+        assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
+
     if input_folder is None:
         input_folder = ''
     if len(input_folder) > 0:
@@ -542,7 +589,7 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None):
     assert is_executable(options.exiftool_command_name), 'exiftool not available'
 
     if filenames is None:
-        images = create_image_objects(input_folder)
+        images = create_image_objects(input_folder,recursive=recursive)
     else:
         assert isinstance(filenames,list)
         images = create_image_objects(filenames)
@@ -567,14 +614,16 @@ if False:
 
     #%%
 
-    input_folder =
-    output_file =
+    input_folder = r'C:\temp\md-name-testing'
+    output_file = None # r'C:\temp\md-name-testing\exif.json'
     options = ReadExifOptions()
     options.verbose = False
     options.n_workers = 10
     options.use_threads = False
     options.processing_library = 'pil'
     # options.processing_library = 'exiftool'
+    options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
+    # options.tags_to_exclude = ['MakerNote']
 
     results = read_exif_from_folder(input_folder,output_file,options)
 
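The most substantive change here is the new tags_to_include / tags_to_exclude options, which are mutually exclusive and are applied on both the PIL path (via the new _filter_tags()) and the exiftool path (filtered inline). A minimal usage sketch, assuming the package layout shown in this wheel; the folder and output file names are hypothetical:

from data_management.read_exif import ReadExifOptions, read_exif_from_folder

options = ReadExifOptions()

# Keep only the fields needed for timestamp/camera-model analysis; any other
# tag read from each image is dropped by the new filtering step
options.tags_to_include = ['DateTime','DateTimeOriginal','Make','Model']

# 'camera-trap-images' is a hypothetical folder; output_file is optional
results = read_exif_from_folder('camera-trap-images',
                                output_file='exif.json',
                                options=options,
                                recursive=True)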
data_management/remap_coco_categories.py
ADDED
@@ -0,0 +1,84 @@
+########
+#
+# remap_coco_categories.py
+#
+# Given a COCO-formatted dataset, remap the categories to a new mapping.
+#
+########
+
+#%% Imports and constants
+
+import os
+import json
+
+from copy import deepcopy
+
+
+#%% Main function
+
+def remap_coco_categories(input_data,
+                          output_category_name_to_id,
+                          input_category_name_to_output_category_name,
+                          output_file=None):
+    """
+    Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
+    writing the results to a new file.
+
+    output_category_name_to_id is a dict mapping strings to ints.
+
+    input_category_name_to_output_category_name is a dict mapping strings to strings.
+
+    [input_data] can be a COCO-formatted dict or a filename. If it's a dict, it will be copied,
+    not modified in place.
+    """
+
+    if isinstance(input_data,str):
+        assert os.path.isfile(input_data), "Can't find file {}".format(input_data)
+        with open(input_data,'r') as f:
+            input_data = json.load(f)
+        assert isinstance(input_data,dict), 'Illegal COCO input data'
+    else:
+        assert isinstance(input_data,dict), 'Illegal COCO input data'
+        input_data = deepcopy(input_data)
+
+    # It's safe to modify in-place now
+    output_data = input_data
+
+    # Read input name --> ID mapping
+    input_category_name_to_input_category_id = {}
+    for c in input_data['categories']:
+        input_category_name_to_input_category_id[c['name']] = c['id']
+
+    # Map input IDs --> output IDs
+    input_category_id_to_output_category_id = {}
+    for input_name in input_category_name_to_output_category_name.keys():
+        output_name = input_category_name_to_output_category_name[input_name]
+        assert output_name in output_category_name_to_id, \
+            'No output ID for {} --> {}'.format(input_name,output_name)
+        input_id = input_category_name_to_input_category_id[input_name]
+        output_id = output_category_name_to_id[output_name]
+        input_category_id_to_output_category_id[input_id] = output_id
+
+    # Map annotations
+    for ann in output_data['annotations']:
+        assert ann['category_id'] in input_category_id_to_output_category_id, \
+            'Unrecognized category ID {}'.format(ann['category_id'])
+        ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]
+
+    # Update the category list
+    output_categories = []
+    for output_name in output_category_name_to_id:
+        category = {'name':output_name,'id':output_category_name_to_id[output_name]}
+        output_categories.append(category)
+    output_data['categories'] = output_categories
+
+    if output_file is not None:
+        with open(output_file,'w') as f:
+            json.dump(output_data,f,indent=1)
+
+    return input_data
+
+
+#%% Command-line driver
+
+# TODO
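A minimal usage sketch for this new function, based only on the signature and docstring above; the file names and category names are hypothetical. Note that every category appearing in the input annotations must be covered by the mapping, or the function's assertions will fail:

from data_management.remap_coco_categories import remap_coco_categories

# Collapse two input categories into a single output category
output_category_name_to_id = {'animal':0}
input_to_output = {'deer':'animal','fox':'animal'}

remapped = remap_coco_categories('dataset.json',
                                 output_category_name_to_id,
                                 input_to_output,
                                 output_file='dataset_remapped.json')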
data_management/resize_coco_dataset.py
CHANGED
@@ -26,8 +26,7 @@ from md_visualization.visualization_utils import \
 def resize_coco_dataset(input_folder,input_filename,
                         output_folder,output_filename,
                         target_size=(-1,-1),
-                        correct_size_image_handling='copy',
-                        right_edge_quantization_threshold=None):
+                        correct_size_image_handling='copy'):
     """
     Given a COCO-formatted dataset (images in input_folder, data in input_filename), resize
     all the images to a target size (in output_folder) and scale bounding boxes accordingly
@@ -36,7 +35,7 @@ def resize_coco_dataset(input_folder,input_filename,
     target_size should be a tuple/list of ints, length 2. If either dimension is -1, aspect ratio
     will be preserved. If both dimensions are -1, this means "keep the original size". If
     both dimensions are -1 and correct_size_image_handling is copy, this function is basically
-    a no-op
+    a no-op.
 
     correct_size_image_handling can be 'copy' (in which case the original image is just copied
     to the output folder) or 'rewrite' (in which case the image is opened via PIL and re-written,
@@ -44,11 +43,7 @@ def resize_coco_dataset(input_folder,input_filename,
     you're superstitious about biases coming from images in a training set being written
     by different image encoders.
 
-
-    boxes that really should be running off the right side of the image only extend like 99%
-    of the way there, due to what appears to be a slight bias inherent to MD. If a box extends
-    within [right_edge_quantization_threshold] (a small number, from 0 to 1, but probably around
-    0.02) of the right edge of the image, it will be extended to the far right edge.
+    Returns the COCO database with resized images.
     """
 
     # Read input data
@@ -62,7 +57,9 @@ def resize_coco_dataset(input_folder,input_filename,
 
     # For each image
 
-    #
+    # TODO: this is trivially parallelizable
+    #
+    # im = d['images'][0]
     for im in tqdm(d['images']):
 
         input_fn_relative = im['file_name']
@@ -122,15 +119,6 @@ def resize_coco_dataset(input_folder,input_filename,
                         bbox[2] * width_scale,
                         bbox[3] * height_scale]
 
-                # Do we need to quantize this box?
-                if right_edge_quantization_threshold is not None and \
-                    right_edge_quantization_threshold > 0:
-                    bbox_right_edge_abs = bbox[0] + bbox[2]
-                    bbox_right_edge_norm = bbox_right_edge_abs / output_w
-                    bbox_right_edge_distance = (1.0 - bbox_right_edge_norm)
-                    if bbox_right_edge_distance < right_edge_quantization_threshold:
-                        bbox[2] = output_w - bbox[0]
-
                 ann['bbox'] = bbox
 
             # ...if this annotation has a box
@@ -143,6 +131,8 @@ def resize_coco_dataset(input_folder,input_filename,
     with open(output_filename,'w') as f:
         json.dump(d,f,indent=1)
 
+    return d
+
 # ...def resize_coco_dataset(...)
 
 
@@ -153,27 +143,20 @@ if False:
     pass
 
     #%% Test resizing
-
-
-
-    # target_size = (600,-1)
-
-    input_folder = os.path.expanduser('~/data/usgs-kissel-training')
-    input_filename = os.path.expanduser('~/data/usgs-tegus.json')
+
+    input_folder = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training')
+    input_filename = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training.json')
     target_size = (1600,-1)
 
-    output_filename = insert_before_extension(input_filename,'resized')
-    output_folder = input_folder + '-resized'
+    output_filename = insert_before_extension(input_filename,'resized-test')
+    output_folder = input_folder + '-resized-test'
 
     correct_size_image_handling = 'rewrite'
 
-    right_edge_quantization_threshold = 0.015
-
     resize_coco_dataset(input_folder,input_filename,
                         output_folder,output_filename,
                         target_size=target_size,
-                        correct_size_image_handling=correct_size_image_handling,
-                        right_edge_quantization_threshold=right_edge_quantization_threshold)
+                        correct_size_image_handling=correct_size_image_handling)
 
 
 #%% Preview
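With the right-edge quantization option removed, the function's surface is just the paths, target_size, and correct_size_image_handling. A usage sketch with hypothetical paths; a height of -1 scales each image to a width of 1600 while preserving aspect ratio, and bounding boxes are rescaled to match:

from data_management.resize_coco_dataset import resize_coco_dataset

# Hypothetical input/output locations
d = resize_coco_dataset('images','dataset.json',
                        'images-resized','dataset-resized.json',
                        target_size=(1600,-1),
                        correct_size_image_handling='rewrite')

# The function now also returns the updated COCO dict
print('Resized {} images'.format(len(d['images'])))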
data_management/wi_download_csv_to_coco.py
ADDED
@@ -0,0 +1,239 @@
+########
+#
+# wi_download_csv_to_coco.py
+#
+# Convert a .csv file from a Wildlife Insights project export to a COCO camera traps .json file.
+#
+# Currently assumes that common names are unique identifiers, which is convenient but unreliable.
+#
+########
+
+#%% Imports and constants
+
+import os
+import json
+import pandas as pd
+import numpy as np
+
+from tqdm import tqdm
+from collections import defaultdict
+
+from md_visualization import visualization_utils as vis_utils
+
+wi_extra_annotation_columns = \
+    ('is_blank','identified_by','wi_taxon_id','class','order','family','genus','species','uncertainty',
+     'number_of_objects','age','sex','animal_recognizable','individual_id','individual_animal_notes',
+     'behavior','highlighted','markings')
+
+wi_extra_image_columns = ('project_id','deployment_id')
+
+def make_location_id(project_id,deployment_id):
+    return 'project_' + str(project_id) + '_deployment_' + deployment_id
+
+def isnan(v):
+    try:
+        return np.isnan(v)
+    except Exception:
+        return False
+
+default_category_remappings = {
+    'Homo Species':'Human',
+    'Human-Camera Trapper':'Human',
+    'No CV Result':'Unknown'
+}
+
+
+#%%
+
+def wi_download_csv_to_coco(csv_file_in,
+                            coco_file_out=None,
+                            image_folder=None,
+                            validate_images=False,
+                            gs_prefix=None,
+                            verbose=True,
+                            category_remappings=default_category_remappings):
+    """
+    Convert a .csv file from a Wildlife Insights project export to a COCO
+    camera traps .json file.
+
+    If [coco_file_out] is None, uses [csv_file_in].json
+
+    gs_prefix is a string to remove from GS URLs to convert to path names... for example, if
+    your gs:// URLs look like:
+
+    gs://11234134_xyz/deployment/55554/dfadfasdfs.jpg
+
+    ...and you specify gs_prefix='11234134_xyz/deployment/', the filenames in
+    the .json file will look like:
+
+    55554/dfadfasdfs.jpg
+
+    exclude_re discards matching images; typically use to omit thumbnail images.
+    """
+
+    #%% Create COCO dictionaries
+
+    category_name_to_id = {}
+    category_name_to_id['empty'] = 0
+
+    df = pd.read_csv(csv_file_in)
+
+    print('Read {} rows from {}'.format(len(df),csv_file_in))
+
+    image_id_to_image = {}
+    image_id_to_annotations = defaultdict(list)
+
+    # i_row = 0; row = df.iloc[i_row]
+    for i_row,row in df.iterrows():
+
+        image_id = row['image_id']
+
+        if image_id not in image_id_to_image:
+
+            im = {}
+            image_id_to_image[image_id] = im
+
+            im['id'] = image_id
+
+            gs_url = row['location']
+            assert gs_url.startswith('gs://')
+
+            file_name = gs_url.replace('gs://','')
+            if gs_prefix is not None:
+                file_name = file_name.replace(gs_prefix,'')
+
+            location_id = make_location_id(row['project_id'],row['deployment_id'])
+            im['file_name'] = file_name
+            im['location'] = location_id
+            im['datetime'] = row['timestamp']
+
+            im['wi_image_info'] = {}
+            for s in wi_extra_image_columns:
+                im['wi_image_info'][s] = str(row[s])
+
+        else:
+
+            im = image_id_to_image[image_id]
+            assert im['datetime'] == row['timestamp']
+            location_id = make_location_id(row['project_id'],row['deployment_id'])
+            assert im['location'] == location_id
+
+        category_name = row['common_name']
+        if category_remappings is not None and category_name in category_remappings:
+            category_name = category_remappings[category_name]
+
+        if category_name == 'Blank':
+            category_name = 'empty'
+            assert row['is_blank'] == 1
+        else:
+            assert row['is_blank'] == 0
+        assert isinstance(category_name,str)
+        if category_name in category_name_to_id:
+            category_id = category_name_to_id[category_name]
+        else:
+            category_id = len(category_name_to_id)
+            category_name_to_id[category_name] = category_id
+
+        ann = {}
+        ann['image_id'] = image_id
+        annotations_this_image = image_id_to_annotations[image_id]
+        annotation_number = len(annotations_this_image)
+        ann['id'] = image_id + '_' + str(annotation_number).zfill(2)
+        ann['category_id'] = category_id
+        annotations_this_image.append(ann)
+
+        extra_info = {}
+        for s in wi_extra_annotation_columns:
+            v = row[s]
+            if not isnan(v):
+                extra_info[s] = v
+        ann['wi_extra_info'] = extra_info
+
+    # ...for each row
+
+    images = list(image_id_to_image.values())
+    categories = []
+    for category_name in category_name_to_id:
+        category_id = category_name_to_id[category_name]
+        categories.append({'id':category_id,'name':category_name})
+    annotations = []
+    for image_id in image_id_to_annotations:
+        annotations_this_image = image_id_to_annotations[image_id]
+        for ann in annotations_this_image:
+            annotations.append(ann)
+    info = {'version':'1.00','description':'converted from WI export'}
+    info['source_file'] = csv_file_in
+    coco_data = {}
+    coco_data['info'] = info
+    coco_data['images'] = images
+    coco_data['annotations'] = annotations
+    coco_data['categories'] = categories
+
+
+    ##%% Validate images, add sizes
+
+    if validate_images:
+
+        print('Validating images')
+        # TODO: trivially parallelizable
+
+        assert os.path.isdir(image_folder), \
+            'Must specify a valid image folder if you specify validate_images=True'
+
+        # im = images[0]
+        for im in tqdm(images):
+            file_name_relative = im['file_name']
+            file_name_abs = os.path.join(image_folder,file_name_relative)
+            assert os.path.isfile(file_name_abs)
+
+            im['corrupt'] = False
+            try:
+                pil_im = vis_utils.load_image(file_name_abs)
+            except Exception:
+                im['corrupt'] = True
+            if not im['corrupt']:
+                im['width'] = pil_im.width
+                im['height'] = pil_im.height
+
+
+    ##%% Write output json
+
+    if coco_file_out is None:
+
+        coco_file_out = csv_file_in + '.json'
+
+    with open(coco_file_out,'w') as f:
+        json.dump(coco_data,f,indent=1)
+
+
+    ##%% Validate output
+
+    from data_management.databases.integrity_check_json_db import \
+        IntegrityCheckOptions,integrity_check_json_db
+    options = IntegrityCheckOptions()
+    options.baseDir = image_folder
+    options.bCheckImageExistence = True
+    options.verbose = verbose
+    _ = integrity_check_json_db(coco_file_out,options)
+
+
+
+#%% Interactive driver
+
+if False:
+
+    #%%
+
+    base_folder = r'a/b/c'
+    csv_file_in = os.path.join(base_folder,'images.csv')
+    coco_file_out = None
+    gs_prefix = 'a_b_c_main/'
+    image_folder = os.path.join(base_folder,'images')
+    validate_images = False
+    verbose = True
+    category_remappings = default_category_remappings
+
+
+#%% Command-line driver
+
+# TODO
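A usage sketch for the new converter, with hypothetical paths. Per the docstring, gs_prefix trims the bucket portion of each gs:// URL so that the file names written to the output .json are relative to a local image folder; validate_images requires that folder to exist:

from data_management.wi_download_csv_to_coco import wi_download_csv_to_coco

# Hypothetical Wildlife Insights export; coco_file_out=None writes
# the output next to the input as images.csv.json
wi_download_csv_to_coco('wi_export/images.csv',
                        coco_file_out=None,
                        image_folder='wi_export/images',
                        validate_images=False,
                        gs_prefix='12345_project/deployment/')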
data_management/yolo_output_to_md_output.py
CHANGED
@@ -61,21 +61,37 @@ from detection.run_detector import CONF_DIGITS, COORD_DIGITS
 
 def read_classes_from_yolo_dataset_file(fn):
     """
-    Read a dictionary mapping integer class IDs to class names from a YOLOv5
-    file.
+    Read a dictionary mapping integer class IDs to class names from a YOLOv5/YOLOv8
+    dataset.yaml file or a .json file. A .json file should contain a dictionary mapping
+    integer category IDs to string category names.
     """
 
-
-
-
-
-
-
-
-
-
-
+    if fn.endswith('.yml') or fn.endswith('.yaml'):
+
+        with open(fn,'r') as f:
+            lines = f.readlines()
+
+        category_id_to_name = {}
+        pat = '\d+:.+'
+        for s in lines:
+            if re.search(pat,s) is not None:
+                tokens = s.split(':')
+                assert len(tokens) == 2, 'Invalid token in category file {}'.format(fn)
+                category_id_to_name[int(tokens[0].strip())] = tokens[1].strip()
+
+    elif fn.endswith('.json'):
+
+        with open(fn,'r') as f:
+            d_in = json.load(f)
+        category_id_to_name = {}
+        for k in d_in.keys():
+            category_id_to_name[int(k)] = d_in[k]
 
+    else:
+
+        raise ValueError('Unrecognized category file type: {}'.format(fn))
+
+    assert len(category_id_to_name) > 0, 'Failed to read class mappings from {}'.format(fn)
     return category_id_to_name
 
 
@@ -125,7 +141,8 @@ def yolo_json_output_to_md_output(yolo_json_file, image_folder,
     if image_id_to_error is None:
         image_id_to_error = {}
 
-    print('Converting {} to MD format'.format(yolo_json_file))
+    print('Converting {} to MD format and writing results to {}'.format(
+        yolo_json_file,output_file))
 
     if isinstance(yolo_category_id_to_name,str):
         assert os.path.isfile(yolo_category_id_to_name), \
@@ -194,6 +211,16 @@ def yolo_json_output_to_md_output(yolo_json_file, image_folder,
 
     # ...if image IDs are formatted as integers in YOLO output
 
+    # In a modified version of val.py, we use negative category IDs to indicate an error
+    # that happened during inference (typically truncated images with valid headers,
+    # so corruption was not detected during val.py's initial corruption check pass).
+    for det in detections:
+        if det['category_id'] < 0:
+            assert 'error' in det, 'Negative category ID present with no error string'
+            error_string = det['error']
+            print('Caught inference-time failure {} for image {}'.format(error_string,det['image_id']))
+            image_id_to_error[det['image_id']] = error_string
+
     output_images = []
 
     # image_file_relative = image_files_relative[10]
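read_classes_from_yolo_dataset_file() now accepts either a dataset.yaml (any line matching the regex \d+:.+ is parsed as an 'id: name' pair) or a .json dict. A small round-trip sketch using the .json form; the file name and class names are hypothetical:

import json
from data_management.yolo_output_to_md_output import read_classes_from_yolo_dataset_file

# A .json class map: keys are integer IDs (as strings, since JSON keys
# are strings), values are class names
with open('classes.json','w') as f:
    json.dump({'0':'animal','1':'person','2':'vehicle'},f)

category_id_to_name = read_classes_from_yolo_dataset_file('classes.json')
assert category_id_to_name[0] == 'animal'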