megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/batch_processing/data_preparation/manage_local_batch.py +297 -202
- api/batch_processing/data_preparation/manage_video_batch.py +7 -2
- api/batch_processing/postprocessing/add_max_conf.py +1 -0
- api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
- api/batch_processing/postprocessing/compare_batch_results.py +111 -61
- api/batch_processing/postprocessing/convert_output_format.py +24 -6
- api/batch_processing/postprocessing/load_api_results.py +56 -72
- api/batch_processing/postprocessing/md_to_labelme.py +119 -51
- api/batch_processing/postprocessing/merge_detections.py +30 -5
- api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
- api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
- api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
- api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
- classification/prepare_classification_script.py +191 -191
- data_management/cct_json_utils.py +7 -2
- data_management/coco_to_labelme.py +263 -0
- data_management/coco_to_yolo.py +72 -48
- data_management/databases/integrity_check_json_db.py +75 -64
- data_management/databases/subset_json_db.py +1 -1
- data_management/generate_crops_from_cct.py +1 -1
- data_management/get_image_sizes.py +44 -26
- data_management/importers/animl_results_to_md_results.py +3 -5
- data_management/importers/noaa_seals_2019.py +2 -2
- data_management/importers/zamba_results_to_md_results.py +2 -2
- data_management/labelme_to_coco.py +264 -127
- data_management/labelme_to_yolo.py +96 -53
- data_management/lila/create_lila_blank_set.py +557 -0
- data_management/lila/create_lila_test_set.py +2 -1
- data_management/lila/create_links_to_md_results_files.py +1 -1
- data_management/lila/download_lila_subset.py +138 -45
- data_management/lila/generate_lila_per_image_labels.py +23 -14
- data_management/lila/get_lila_annotation_counts.py +16 -10
- data_management/lila/lila_common.py +15 -42
- data_management/lila/test_lila_metadata_urls.py +116 -0
- data_management/read_exif.py +65 -16
- data_management/remap_coco_categories.py +84 -0
- data_management/resize_coco_dataset.py +14 -31
- data_management/wi_download_csv_to_coco.py +239 -0
- data_management/yolo_output_to_md_output.py +40 -13
- data_management/yolo_to_coco.py +313 -100
- detection/process_video.py +36 -14
- detection/pytorch_detector.py +1 -1
- detection/run_detector.py +73 -18
- detection/run_detector_batch.py +116 -27
- detection/run_inference_with_yolov5_val.py +135 -27
- detection/run_tiled_inference.py +153 -43
- detection/tf_detector.py +2 -1
- detection/video_utils.py +4 -2
- md_utils/ct_utils.py +101 -6
- md_utils/md_tests.py +264 -17
- md_utils/path_utils.py +326 -47
- md_utils/process_utils.py +26 -7
- md_utils/split_locations_into_train_val.py +215 -0
- md_utils/string_utils.py +10 -0
- md_utils/url_utils.py +66 -3
- md_utils/write_html_image_list.py +12 -2
- md_visualization/visualization_utils.py +380 -74
- md_visualization/visualize_db.py +41 -10
- md_visualization/visualize_detector_output.py +185 -104
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
- taxonomy_mapping/map_new_lila_datasets.py +43 -39
- taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
- taxonomy_mapping/preview_lila_taxonomy.py +27 -27
- taxonomy_mapping/species_lookup.py +33 -13
- taxonomy_mapping/taxonomy_csv_checker.py +7 -5
- md_visualization/visualize_megadb.py +0 -183
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
|
@@ -23,12 +23,10 @@ import collections
|
|
|
23
23
|
import copy
|
|
24
24
|
import errno
|
|
25
25
|
import io
|
|
26
|
-
import itertools
|
|
27
26
|
import os
|
|
28
27
|
import sys
|
|
29
28
|
import time
|
|
30
29
|
import uuid
|
|
31
|
-
import urllib
|
|
32
30
|
import warnings
|
|
33
31
|
import random
|
|
34
32
|
|
|
@@ -114,10 +112,18 @@ class PostProcessingOptions:
|
|
|
114
112
|
# detections_animal, detections_person, detections_vehicle
|
|
115
113
|
rendering_bypass_sets = []
|
|
116
114
|
|
|
117
|
-
#
|
|
115
|
+
# If this is None, choose a confidence threshold based on the detector version.
|
|
116
|
+
#
|
|
117
|
+
# This can either be a float or a dictionary mapping category names (not IDs) to
|
|
118
|
+
# thresholds. The category "default" can be used to specify thresholds for
|
|
119
|
+
# other categories. Currently the use of a dict here is not supported when
|
|
120
|
+
# ground truth is supplied.
|
|
118
121
|
confidence_threshold = None
|
|
119
122
|
|
|
120
123
|
# Confidence threshold to apply to classification (not detection) results
|
|
124
|
+
#
|
|
125
|
+
# Only a float is supported here (unlike the "confidence_threshold" parameter, which
|
|
126
|
+
# can be a dict).
|
|
121
127
|
classification_confidence_threshold = 0.5
|
|
122
128
|
|
|
123
129
|
# Used for summary statistics only
|
|
@@ -163,6 +169,9 @@ class PostProcessingOptions:
|
|
|
163
169
|
#
|
|
164
170
|
# Currently only supported when ground truth is unavailable
|
|
165
171
|
include_almost_detections = False
|
|
172
|
+
|
|
173
|
+
# Only a float is supported here (unlike the "confidence_threshold" parameter, which
|
|
174
|
+
# can be a dict).
|
|
166
175
|
almost_detection_confidence_threshold = None
|
|
167
176
|
|
|
168
177
|
# Control rendering parallelization
|
|
@@ -427,12 +436,25 @@ def render_bounding_boxes(
|
|
|
427
436
|
vis_utils.render_db_bounding_boxes(ground_truth_boxes, gt_classes, image,
|
|
428
437
|
original_size=original_size,label_map=label_map,
|
|
429
438
|
thickness=4,expansion=4)
|
|
439
|
+
|
|
440
|
+
# render_detection_bounding_boxes expects either a float or a dict mapping
|
|
441
|
+
# category IDs to thresholds.
|
|
442
|
+
if isinstance(options.confidence_threshold,float):
|
|
443
|
+
rendering_confidence_threshold = options.confidence_threshold
|
|
444
|
+
else:
|
|
445
|
+
category_ids = set()
|
|
446
|
+
for d in detections:
|
|
447
|
+
category_ids.add(d['category'])
|
|
448
|
+
rendering_confidence_threshold = {}
|
|
449
|
+
for category_id in category_ids:
|
|
450
|
+
rendering_confidence_threshold[category_id] = \
|
|
451
|
+
get_threshold_for_category_id(category_id, options, detection_categories)
|
|
430
452
|
|
|
431
453
|
vis_utils.render_detection_bounding_boxes(
|
|
432
454
|
detections, image,
|
|
433
455
|
label_map=detection_categories,
|
|
434
456
|
classification_label_map=classification_categories,
|
|
435
|
-
confidence_threshold=
|
|
457
|
+
confidence_threshold=rendering_confidence_threshold,
|
|
436
458
|
thickness=options.line_thickness,
|
|
437
459
|
expansion=options.box_expansion)
|
|
438
460
|
|
|
@@ -460,7 +482,14 @@ def render_bounding_boxes(
|
|
|
460
482
|
|
|
461
483
|
# Optionally add links back to the original images
|
|
462
484
|
if options.link_images_to_originals and (image_full_path is not None):
|
|
463
|
-
|
|
485
|
+
|
|
486
|
+
# Handling special characters in links has been pushed down into
|
|
487
|
+
# write_html_image_list
|
|
488
|
+
#
|
|
489
|
+
# link_target = image_full_path.replace('\\','/')
|
|
490
|
+
# link_target = urllib.parse.quote(link_target)
|
|
491
|
+
link_target = image_full_path
|
|
492
|
+
info['linkTarget'] = link_target
|
|
464
493
|
|
|
465
494
|
return info
|
|
466
495
|
|
|
@@ -535,15 +564,68 @@ def prepare_html_subpages(images_html, output_dir, options=None):
|
|
|
535
564
|
|
|
536
565
|
# ...prepare_html_subpages()
|
|
537
566
|
|
|
538
|
-
|
|
539
|
-
|
|
567
|
+
|
|
568
|
+
# Determine the confidence threshold we should use for a specific category name
|
|
569
|
+
def get_threshold_for_category_name(category_name,options):
    """
    Determine the confidence threshold we should use for the category named
    [category_name].

    [options.confidence_threshold] can be either:

    * A single number, which is returned for every category.
    * A dict mapping category names (not IDs) to thresholds.  The key "default"
      supplies the threshold for any category that is not listed explicitly.

    Raises an AssertionError if the threshold is neither a number nor a dict, or if
    it is a dict containing neither [category_name] nor "default".
    """

    threshold_spec = options.confidence_threshold

    # Accept ints (e.g. 0 or 1) as scalar thresholds, not just floats.  bool is a
    # subclass of int but is never a meaningful threshold, so it is excluded and
    # falls through to the type assertion below.
    if isinstance(threshold_spec,(int,float)) and not isinstance(threshold_spec,bool):
        return threshold_spec

    assert isinstance(threshold_spec,dict), \
        'confidence_threshold must either be a float or a dict'

    if category_name in threshold_spec:
        return threshold_spec[category_name]

    # No category-specific entry; fall back to the "default" entry, which is
    # required in this case.
    assert 'default' in threshold_spec, \
        'category {} not in confidence_threshold dict, and no default supplied'.format(
            category_name)
    return threshold_spec['default']
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
# Determine the confidence threshold we should use for a specific category ID
|
|
589
|
+
#
|
|
590
|
+
# detection_categories is a dict mapping category IDs to names.
|
|
591
|
+
def get_threshold_for_category_id(category_id,options,detection_categories):
    """
    Determine the confidence threshold we should use for the category with ID
    [category_id].

    [detection_categories] is a dict mapping category IDs to names.

    If [options.confidence_threshold] is a single number, it is returned as-is and
    [category_id] is not validated.  Otherwise the ID is translated to a category name
    and the lookup is delegated to get_threshold_for_category_name().

    Raises an AssertionError if a per-category lookup is required and [category_id]
    is not present in [detection_categories].
    """

    threshold_spec = options.confidence_threshold

    # Accept ints (e.g. 0 or 1) as scalar thresholds, not just floats.  bool is a
    # subclass of int but is never a meaningful threshold, so it is excluded.
    if isinstance(threshold_spec,(int,float)) and not isinstance(threshold_spec,bool):
        return threshold_spec

    assert category_id in detection_categories, \
        'Invalid category ID {}'.format(category_id)

    category_name = detection_categories[category_id]

    return get_threshold_for_category_name(category_name,options)
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
# Get a sorted list of unique categories (as string IDs) above the threshold for this image
|
|
605
|
+
#
|
|
606
|
+
# "detection_categories" is a dict mapping category IDs to names.
|
|
607
|
+
def get_positive_categories(detections,options,detection_categories):
    """
    Return a sorted list of the unique category IDs in [detections] that have at
    least one detection at or above the confidence threshold for that category.

    [detection_categories] is a dict mapping category IDs to names; it is passed
    through to get_threshold_for_category_id() to resolve per-category thresholds.
    """

    category_ids_above_threshold = set()

    for det in detections:
        category_id = det['category']
        min_conf = get_threshold_for_category_id(category_id, options, detection_categories)
        if det['conf'] >= min_conf:
            category_ids_above_threshold.add(category_id)

    return sorted(category_ids_above_threshold)
|
|
545
614
|
|
|
546
615
|
|
|
616
|
+
# Determine whether any positive detections are present in the detection list
|
|
617
|
+
# [detections].
|
|
618
|
+
def has_positive_detection(detections,options,detection_categories):
    """
    Return True if any detection in [detections] has a confidence at or above the
    threshold for its category, otherwise False.

    [detection_categories] is a dict mapping category IDs to names; it is passed
    through to get_threshold_for_category_id() to resolve per-category thresholds.
    """

    for det in detections:
        min_conf = get_threshold_for_category_id(det['category'], options, detection_categories)
        # Stop at the first above-threshold detection
        if det['conf'] >= min_conf:
            return True

    return False
|
|
627
|
+
|
|
628
|
+
|
|
547
629
|
# Render an image (with no ground truth information)
|
|
548
630
|
#
|
|
549
631
|
# Returns a list of rendering structs, where the first item is a category (e.g. "detections_animal"),
|
|
@@ -573,8 +655,12 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,
|
|
|
573
655
|
max_conf = file_info[1]
|
|
574
656
|
detections = file_info[2]
|
|
575
657
|
|
|
658
|
+
# Determine whether any positive detections are present (using a threshold that
|
|
659
|
+
# may vary by category)
|
|
660
|
+
found_positive_detection = has_positive_detection(detections,options,detection_categories)
|
|
661
|
+
|
|
576
662
|
detection_status = DetectionStatus.DS_UNASSIGNED
|
|
577
|
-
if
|
|
663
|
+
if found_positive_detection:
|
|
578
664
|
detection_status = DetectionStatus.DS_POSITIVE
|
|
579
665
|
else:
|
|
580
666
|
if options.include_almost_detections:
|
|
@@ -587,7 +673,7 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,
|
|
|
587
673
|
|
|
588
674
|
if detection_status == DetectionStatus.DS_POSITIVE:
|
|
589
675
|
if options.separate_detections_by_category:
|
|
590
|
-
positive_categories = tuple(get_positive_categories(detections,options))
|
|
676
|
+
positive_categories = tuple(get_positive_categories(detections,options,detection_categories))
|
|
591
677
|
if positive_categories not in detection_categories_to_results_name:
|
|
592
678
|
raise ValueError('Error: {} not in category mapping (file {})'.format(
|
|
593
679
|
str(positive_categories),image_relative_path))
|
|
@@ -703,7 +789,7 @@ def render_image_with_gt(file_info,ground_truth_indexed_db,
|
|
|
703
789
|
f'ground truth status (status: {gt_status}, classes: {gt_class_summary})')
|
|
704
790
|
return None
|
|
705
791
|
|
|
706
|
-
detected =
|
|
792
|
+
detected = has_positive_detection(detections, options, detection_categories)
|
|
707
793
|
|
|
708
794
|
if gt_presence and detected:
|
|
709
795
|
if '_classification_accuracy' not in image.keys():
|
|
@@ -766,6 +852,10 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
766
852
|
|
|
767
853
|
ground_truth_indexed_db = None
|
|
768
854
|
|
|
855
|
+
if (options.ground_truth_json_file is not None):
|
|
856
|
+
assert (options.confidence_threshold is None) or (isinstance(options.confidence_threshold,float)), \
|
|
857
|
+
'Variable confidence thresholds are not supported when supplying ground truth'
|
|
858
|
+
|
|
769
859
|
if (options.ground_truth_json_file is not None) and (len(options.ground_truth_json_file) > 0):
|
|
770
860
|
|
|
771
861
|
if options.separate_detections_by_category:
|
|
@@ -791,7 +881,7 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
791
881
|
# If the caller hasn't supplied results, load them
|
|
792
882
|
if options.api_detection_results is None:
|
|
793
883
|
detections_df, other_fields = load_api_results(
|
|
794
|
-
options.api_output_file,
|
|
884
|
+
options.api_output_file, force_forward_slashes=True,
|
|
795
885
|
filename_replacements=options.api_output_filename_replacements)
|
|
796
886
|
ppresults.api_detection_results = detections_df
|
|
797
887
|
ppresults.api_other_fields = other_fields
|
|
@@ -821,7 +911,7 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
821
911
|
n_failures = detections_df['failure'].count()
|
|
822
912
|
print('Ignoring {} failed images'.format(n_failures))
|
|
823
913
|
# Explicitly forcing a copy() operation here to suppress "trying to be set
|
|
824
|
-
# on a copy"
|
|
914
|
+
# on a copy" warnings (and associated risks) below.
|
|
825
915
|
detections_df = detections_df[detections_df['failure'].isna()].copy()
|
|
826
916
|
|
|
827
917
|
assert other_fields is not None
|
|
@@ -836,31 +926,24 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
836
926
|
for k, v in classification_categories.items()
|
|
837
927
|
}
|
|
838
928
|
|
|
839
|
-
#
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
else:
|
|
854
|
-
detections_df[det_status] = np.where(
|
|
855
|
-
detections_df['max_detection_conf'] >= options.confidence_threshold,
|
|
856
|
-
DetectionStatus.DS_POSITIVE, DetectionStatus.DS_NEGATIVE)
|
|
857
|
-
|
|
858
|
-
n_positives = sum(detections_df[det_status] == DetectionStatus.DS_POSITIVE)
|
|
929
|
+
# Count detections and almost-detections for reporting purposes
|
|
930
|
+
n_positives = 0
|
|
931
|
+
n_almosts = 0
|
|
932
|
+
|
|
933
|
+
for i_row,row in tqdm(detections_df.iterrows(),total=len(detections_df)):
|
|
934
|
+
|
|
935
|
+
detections = row['detections']
|
|
936
|
+
max_conf = row['max_detection_conf']
|
|
937
|
+
if has_positive_detection(detections, options, detection_categories):
|
|
938
|
+
n_positives += 1
|
|
939
|
+
elif (options.almost_detection_confidence_threshold is not None) and \
|
|
940
|
+
(max_conf >= options.almost_detection_confidence_threshold):
|
|
941
|
+
n_almosts += 1
|
|
942
|
+
|
|
859
943
|
print(f'Finished loading and preprocessing {len(detections_df)} rows '
|
|
860
944
|
f'from detector output, predicted {n_positives} positives.')
|
|
861
945
|
|
|
862
946
|
if options.include_almost_detections:
|
|
863
|
-
n_almosts = sum(detections_df[det_status] == DetectionStatus.DS_ALMOST)
|
|
864
947
|
print('...and {} almost-positives'.format(n_almosts))
|
|
865
948
|
|
|
866
949
|
|
|
@@ -1211,7 +1294,8 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1211
1294
|
for file_info in tqdm(files_to_render):
|
|
1212
1295
|
rendering_results.append(render_image_with_gt(
|
|
1213
1296
|
file_info,ground_truth_indexed_db,
|
|
1214
|
-
detection_categories,classification_categories
|
|
1297
|
+
detection_categories,classification_categories,
|
|
1298
|
+
options=options))
|
|
1215
1299
|
elapsed = time.time() - start_time
|
|
1216
1300
|
|
|
1217
1301
|
# Map all the rendering results in the list rendering_results into the
|
|
@@ -1241,6 +1325,12 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1241
1325
|
image_counts['tp']
|
|
1242
1326
|
)
|
|
1243
1327
|
|
|
1328
|
+
confidence_threshold_string = ''
|
|
1329
|
+
if isinstance(options.confidence_threshold,float):
|
|
1330
|
+
confidence_threshold_string = '{:.2%}'.format(options.confidence_threshold)
|
|
1331
|
+
else:
|
|
1332
|
+
confidence_threshold_string = str(options.confidence_threshold)
|
|
1333
|
+
|
|
1244
1334
|
index_page = """<html>
|
|
1245
1335
|
{}
|
|
1246
1336
|
<body>
|
|
@@ -1255,7 +1345,7 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1255
1345
|
|
|
1256
1346
|
<h3>Sample images</h3>
|
|
1257
1347
|
<div class="contentdiv">
|
|
1258
|
-
<p>A sample of {} images, annotated with detections above {
|
|
1348
|
+
<p>A sample of {} images, annotated with detections above confidence {}.</p>
|
|
1259
1349
|
<a href="tp.html">True positives (TP)</a> ({}) ({:0.1%})<br/>
|
|
1260
1350
|
CLASSIFICATION_PLACEHOLDER_1
|
|
1261
1351
|
<a href="tn.html">True negatives (TN)</a> ({}) ({:0.1%})<br/>
|
|
@@ -1265,7 +1355,7 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1265
1355
|
</div>
|
|
1266
1356
|
""".format(
|
|
1267
1357
|
style_header,job_name_string,model_version_string,
|
|
1268
|
-
image_count,
|
|
1358
|
+
image_count, confidence_threshold_string,
|
|
1269
1359
|
all_tp_count, all_tp_count/total_count,
|
|
1270
1360
|
image_counts['tn'], image_counts['tn']/total_count,
|
|
1271
1361
|
image_counts['fp'], image_counts['fp']/total_count,
|
|
@@ -1275,11 +1365,11 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1275
1365
|
index_page += """
|
|
1276
1366
|
<h3>Detection results</h3>
|
|
1277
1367
|
<div class="contentdiv">
|
|
1278
|
-
<p>At a confidence threshold of {
|
|
1368
|
+
<p>At a confidence threshold of {}, precision={:0.1%}, recall={:0.1%}</p>
|
|
1279
1369
|
<p><strong>Precision/recall summary for all {} images</strong></p><img src="{}"><br/>
|
|
1280
1370
|
</div>
|
|
1281
1371
|
""".format(
|
|
1282
|
-
|
|
1372
|
+
confidence_threshold_string, precision_at_confidence_threshold, recall_at_confidence_threshold,
|
|
1283
1373
|
len(detections_df), pr_figure_relative_filename
|
|
1284
1374
|
)
|
|
1285
1375
|
|
|
@@ -1345,46 +1435,60 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1345
1435
|
# Accumulate html image structs (in the format expected by write_html_image_list)
|
|
1346
1436
|
# for each category
|
|
1347
1437
|
images_html = collections.defaultdict(list)
|
|
1348
|
-
|
|
1438
|
+
|
|
1349
1439
|
|
|
1350
1440
|
# Add default entries by accessing them for the first time
|
|
1351
1441
|
|
|
1352
|
-
# Maps detection
|
|
1353
|
-
# "detections_human"
|
|
1442
|
+
# Maps sorted tuples of detection category IDs (string ints) - e.g. ("1",), ("1", "4", "7") - to
|
|
1443
|
+
# result set names, e.g. "detections_human", "detections_cat_truck".
|
|
1354
1444
|
detection_categories_to_results_name = {}
|
|
1355
1445
|
|
|
1356
1446
|
# Keep track of which categories are single-class (e.g. "animal") and which are
|
|
1357
1447
|
# combinations (e.g. "animal_vehicle")
|
|
1358
1448
|
detection_categories_to_category_count = {}
|
|
1359
|
-
|
|
1449
|
+
|
|
1450
|
+
# For the creation of a "non-detections" category
|
|
1451
|
+
images_html['non_detections']
|
|
1360
1452
|
detection_categories_to_category_count['non_detections'] = 0
|
|
1361
|
-
|
|
1453
|
+
|
|
1362
1454
|
|
|
1363
1455
|
if not options.separate_detections_by_category:
|
|
1364
1456
|
# For the creation of a "detections" category
|
|
1365
1457
|
images_html['detections']
|
|
1458
|
+
detection_categories_to_category_count['detections'] = 0
|
|
1366
1459
|
else:
|
|
1367
1460
|
# Add a set of results for each category and combination of categories, e.g.
|
|
1368
1461
|
# "detections_animal_vehicle". When we're using this script for non-MegaDetector
|
|
1369
1462
|
# results, this can generate lots of categories, e.g. detections_bear_bird_cat_dog_pig.
|
|
1370
1463
|
# We'll keep that huge set of combinations in this map, but we'll only write
|
|
1371
1464
|
# out links for the ones that are non-empty.
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1465
|
+
used_combinations = set()
|
|
1466
|
+
|
|
1467
|
+
# row = images_to_visualize.iloc[0]
|
|
1468
|
+
for i_row, row in images_to_visualize.iterrows():
|
|
1469
|
+
detections_this_row = row['detections']
|
|
1470
|
+
above_threshold_category_ids_this_row = set()
|
|
1471
|
+
for detection in detections_this_row:
|
|
1472
|
+
threshold = get_threshold_for_category_id(detection['category'], options, detection_categories)
|
|
1473
|
+
if detection['conf'] >= threshold:
|
|
1474
|
+
above_threshold_category_ids_this_row.add(detection['category'])
|
|
1475
|
+
if len(above_threshold_category_ids_this_row) == 0:
|
|
1476
|
+
continue
|
|
1477
|
+
sorted_categories_this_row = tuple(sorted(above_threshold_category_ids_this_row))
|
|
1478
|
+
used_combinations.add(sorted_categories_this_row)
|
|
1479
|
+
|
|
1480
|
+
for sorted_subset in used_combinations:
|
|
1481
|
+
assert len(sorted_subset) > 0
|
|
1379
1482
|
results_name = 'detections'
|
|
1380
1483
|
for category_id in sorted_subset:
|
|
1381
1484
|
results_name = results_name + '_' + detection_categories[category_id]
|
|
1382
1485
|
images_html[results_name]
|
|
1383
1486
|
detection_categories_to_results_name[sorted_subset] = results_name
|
|
1384
|
-
detection_categories_to_category_count[results_name] = len(sorted_subset)
|
|
1487
|
+
detection_categories_to_category_count[results_name] = len(sorted_subset)
|
|
1385
1488
|
|
|
1386
1489
|
if options.include_almost_detections:
|
|
1387
1490
|
images_html['almost_detections']
|
|
1491
|
+
detection_categories_to_category_count['almost_detections'] = 0
|
|
1388
1492
|
|
|
1389
1493
|
# Create output directories
|
|
1390
1494
|
for res in images_html.keys():
|
|
@@ -1495,9 +1599,15 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1495
1599
|
almost_detection_string = ' (“almost detection” threshold at {:.1%})'.format(
|
|
1496
1600
|
options.almost_detection_confidence_threshold)
|
|
1497
1601
|
|
|
1602
|
+
confidence_threshold_string = ''
|
|
1603
|
+
if isinstance(options.confidence_threshold,float):
|
|
1604
|
+
confidence_threshold_string = '{:.2%}'.format(options.confidence_threshold)
|
|
1605
|
+
else:
|
|
1606
|
+
confidence_threshold_string = str(options.confidence_threshold)
|
|
1607
|
+
|
|
1498
1608
|
index_page = """<html>\n{}\n<body>\n
|
|
1499
1609
|
<h2>Visualization of results for {}</h2>\n
|
|
1500
|
-
<p>A sample of {} images (of {} total)FAILURE_PLACEHOLDER, annotated with detections above {
|
|
1610
|
+
<p>A sample of {} images (of {} total)FAILURE_PLACEHOLDER, annotated with detections above confidence {}{}.</p>\n
|
|
1501
1611
|
|
|
1502
1612
|
<div class="contentdiv">
|
|
1503
1613
|
<p>Model version: {}</p>
|
|
@@ -1505,7 +1615,7 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1505
1615
|
|
|
1506
1616
|
<h3>Sample images</h3>\n
|
|
1507
1617
|
<div class="contentdiv">\n""".format(
|
|
1508
|
-
style_header, job_name_string, image_count, len(detections_df),
|
|
1618
|
+
style_header, job_name_string, image_count, len(detections_df), confidence_threshold_string,
|
|
1509
1619
|
almost_detection_string, model_version_string)
|
|
1510
1620
|
|
|
1511
1621
|
failure_string = ''
|
|
@@ -1521,7 +1631,17 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1521
1631
|
friendly_name = friendly_name.capitalize()
|
|
1522
1632
|
return friendly_name
|
|
1523
1633
|
|
|
1524
|
-
|
|
1634
|
+
sorted_result_set_names = sorted(list(images_html.keys()))
|
|
1635
|
+
|
|
1636
|
+
result_set_name_to_count = {}
|
|
1637
|
+
for result_set_name in sorted_result_set_names:
|
|
1638
|
+
image_count = image_counts[result_set_name]
|
|
1639
|
+
result_set_name_to_count[result_set_name] = image_count
|
|
1640
|
+
sorted_result_set_names = sorted(sorted_result_set_names,
|
|
1641
|
+
key=lambda x: result_set_name_to_count[x],
|
|
1642
|
+
reverse=True)
|
|
1643
|
+
|
|
1644
|
+
for result_set_name in sorted_result_set_names:
|
|
1525
1645
|
|
|
1526
1646
|
# Don't print classification classes here; we'll do that later with a slightly
|
|
1527
1647
|
# different structure
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
########
|
|
2
|
+
#
|
|
3
|
+
# remap_detection_categories.py
|
|
4
|
+
#
|
|
5
|
+
# Given a MegaDetector results file, remap the category IDs according to a specified
|
|
6
|
+
# dictionary, writing the results to a new file.
|
|
7
|
+
#
|
|
8
|
+
# Currently only supports remapping detection categories, not classification categories.
|
|
9
|
+
#
|
|
10
|
+
########
|
|
11
|
+
|
|
12
|
+
#%% Constants and imports
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
from tqdm import tqdm
|
|
18
|
+
|
|
19
|
+
from md_utils.ct_utils import invert_dictionary
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
#%% Main function
|
|
23
|
+
|
|
24
|
+
def remap_detection_categories(input_file,
                               output_file,
                               target_category_map,
                               extra_category_handling='error',
                               overwrite=False):
    """
    Given a MD results file [input_file], remap the detection category IDs according to the
    dictionary [target_category_map] (category ID --> category name), writing the results to
    [output_file].  Categories are matched by name: each detection's category ID is replaced
    with the ID that [target_category_map] assigns to the same category name, and the output
    file's "detection_categories" dict is replaced with [target_category_map].

    Currently only supports remapping detection categories, not classification categories.

    [target_category_map] can also be the filename of a MD results file, in which case we'll
    use that file's "detection_categories" dictionary.

    [extra_category_handling] specifies what we should do if categories are present in the
    source file that are not present in the target mapping:

    'error' == Raise a ValueError in this case.
    'drop_if_unused' == Don't include these in the output file's category mappings if they are
                        unused, raise a ValueError if they are used.
    'remap' == Remap to unused category IDs.  This is reserved for future use; currently raises
               NotImplementedError if any such category is present.

    Any other value raises a ValueError, whether or not extra categories are present.

    If [output_file] already exists and [overwrite] is False, prints a message and returns
    without reading or writing anything.

    Returns None.
    """

    if os.path.exists(output_file) and (not overwrite):
        print('File {} exists, bypassing remapping'.format(output_file))
        return

    # Validate the handling scheme up front, so that a typo is reported even when every
    # source category happens to be present in the target mapping.
    if extra_category_handling not in ('error','drop_if_unused','remap'):
        raise ValueError('Unrecognized extra category handling scheme {}'.format(
            extra_category_handling))

    assert os.path.isfile(input_file), \
        'File {} does not exist'.format(input_file)

    # If "target_category_map" is passed as a filename, load the "detection_categories"
    # dict.
    if isinstance(target_category_map,str):
        target_categories_file = target_category_map
        with open(target_categories_file,'r') as f:
            d = json.load(f)
        target_category_map = d['detection_categories']
    assert isinstance(target_category_map,dict)

    with open(input_file,'r') as f:
        input_data = json.load(f)

    input_images = input_data['images']
    input_categories = input_data['detection_categories']

    # Figure out which categories are actually used
    used_category_ids = set()
    for im in input_images:

        # Images without a detection list have nothing to count
        if 'detections' not in im or im['detections'] is None:
            continue

        for det in im['detections']:
            used_category_ids.add(det['category'])

    # A set, since this is only used for membership tests below
    used_category_names = {input_categories[cid] for cid in used_category_ids}

    input_names_set = set(input_categories.values())
    output_names_set = set(target_category_map.values())

    # Decide what to do about each source category that has no counterpart in the target
    for category_name in input_names_set - output_names_set:

        if extra_category_handling == 'error':
            raise ValueError('Category {} present in source but not in target'.format(category_name))

        elif extra_category_handling == 'drop_if_unused':
            if category_name in used_category_names:
                raise ValueError('Category {} present (and used) in source but not in target'.format(
                    category_name))
            print('Category {} is unused and not present in the target mapping, ignoring'.format(
                category_name))

        else:
            # The only remaining valid scheme (validated above) is 'remap'
            raise NotImplementedError('Remapping of extra category IDs not yet implemented')

    output_category_name_to_output_category_id = invert_dictionary(target_category_map)

    # Build the input ID --> output ID lookup, matching categories by name
    input_category_id_to_output_category_id = {}
    for input_category_id, category_name in input_categories.items():
        if category_name not in output_category_name_to_output_category_id:
            # Dropped categories must be unused, otherwise we should have errored above
            assert category_name not in used_category_names
        else:
            output_category_id = output_category_name_to_output_category_id[category_name]
            input_category_id_to_output_category_id[input_category_id] = output_category_id

    # Rewrite the category ID of every detection in place
    for im in tqdm(input_images):

        if 'detections' not in im or im['detections'] is None:
            continue

        for det in im['detections']:
            det['category'] = input_category_id_to_output_category_id[det['category']]

    input_data['detection_categories'] = target_category_map

    with open(output_file,'w') as f:
        json.dump(input_data,f,indent=1)

    print('Saved remapped results to {}'.format(output_file))
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
#%% Interactive driver
|
|
136
|
+
|
|
137
|
+
if False:
|
|
138
|
+
|
|
139
|
+
pass
|
|
140
|
+
|
|
141
|
+
#%%
|
|
142
|
+
|
|
143
|
+
target_categories_file = '/home/dmorris/tmp/usgs-tegus/model-comparison/all-classes_usgs-only_yolov5x6.json'
|
|
144
|
+
target_category_map = target_categories_file
|
|
145
|
+
input_file = '/home/dmorris/tmp/usgs-tegus/model-comparison/all-classes_usgs-goannas-lilablanks_yolov5x6-20240223.json'
|
|
146
|
+
|
|
147
|
+
output_file = input_file.replace('.json','_remapped.json')
|
|
148
|
+
assert output_file != input_file
|
|
149
|
+
overwrite = True
|
|
150
|
+
|
|
151
|
+
extra_category_handling = 'drop_if_unused'
|
|
152
|
+
|
|
153
|
+
remap_detection_categories(input_file=input_file,
|
|
154
|
+
output_file=output_file,
|
|
155
|
+
target_category_map=target_category_map,
|
|
156
|
+
extra_category_handling=extra_category_handling,
|
|
157
|
+
overwrite=overwrite)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
#%% Command-line driver
|
|
161
|
+
|
|
162
|
+
# TODO
|
|
163
|
+
|