megadetector-5.0.6-py3-none-any.whl → megadetector-5.0.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (75)
  1. api/batch_processing/data_preparation/manage_local_batch.py +297 -202
  2. api/batch_processing/data_preparation/manage_video_batch.py +7 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
  5. api/batch_processing/postprocessing/compare_batch_results.py +111 -61
  6. api/batch_processing/postprocessing/convert_output_format.py +24 -6
  7. api/batch_processing/postprocessing/load_api_results.py +56 -72
  8. api/batch_processing/postprocessing/md_to_labelme.py +119 -51
  9. api/batch_processing/postprocessing/merge_detections.py +30 -5
  10. api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
  11. api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
  12. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
  13. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  14. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  15. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
  16. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  17. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  18. classification/prepare_classification_script.py +191 -191
  19. data_management/cct_json_utils.py +7 -2
  20. data_management/coco_to_labelme.py +263 -0
  21. data_management/coco_to_yolo.py +72 -48
  22. data_management/databases/integrity_check_json_db.py +75 -64
  23. data_management/databases/subset_json_db.py +1 -1
  24. data_management/generate_crops_from_cct.py +1 -1
  25. data_management/get_image_sizes.py +44 -26
  26. data_management/importers/animl_results_to_md_results.py +3 -5
  27. data_management/importers/noaa_seals_2019.py +2 -2
  28. data_management/importers/zamba_results_to_md_results.py +2 -2
  29. data_management/labelme_to_coco.py +264 -127
  30. data_management/labelme_to_yolo.py +96 -53
  31. data_management/lila/create_lila_blank_set.py +557 -0
  32. data_management/lila/create_lila_test_set.py +2 -1
  33. data_management/lila/create_links_to_md_results_files.py +1 -1
  34. data_management/lila/download_lila_subset.py +138 -45
  35. data_management/lila/generate_lila_per_image_labels.py +23 -14
  36. data_management/lila/get_lila_annotation_counts.py +16 -10
  37. data_management/lila/lila_common.py +15 -42
  38. data_management/lila/test_lila_metadata_urls.py +116 -0
  39. data_management/read_exif.py +65 -16
  40. data_management/remap_coco_categories.py +84 -0
  41. data_management/resize_coco_dataset.py +14 -31
  42. data_management/wi_download_csv_to_coco.py +239 -0
  43. data_management/yolo_output_to_md_output.py +40 -13
  44. data_management/yolo_to_coco.py +313 -100
  45. detection/process_video.py +36 -14
  46. detection/pytorch_detector.py +1 -1
  47. detection/run_detector.py +73 -18
  48. detection/run_detector_batch.py +116 -27
  49. detection/run_inference_with_yolov5_val.py +135 -27
  50. detection/run_tiled_inference.py +153 -43
  51. detection/tf_detector.py +2 -1
  52. detection/video_utils.py +4 -2
  53. md_utils/ct_utils.py +101 -6
  54. md_utils/md_tests.py +264 -17
  55. md_utils/path_utils.py +326 -47
  56. md_utils/process_utils.py +26 -7
  57. md_utils/split_locations_into_train_val.py +215 -0
  58. md_utils/string_utils.py +10 -0
  59. md_utils/url_utils.py +66 -3
  60. md_utils/write_html_image_list.py +12 -2
  61. md_visualization/visualization_utils.py +380 -74
  62. md_visualization/visualize_db.py +41 -10
  63. md_visualization/visualize_detector_output.py +185 -104
  64. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
  65. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
  66. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
  67. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  68. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  69. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  70. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  71. taxonomy_mapping/species_lookup.py +33 -13
  72. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  73. md_visualization/visualize_megadb.py +0 -183
  74. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
  75. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
api/batch_processing/postprocessing/postprocess_batch_results.py

@@ -23,12 +23,10 @@ import collections
  import copy
  import errno
  import io
- import itertools
  import os
  import sys
  import time
  import uuid
- import urllib
  import warnings
  import random
 
@@ -114,10 +112,18 @@ class PostProcessingOptions:
      # detections_animal, detections_person, detections_vehicle
      rendering_bypass_sets = []
 
-     # By default, choose a confidence threshold based on the detector version
+     # If this is None, choose a confidence threshold based on the detector version.
+     #
+     # This can either be a float or a dictionary mapping category names (not IDs) to
+     # thresholds. The category "default" can be used to specify thresholds for
+     # other categories. Currently the use of a dict here is not supported when
+     # ground truth is supplied.
      confidence_threshold = None
 
      # Confidence threshold to apply to classification (not detection) results
+     #
+     # Only a float is supported here (unlike the "confidence_threshold" parameter, which
+     # can be a dict).
      classification_confidence_threshold = 0.5
 
      # Used for summary statistics only
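The new confidence_threshold option accepts either a single float or a dict keyed by category name. A minimal configuration sketch (the category names, threshold values, and import path below are illustrative, inferred from the wheel layout rather than taken from the package):

    from api.batch_processing.postprocessing.postprocess_batch_results import PostProcessingOptions

    options = PostProcessingOptions()

    # Per-category thresholds are keyed by category *name*, not ID; the "default"
    # entry covers any category not listed explicitly
    options.confidence_threshold = {
        'animal': 0.2,
        'person': 0.35,
        'default': 0.5
    }

    # These options still take a single float
    options.classification_confidence_threshold = 0.5
    options.almost_detection_confidence_threshold = 0.1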
@@ -163,6 +169,9 @@ class PostProcessingOptions:
      #
      # Currently only supported when ground truth is unavailable
      include_almost_detections = False
+ 
+     # Only a float is supported here (unlike the "confidence_threshold" parameter, which
+     # can be a dict).
      almost_detection_confidence_threshold = None
 
      # Control rendering parallelization
@@ -427,12 +436,25 @@ def render_bounding_boxes(
      vis_utils.render_db_bounding_boxes(ground_truth_boxes, gt_classes, image,
          original_size=original_size,label_map=label_map,
          thickness=4,expansion=4)
+ 
+     # render_detection_bounding_boxes expects either a float or a dict mapping
+     # category IDs to names.
+     if isinstance(options.confidence_threshold,float):
+         rendering_confidence_threshold = options.confidence_threshold
+     else:
+         category_ids = set()
+         for d in detections:
+             category_ids.add(d['category'])
+         rendering_confidence_threshold = {}
+         for category_id in category_ids:
+             rendering_confidence_threshold[category_id] = \
+                 get_threshold_for_category_id(category_id, options, detection_categories)
 
      vis_utils.render_detection_bounding_boxes(
          detections, image,
          label_map=detection_categories,
          classification_label_map=classification_categories,
-         confidence_threshold=options.confidence_threshold,
+         confidence_threshold=rendering_confidence_threshold,
          thickness=options.line_thickness,
          expansion=options.box_expansion)
 
@@ -460,7 +482,14 @@ def render_bounding_boxes(
 
      # Optionally add links back to the original images
      if options.link_images_to_originals and (image_full_path is not None):
-         info['linkTarget'] = urllib.parse.quote(image_full_path)
+ 
+         # Handling special characters in links has been pushed down into
+         # write_html_image_list
+         #
+         # link_target = image_full_path.replace('\\','/')
+         # link_target = urllib.parse.quote(link_target)
+         link_target = image_full_path
+         info['linkTarget'] = link_target
 
      return info
 
@@ -535,15 +564,68 @@ def prepare_html_subpages(images_html, output_dir, options=None):
 
  # ...prepare_html_subpages()
 
- # Get unique categories above the threshold for this image
- def get_positive_categories(detections,options):
+ 
+ # Determine the confidence threshold we should use for a specific category name
+ def get_threshold_for_category_name(category_name,options):
+ 
+     if isinstance(options.confidence_threshold,float):
+         return options.confidence_threshold
+     else:
+         assert isinstance(options.confidence_threshold,dict), \
+             'confidence_threshold must either be a float or a dict'
+ 
+         if category_name in options.confidence_threshold:
+ 
+             return options.confidence_threshold[category_name]
+ 
+         else:
+             assert 'default' in options.confidence_threshold, \
+                 'category {} not in confidence_threshold dict, and no default supplied'.format(
+                     category_name)
+             return options.confidence_threshold['default']
+ 
+ 
+ # Determine the confidence threshold we should use for a specific category ID
+ #
+ # detection_categories is a dict mapping category IDs to names.
+ def get_threshold_for_category_id(category_id,options,detection_categories):
+ 
+     if isinstance(options.confidence_threshold,float):
+         return options.confidence_threshold
+ 
+     assert category_id in detection_categories, \
+         'Invalid category ID {}'.format(category_id)
+ 
+     category_name = detection_categories[category_id]
+ 
+     return get_threshold_for_category_name(category_name,options)
+ 
+ 
+ # Get a sorted list of unique categories (as string IDs) above the threshold for this image
+ #
+ # "detection_categories" is a dict mapping category IDs to names.
+ def get_positive_categories(detections,options,detection_categories):
      positive_categories = set()
      for d in detections:
-         if d['conf'] >= options.confidence_threshold:
+         threshold = get_threshold_for_category_id(d['category'], options, detection_categories)
+         if d['conf'] >= threshold:
              positive_categories.add(d['category'])
      return sorted(positive_categories)
 
 
+ # Determine whether any positive detections are present in the detection list
+ # [detections].
+ def has_positive_detection(detections,options,detection_categories):
+ 
+     found_positive_detection = False
+     for d in detections:
+         threshold = get_threshold_for_category_id(d['category'], options, detection_categories)
+         if d['conf'] >= threshold:
+             found_positive_detection = True
+             break
+     return found_positive_detection
+ 
+ 
  # Render an image (with no ground truth information)
  #
  # Returns a list of rendering structs, where the first item is a category (e.g. "detections_animal"),
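A rough sketch of how these helpers resolve thresholds, assuming they are module-level functions as the hunk suggests (the category map, numbers, and import path are invented for illustration):

    from api.batch_processing.postprocessing.postprocess_batch_results import (
        PostProcessingOptions, get_threshold_for_category_id)

    # Hypothetical detection_categories mapping (category IDs to names)
    detection_categories = {'1': 'animal', '2': 'person', '3': 'vehicle'}

    options = PostProcessingOptions()
    options.confidence_threshold = {'animal': 0.2, 'default': 0.5}

    # "1" maps to "animal", which is listed explicitly, so its threshold is 0.2;
    # "2" maps to "person", which falls back to the "default" entry (0.5)
    assert get_threshold_for_category_id('1', options, detection_categories) == 0.2
    assert get_threshold_for_category_id('2', options, detection_categories) == 0.5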
@@ -573,8 +655,12 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,
      max_conf = file_info[1]
      detections = file_info[2]
 
+     # Determine whether any positive detections are present (using a threshold that
+     # may vary by category)
+     found_positive_detection = has_positive_detection(detections,options,detection_categories)
+ 
      detection_status = DetectionStatus.DS_UNASSIGNED
-     if max_conf >= options.confidence_threshold:
+     if found_positive_detection:
          detection_status = DetectionStatus.DS_POSITIVE
      else:
          if options.include_almost_detections:
@@ -587,7 +673,7 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,
 
      if detection_status == DetectionStatus.DS_POSITIVE:
          if options.separate_detections_by_category:
-             positive_categories = tuple(get_positive_categories(detections,options))
+             positive_categories = tuple(get_positive_categories(detections,options,detection_categories))
              if positive_categories not in detection_categories_to_results_name:
                  raise ValueError('Error: {} not in category mapping (file {})'.format(
                      str(positive_categories),image_relative_path))
@@ -703,7 +789,7 @@ def render_image_with_gt(file_info,ground_truth_indexed_db,
              f'ground truth status (status: {gt_status}, classes: {gt_class_summary})')
          return None
 
-     detected = max_conf > options.confidence_threshold
+     detected = has_positive_detection(detections, options, detection_categories)
 
      if gt_presence and detected:
          if '_classification_accuracy' not in image.keys():
@@ -766,6 +852,10 @@ def process_batch_results(options: PostProcessingOptions
 
      ground_truth_indexed_db = None
 
+     if (options.ground_truth_json_file is not None):
+         assert (options.confidence_threshold is None) or (isinstance(options.confidence_threshold,float)), \
+             'Variable confidence thresholds are not supported when supplying ground truth'
+ 
      if (options.ground_truth_json_file is not None) and (len(options.ground_truth_json_file) > 0):
 
          if options.separate_detections_by_category:
@@ -791,7 +881,7 @@ def process_batch_results(options: PostProcessingOptions
      # If the caller hasn't supplied results, load them
      if options.api_detection_results is None:
          detections_df, other_fields = load_api_results(
-             options.api_output_file, normalize_paths=True,
+             options.api_output_file, force_forward_slashes=True,
              filename_replacements=options.api_output_filename_replacements)
          ppresults.api_detection_results = detections_df
          ppresults.api_other_fields = other_fields
@@ -821,7 +911,7 @@ def process_batch_results(options: PostProcessingOptions
          n_failures = detections_df['failure'].count()
          print('Ignoring {} failed images'.format(n_failures))
          # Explicitly forcing a copy() operation here to suppress "trying to be set
-         # on a copy" # warnings (and associated risks) below.
+         # on a copy" warnings (and associated risks) below.
          detections_df = detections_df[detections_df['failure'].isna()].copy()
 
      assert other_fields is not None
@@ -836,31 +926,24 @@ def process_batch_results(options: PostProcessingOptions
          for k, v in classification_categories.items()
      }
 
-     # Add column 'pred_detection_label' to indicate predicted detection status.
-     #
-     # This column doesn't capture category information, it's just about detections,
-     # non-detections, and almost-detections.
-     det_status = 'pred_detection_label'
-     if options.include_almost_detections:
-         detections_df[det_status] = DetectionStatus.DS_ALMOST
-         confidences = detections_df['max_detection_conf']
- 
-         pos_mask = (confidences >= options.confidence_threshold)
-         detections_df.loc[pos_mask, det_status] = DetectionStatus.DS_POSITIVE
- 
-         neg_mask = (confidences < options.almost_detection_confidence_threshold)
-         detections_df.loc[neg_mask, det_status] = DetectionStatus.DS_NEGATIVE
-     else:
-         detections_df[det_status] = np.where(
-             detections_df['max_detection_conf'] >= options.confidence_threshold,
-             DetectionStatus.DS_POSITIVE, DetectionStatus.DS_NEGATIVE)
- 
-     n_positives = sum(detections_df[det_status] == DetectionStatus.DS_POSITIVE)
+     # Count detections and almost-detections for reporting purposes
+     n_positives = 0
+     n_almosts = 0
+ 
+     for i_row,row in tqdm(detections_df.iterrows(),total=len(detections_df)):
+ 
+         detections = row['detections']
+         max_conf = row['max_detection_conf']
+         if has_positive_detection(detections, options, detection_categories):
+             n_positives += 1
+         elif (options.almost_detection_confidence_threshold is not None) and \
+             (max_conf >= options.almost_detection_confidence_threshold):
+             n_almosts += 1
+ 
      print(f'Finished loading and preprocessing {len(detections_df)} rows '
            f'from detector output, predicted {n_positives} positives.')
 
      if options.include_almost_detections:
-         n_almosts = sum(detections_df[det_status] == DetectionStatus.DS_ALMOST)
          print('...and {} almost-positives'.format(n_almosts))
 
 
@@ -1211,7 +1294,8 @@ def process_batch_results(options: PostProcessingOptions
          for file_info in tqdm(files_to_render):
              rendering_results.append(render_image_with_gt(
                  file_info,ground_truth_indexed_db,
-                 detection_categories,classification_categories))
+                 detection_categories,classification_categories,
+                 options=options))
          elapsed = time.time() - start_time
 
      # Map all the rendering results in the list rendering_results into the
@@ -1241,6 +1325,12 @@ def process_batch_results(options: PostProcessingOptions
          image_counts['tp']
      )
 
+     confidence_threshold_string = ''
+     if isinstance(options.confidence_threshold,float):
+         confidence_threshold_string = '{:.2%}'.format(options.confidence_threshold)
+     else:
+         confidence_threshold_string = str(options.confidence_threshold)
+ 
      index_page = """<html>
      {}
      <body>
@@ -1255,7 +1345,7 @@ def process_batch_results(options: PostProcessingOptions
 
      <h3>Sample images</h3>
      <div class="contentdiv">
-     <p>A sample of {} images, annotated with detections above {:.1%} confidence.</p>
+     <p>A sample of {} images, annotated with detections above confidence {}.</p>
      <a href="tp.html">True positives (TP)</a> ({}) ({:0.1%})<br/>
      CLASSIFICATION_PLACEHOLDER_1
      <a href="tn.html">True negatives (TN)</a> ({}) ({:0.1%})<br/>
@@ -1265,7 +1355,7 @@ def process_batch_results(options: PostProcessingOptions
      </div>
      """.format(
          style_header,job_name_string,model_version_string,
-         image_count, options.confidence_threshold,
+         image_count, confidence_threshold_string,
          all_tp_count, all_tp_count/total_count,
          image_counts['tn'], image_counts['tn']/total_count,
          image_counts['fp'], image_counts['fp']/total_count,
@@ -1275,11 +1365,11 @@ def process_batch_results(options: PostProcessingOptions
      index_page += """
      <h3>Detection results</h3>
      <div class="contentdiv">
-     <p>At a confidence threshold of {:0.1%}, precision={:0.1%}, recall={:0.1%}</p>
+     <p>At a confidence threshold of {}, precision={:0.1%}, recall={:0.1%}</p>
      <p><strong>Precision/recall summary for all {} images</strong></p><img src="{}"><br/>
      </div>
      """.format(
-         options.confidence_threshold, precision_at_confidence_threshold, recall_at_confidence_threshold,
+         confidence_threshold_string, precision_at_confidence_threshold, recall_at_confidence_threshold,
          len(detections_df), pr_figure_relative_filename
      )
 
@@ -1345,46 +1435,60 @@ def process_batch_results(options: PostProcessingOptions
      # Accumulate html image structs (in the format expected by write_html_image_list)
      # for each category
      images_html = collections.defaultdict(list)
-     images_html['non_detections']
+ 
 
      # Add default entries by accessing them for the first time
 
-     # Maps detection categories - e.g. "human" - to result set names, e.g.
-     # "detections_human"
+     # Maps sorted tuples of detection category IDs (string ints) - e.g. ("1"), ("1", "4", "7") - to
+     # result set names, e.g. "detections_human", "detections_cat_truck".
      detection_categories_to_results_name = {}
 
      # Keep track of which categories are single-class (e.g. "animal") and which are
      # combinations (e.g. "animal_vehicle")
      detection_categories_to_category_count = {}
-     detection_categories_to_category_count['detections'] = 0
+ 
+     # For the creation of a "non-detections" category
+     images_html['non_detections']
      detection_categories_to_category_count['non_detections'] = 0
-     detection_categories_to_category_count['almost_detections'] = 0
+ 
 
      if not options.separate_detections_by_category:
          # For the creation of a "detections" category
          images_html['detections']
+         detection_categories_to_category_count['detections'] = 0
      else:
          # Add a set of results for each category and combination of categories, e.g.
          # "detections_animal_vehicle". When we're using this script for non-MegaDetector
         # results, this can generate lots of categories, e.g. detections_bear_bird_cat_dog_pig.
         # We'll keep that huge set of combinations in this map, but we'll only write
         # out links for the ones that are non-empty.
-         keys = detection_categories.keys()
-         subsets = []
-         for L in range(1, len(keys)+1):
-             for subset in itertools.combinations(keys, L):
-                 subsets.append(subset)
-         for subset in subsets:
-             sorted_subset = tuple(sorted(subset))
+         used_combinations = set()
+ 
+         # row = images_to_visualize.iloc[0]
+         for i_row, row in images_to_visualize.iterrows():
+             detections_this_row = row['detections']
+             above_threshold_category_ids_this_row = set()
+             for detection in detections_this_row:
+                 threshold = get_threshold_for_category_id(detection['category'], options, detection_categories)
+                 if detection['conf'] >= threshold:
+                     above_threshold_category_ids_this_row.add(detection['category'])
+             if len(above_threshold_category_ids_this_row) == 0:
+                 continue
+             sorted_categories_this_row = tuple(sorted(above_threshold_category_ids_this_row))
+             used_combinations.add(sorted_categories_this_row)
+ 
+         for sorted_subset in used_combinations:
+             assert len(sorted_subset) > 0
              results_name = 'detections'
              for category_id in sorted_subset:
                  results_name = results_name + '_' + detection_categories[category_id]
              images_html[results_name]
             detection_categories_to_results_name[sorted_subset] = results_name
-             detection_categories_to_category_count[results_name] = len(sorted_subset)
+             detection_categories_to_category_count[results_name] = len(sorted_subset)
 
      if options.include_almost_detections:
          images_html['almost_detections']
+         detection_categories_to_category_count['almost_detections'] = 0
 
      # Create output directories
      for res in images_html.keys():
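The result-set naming scheme used above can be illustrated with a toy example that mirrors the loop in this hunk (the category map is hypothetical; real maps come from the results file):

    # Hypothetical category map (category IDs to names)
    detection_categories = {'1': 'animal', '2': 'person', '3': 'vehicle'}

    # An image whose above-threshold detections fall in categories "1" and "3"
    sorted_subset = ('1', '3')

    results_name = 'detections'
    for category_id in sorted_subset:
        results_name = results_name + '_' + detection_categories[category_id]

    # results_name is now "detections_animal_vehicle"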
@@ -1495,9 +1599,15 @@ def process_batch_results(options: PostProcessingOptions
          almost_detection_string = ' (&ldquo;almost detection&rdquo; threshold at {:.1%})'.format(
              options.almost_detection_confidence_threshold)
 
+     confidence_threshold_string = ''
+     if isinstance(options.confidence_threshold,float):
+         confidence_threshold_string = '{:.2%}'.format(options.confidence_threshold)
+     else:
+         confidence_threshold_string = str(options.confidence_threshold)
+ 
      index_page = """<html>\n{}\n<body>\n
      <h2>Visualization of results for {}</h2>\n
-     <p>A sample of {} images (of {} total)FAILURE_PLACEHOLDER, annotated with detections above {:.1%} confidence{}.</p>\n
+     <p>A sample of {} images (of {} total)FAILURE_PLACEHOLDER, annotated with detections above confidence {}{}.</p>\n
 
      <div class="contentdiv">
      <p>Model version: {}</p>
@@ -1505,7 +1615,7 @@ def process_batch_results(options: PostProcessingOptions
 
      <h3>Sample images</h3>\n
      <div class="contentdiv">\n""".format(
-         style_header, job_name_string, image_count, len(detections_df), options.confidence_threshold,
+         style_header, job_name_string, image_count, len(detections_df), confidence_threshold_string,
          almost_detection_string, model_version_string)
 
      failure_string = ''
@@ -1521,7 +1631,17 @@ def process_batch_results(options: PostProcessingOptions
          friendly_name = friendly_name.capitalize()
          return friendly_name
 
-     for result_set_name in images_html.keys():
+     sorted_result_set_names = sorted(list(images_html.keys()))
+ 
+     result_set_name_to_count = {}
+     for result_set_name in sorted_result_set_names:
+         image_count = image_counts[result_set_name]
+         result_set_name_to_count[result_set_name] = image_count
+     sorted_result_set_names = sorted(sorted_result_set_names,
+         key=lambda x: result_set_name_to_count[x],
+         reverse=True)
+ 
+     for result_set_name in sorted_result_set_names:
 
          # Don't print classification classes here; we'll do that later with a slightly
          # different structure
api/batch_processing/postprocessing/remap_detection_categories.py (new file)

@@ -0,0 +1,163 @@
+ ########
+ #
+ # remap_detection_categories.py
+ #
+ # Given a MegaDetector results file, remap the category IDs according to a specified
+ # dictionary, writing the results to a new file.
+ #
+ # Currently only supports remapping detection categories, not classification categories.
+ #
+ ########
+ 
+ #%% Constants and imports
+ 
+ import json
+ import os
+ 
+ from tqdm import tqdm
+ 
+ from md_utils.ct_utils import invert_dictionary
+ 
+ 
+ #%% Main function
+ 
+ def remap_detection_categories(input_file,
+                                output_file,
+                                target_category_map,
+                                extra_category_handling='error',
+                                overwrite=False):
+     """
+     Given a MD results file [input_file], remap the category IDs according to the dictionary
+     [target_category_map], writing the results to [output_file]. The remapped dictionary needs to have
+     the same category names as the input file's detection_categories dictionary.
+ 
+     Currently only supports remapping detection categories, not classification categories.
+ 
+     target_category_map can also be a MD results file, in which case we'll use that file's
+     detection_categories dictionary.
+ 
+     [extra_category_handling] specifies what we should do if categories are present in the source file
+     that are not present in the target mapping.
+ 
+     'error' == Error in this case.
+     'drop_if_unused' == Don't include these in the output file's category mappings if they are unused,
+         error if they are.
+     'remap' == Remap to unused category IDs. This is reserved for future use, not currently implemented.
+ 
+     """
+ 
+     if os.path.exists(output_file) and (not overwrite):
+         print('File {} exists, bypassing remapping'.format(output_file))
+         return
+ 
+     assert os.path.isfile(input_file), \
+         'File {} does not exist'.format(input_file)
+ 
+     # If "target_category_map" is passed as a filename, load the "detection_categories"
+     # dict.
+     if isinstance(target_category_map,str):
+         target_categories_file = target_category_map
+         with open(target_categories_file,'r') as f:
+             d = json.load(f)
+         target_category_map = d['detection_categories']
+         assert isinstance(target_category_map,dict)
+ 
+     with open(input_file,'r') as f:
+         input_data = json.load(f)
+ 
+     input_images = input_data['images']
+     input_categories = input_data['detection_categories']
+ 
+     # Figure out which categories are actually used
+     used_category_ids = set()
+     for im in input_images:
+ 
+         if 'detections' not in im or im['detections'] is None:
+             continue
+ 
+         for det in im['detections']:
+             used_category_ids.add(det['category'])
+     used_category_names = [input_categories[cid] for cid in used_category_ids]
+ 
+     input_names_set = set(input_categories.values())
+     output_names_set = set(target_category_map.values())
+ 
+     # category_name = list(input_names_set)[0]
+     for category_name in input_names_set:
+         if category_name in output_names_set:
+             continue
+         if extra_category_handling == 'error':
+             raise ValueError('Category {} present in source but not in target'.format(category_name))
+         elif extra_category_handling == 'drop_if_unused':
+             if category_name in used_category_names:
+                 raise ValueError('Category {} present (and used) in source but not in target'.format(
+                     category_name))
+             else:
+                 print('Category {} is unused and not present in the target mapping, ignoring'.format(
+                     category_name))
+                 continue
+         elif extra_category_handling == 'remap':
+             raise NotImplementedError('Remapping of extra category IDs not yet implemented')
+         else:
+             raise ValueError('Unrecognized extra category handling scheme {}'.format(
+                 extra_category_handling))
+ 
+     output_category_name_to_output_category_id = invert_dictionary(target_category_map)
+ 
+     input_category_id_to_output_category_id = {}
+     for input_category_id in input_categories.keys():
+         category_name = input_categories[input_category_id]
+         if category_name not in output_category_name_to_output_category_id:
+             assert category_name not in used_category_names
+         else:
+             output_category_id = output_category_name_to_output_category_id[category_name]
+             input_category_id_to_output_category_id[input_category_id] = output_category_id
+ 
+     # im = input_images[0]
+     for im in tqdm(input_images):
+ 
+         if 'detections' not in im or im['detections'] is None:
+             continue
+ 
+         # det = im['detections'][0]
+         for det in im['detections']:
+             det['category'] = input_category_id_to_output_category_id[det['category']]
+ 
+     input_data['detection_categories'] = target_category_map
+ 
+     with open(output_file,'w') as f:
+         json.dump(input_data,f,indent=1)
+ 
+ 
+     print('Saved remapped results to {}'.format(output_file))
+ 
+ 
+ #%% Interactive driver
+ 
+ if False:
+ 
+     pass
+ 
+     #%%
+ 
+     target_categories_file = '/home/dmorris/tmp/usgs-tegus/model-comparison/all-classes_usgs-only_yolov5x6.json'
+     target_category_map = target_categories_file
+     input_file = '/home/dmorris/tmp/usgs-tegus/model-comparison/all-classes_usgs-goannas-lilablanks_yolov5x6-20240223.json'
+ 
+     output_file = input_file.replace('.json','_remapped.json')
+     assert output_file != input_file
+     overwrite = True
+ 
+     extra_category_handling = 'drop_if_unused'
+ 
+     remap_detection_categories(input_file=input_file,
+                                output_file=output_file,
+                                target_category_map=target_category_map,
+                                extra_category_handling=extra_category_handling,
+                                overwrite=overwrite)
+ 
+ 
+ #%% Command-line driver
+ 
+ # TODO
+ 
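As a usage sketch for the new module: the docstring indicates that target_category_map can be either a results file path or an explicit dict. The file paths, category names, and import path below are placeholders inferred from the wheel layout, not taken from the package:

    from api.batch_processing.postprocessing.remap_detection_categories import \
        remap_detection_categories

    # Hypothetical target mapping (category IDs to names)
    target_category_map = {'1': 'animal', '2': 'person', '3': 'vehicle'}

    remap_detection_categories(input_file='md_results.json',
                               output_file='md_results_remapped.json',
                               target_category_map=target_category_map,
                               extra_category_handling='drop_if_unused',
                               overwrite=True)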