megadetector 5.0.5__py3-none-any.whl → 5.0.7__py3-none-any.whl

This diff shows the content of these publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

Files changed (132)
  1. api/batch_processing/data_preparation/manage_local_batch.py +302 -263
  2. api/batch_processing/data_preparation/manage_video_batch.py +81 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/categorize_detections_by_size.py +50 -19
  5. api/batch_processing/postprocessing/compare_batch_results.py +110 -60
  6. api/batch_processing/postprocessing/load_api_results.py +56 -70
  7. api/batch_processing/postprocessing/md_to_coco.py +1 -1
  8. api/batch_processing/postprocessing/md_to_labelme.py +2 -1
  9. api/batch_processing/postprocessing/postprocess_batch_results.py +240 -81
  10. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
  11. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  12. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  13. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +227 -75
  14. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  15. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  16. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +2 -2
  17. classification/prepare_classification_script.py +191 -191
  18. data_management/coco_to_yolo.py +68 -45
  19. data_management/databases/integrity_check_json_db.py +7 -5
  20. data_management/generate_crops_from_cct.py +3 -3
  21. data_management/get_image_sizes.py +8 -6
  22. data_management/importers/add_timestamps_to_icct.py +79 -0
  23. data_management/importers/animl_results_to_md_results.py +160 -0
  24. data_management/importers/auckland_doc_test_to_json.py +4 -4
  25. data_management/importers/auckland_doc_to_json.py +1 -1
  26. data_management/importers/awc_to_json.py +5 -5
  27. data_management/importers/bellevue_to_json.py +5 -5
  28. data_management/importers/carrizo_shrubfree_2018.py +5 -5
  29. data_management/importers/carrizo_trail_cam_2017.py +5 -5
  30. data_management/importers/cct_field_adjustments.py +2 -3
  31. data_management/importers/channel_islands_to_cct.py +4 -4
  32. data_management/importers/ena24_to_json.py +5 -5
  33. data_management/importers/helena_to_cct.py +10 -10
  34. data_management/importers/idaho-camera-traps.py +12 -12
  35. data_management/importers/idfg_iwildcam_lila_prep.py +8 -8
  36. data_management/importers/jb_csv_to_json.py +4 -4
  37. data_management/importers/missouri_to_json.py +1 -1
  38. data_management/importers/noaa_seals_2019.py +1 -1
  39. data_management/importers/pc_to_json.py +5 -5
  40. data_management/importers/prepare-noaa-fish-data-for-lila.py +4 -4
  41. data_management/importers/prepare_zsl_imerit.py +5 -5
  42. data_management/importers/rspb_to_json.py +4 -4
  43. data_management/importers/save_the_elephants_survey_A.py +5 -5
  44. data_management/importers/save_the_elephants_survey_B.py +6 -6
  45. data_management/importers/snapshot_safari_importer.py +9 -9
  46. data_management/importers/snapshot_serengeti_lila.py +9 -9
  47. data_management/importers/timelapse_csv_set_to_json.py +5 -7
  48. data_management/importers/ubc_to_json.py +4 -4
  49. data_management/importers/umn_to_json.py +4 -4
  50. data_management/importers/wellington_to_json.py +1 -1
  51. data_management/importers/wi_to_json.py +2 -2
  52. data_management/importers/zamba_results_to_md_results.py +181 -0
  53. data_management/labelme_to_coco.py +35 -7
  54. data_management/labelme_to_yolo.py +229 -0
  55. data_management/lila/add_locations_to_island_camera_traps.py +1 -1
  56. data_management/lila/add_locations_to_nacti.py +147 -0
  57. data_management/lila/create_lila_blank_set.py +474 -0
  58. data_management/lila/create_lila_test_set.py +2 -1
  59. data_management/lila/create_links_to_md_results_files.py +106 -0
  60. data_management/lila/download_lila_subset.py +46 -21
  61. data_management/lila/generate_lila_per_image_labels.py +23 -14
  62. data_management/lila/get_lila_annotation_counts.py +17 -11
  63. data_management/lila/lila_common.py +14 -11
  64. data_management/lila/test_lila_metadata_urls.py +116 -0
  65. data_management/ocr_tools.py +829 -0
  66. data_management/resize_coco_dataset.py +13 -11
  67. data_management/yolo_output_to_md_output.py +84 -12
  68. data_management/yolo_to_coco.py +38 -20
  69. detection/process_video.py +36 -14
  70. detection/pytorch_detector.py +23 -8
  71. detection/run_detector.py +76 -19
  72. detection/run_detector_batch.py +178 -63
  73. detection/run_inference_with_yolov5_val.py +326 -57
  74. detection/run_tiled_inference.py +153 -43
  75. detection/video_utils.py +34 -8
  76. md_utils/ct_utils.py +172 -1
  77. md_utils/md_tests.py +372 -51
  78. md_utils/path_utils.py +167 -39
  79. md_utils/process_utils.py +26 -7
  80. md_utils/split_locations_into_train_val.py +215 -0
  81. md_utils/string_utils.py +10 -0
  82. md_utils/url_utils.py +0 -2
  83. md_utils/write_html_image_list.py +9 -26
  84. md_visualization/plot_utils.py +12 -8
  85. md_visualization/visualization_utils.py +106 -7
  86. md_visualization/visualize_db.py +16 -8
  87. md_visualization/visualize_detector_output.py +208 -97
  88. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/METADATA +3 -6
  89. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/RECORD +98 -121
  90. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
  91. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  92. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  93. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  94. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  95. taxonomy_mapping/species_lookup.py +33 -13
  96. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  97. api/synchronous/api_core/yolov5/detect.py +0 -252
  98. api/synchronous/api_core/yolov5/export.py +0 -607
  99. api/synchronous/api_core/yolov5/hubconf.py +0 -146
  100. api/synchronous/api_core/yolov5/models/__init__.py +0 -0
  101. api/synchronous/api_core/yolov5/models/common.py +0 -738
  102. api/synchronous/api_core/yolov5/models/experimental.py +0 -104
  103. api/synchronous/api_core/yolov5/models/tf.py +0 -574
  104. api/synchronous/api_core/yolov5/models/yolo.py +0 -338
  105. api/synchronous/api_core/yolov5/train.py +0 -670
  106. api/synchronous/api_core/yolov5/utils/__init__.py +0 -36
  107. api/synchronous/api_core/yolov5/utils/activations.py +0 -103
  108. api/synchronous/api_core/yolov5/utils/augmentations.py +0 -284
  109. api/synchronous/api_core/yolov5/utils/autoanchor.py +0 -170
  110. api/synchronous/api_core/yolov5/utils/autobatch.py +0 -66
  111. api/synchronous/api_core/yolov5/utils/aws/__init__.py +0 -0
  112. api/synchronous/api_core/yolov5/utils/aws/resume.py +0 -40
  113. api/synchronous/api_core/yolov5/utils/benchmarks.py +0 -148
  114. api/synchronous/api_core/yolov5/utils/callbacks.py +0 -71
  115. api/synchronous/api_core/yolov5/utils/dataloaders.py +0 -1087
  116. api/synchronous/api_core/yolov5/utils/downloads.py +0 -178
  117. api/synchronous/api_core/yolov5/utils/flask_rest_api/example_request.py +0 -19
  118. api/synchronous/api_core/yolov5/utils/flask_rest_api/restapi.py +0 -46
  119. api/synchronous/api_core/yolov5/utils/general.py +0 -1018
  120. api/synchronous/api_core/yolov5/utils/loggers/__init__.py +0 -187
  121. api/synchronous/api_core/yolov5/utils/loggers/wandb/__init__.py +0 -0
  122. api/synchronous/api_core/yolov5/utils/loggers/wandb/log_dataset.py +0 -27
  123. api/synchronous/api_core/yolov5/utils/loggers/wandb/sweep.py +0 -41
  124. api/synchronous/api_core/yolov5/utils/loggers/wandb/wandb_utils.py +0 -577
  125. api/synchronous/api_core/yolov5/utils/loss.py +0 -234
  126. api/synchronous/api_core/yolov5/utils/metrics.py +0 -355
  127. api/synchronous/api_core/yolov5/utils/plots.py +0 -489
  128. api/synchronous/api_core/yolov5/utils/torch_utils.py +0 -314
  129. api/synchronous/api_core/yolov5/val.py +0 -394
  130. md_utils/matlab_porting_tools.py +0 -97
  131. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
  132. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0
@@ -23,13 +23,13 @@ import collections
  import copy
  import errno
  import io
- import itertools
  import os
  import sys
  import time
  import uuid
  import urllib
  import warnings
+ import random

  from typing import Any, Dict, Iterable, Optional, Tuple
  from enum import IntEnum
@@ -53,6 +53,7 @@ from md_utils import path_utils
  from data_management.cct_json_utils import (CameraTrapJsonUtils, IndexedJsonDb)
  from api.batch_processing.postprocessing.load_api_results import load_api_results
  from md_utils.ct_utils import args_to_object
+ from md_utils.ct_utils import invert_dictionary

  from detection.run_detector import get_typical_confidence_threshold_from_results

@@ -113,8 +114,18 @@ class PostProcessingOptions:
  # detections_animal, detections_person, detections_vehicle
  rendering_bypass_sets = []

- # By default, choose a confidence threshold based on the detector version
+ # If this is None, choose a confidence threshold based on the detector version.
+ #
+ # This can either be a float or a dictionary mapping category names (not IDs) to
+ # thresholds. The category "default" can be used to specify thresholds for
+ # other categories. Currently the use of a dict here is not supported when
+ # ground truth is supplied.
  confidence_threshold = None
+
+ # Confidence threshold to apply to classification (not detection) results
+ #
+ # Only a float is supported here (unlike the "confidence_threshold" parameter, which
+ # can be a dict).
  classification_confidence_threshold = 0.5

  # Used for summary statistics only
@@ -134,13 +145,9 @@ class PostProcessingOptions:
  job_name_string = None
  model_version_string = None

- # These should really be mutually exclusive, but I'm not enforcing this.
- #
- # Nothing bad happens if you set both to true; the confidence sort happens
- # second.
- sort_html_by_filename = True
- sort_html_by_confidence = False
-
+ # Sort order for the output, should be one of "filename", "confidence", or "random"
+ html_sort_order = 'filename'
+
  link_images_to_originals = True

  # Optionally separate detections into categories (animal/vehicle/human)
@@ -164,6 +171,9 @@ class PostProcessingOptions:
  #
  # Currently only supported when ground truth is unavailable
  include_almost_detections = False
+
+ # Only a float is supported here (unlike the "confidence_threshold" parameter, which
+ # can be a dict).
  almost_detection_confidence_threshold = None

  # Control rendering parallelization
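
The comments added above describe the two behavioral changes to PostProcessingOptions in this release: confidence_threshold may now be a dict keyed by category name (with an optional "default" entry), and the two boolean sort flags are replaced by html_sort_order. A minimal configuration sketch follows; the option fields shown are taken from this diff, while the category names and the commented-out input/output fields are illustrative assumptions.

    # Sketch: per-category detection thresholds and the new page sort order
    from api.batch_processing.postprocessing.postprocess_batch_results import (
        PostProcessingOptions, process_batch_results)

    options = PostProcessingOptions()

    # Keys are category *names*, not IDs; "default" covers any category
    # not listed explicitly.
    options.confidence_threshold = {'animal': 0.15, 'person': 0.35, 'default': 0.2}

    # These thresholds still take plain floats
    options.classification_confidence_threshold = 0.5
    options.include_almost_detections = True
    options.almost_detection_confidence_threshold = 0.05

    # Replaces the old sort_html_by_filename / sort_html_by_confidence flags
    options.html_sort_order = 'confidence'

    # Input/output fields are not shown in this diff; the names below are hypothetical
    # options.api_output_file = 'md_results.json'
    # options.image_base_dir = '/data/camera-traps'
    # options.output_dir = '/data/postprocessing-output'
    # process_batch_results(options)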
@@ -407,8 +417,7 @@ def render_bounding_boxes(
  image = None
  # return ''

- # Render images to a flat folder... we can use os.sep here because we've
- # already normalized paths
+ # Render images to a flat folder
  sample_name = res + '_' + path_utils.flatten_path(image_relative_path)
  fullpath = os.path.join(options.output_dir, res, sample_name)

@@ -429,12 +438,25 @@ def render_bounding_boxes(
  vis_utils.render_db_bounding_boxes(ground_truth_boxes, gt_classes, image,
  original_size=original_size,label_map=label_map,
  thickness=4,expansion=4)
+
+ # render_detection_bounding_boxes expects either a float or a dict mapping
+ # category IDs to names.
+ if isinstance(options.confidence_threshold,float):
+ rendering_confidence_threshold = options.confidence_threshold
+ else:
+ category_ids = set()
+ for d in detections:
+ category_ids.add(d['category'])
+ rendering_confidence_threshold = {}
+ for category_id in category_ids:
+ rendering_confidence_threshold[category_id] = \
+ get_threshold_for_category_id(category_id, options, detection_categories)

  vis_utils.render_detection_bounding_boxes(
  detections, image,
  label_map=detection_categories,
  classification_label_map=classification_categories,
- confidence_threshold=options.confidence_threshold,
+ confidence_threshold=rendering_confidence_threshold,
  thickness=options.line_thickness,
  expansion=options.box_expansion)

@@ -471,10 +493,13 @@ def render_bounding_boxes(

  def prepare_html_subpages(images_html, output_dir, options=None):
  """
- Write out a series of html image lists, e.g. the fp/tp/fn/tn pages.
+ Write out a series of html image lists, e.g. the "detections" or "non-detections"
+ pages.

- image_html is a dictionary mapping an html page name (e.g. "fp") to a list
- of image structs friendly to write_html_image_list
+ image_html is a dictionary mapping an html page name (e.g. "detections_animal") to
+ a list of image structs friendly to write_html_image_list.
+
+ Returns a dictionary mapping category names to image counts.
  """

  if options is None:
@@ -486,7 +511,7 @@ def prepare_html_subpages(images_html, output_dir, options=None):
  image_counts[res] = len(array)

  # Optionally sort by filename before writing to html
- if options.sort_html_by_filename:
+ if options.html_sort_order == 'filename':
  images_html_sorted = {}
  for res, array in images_html.items():
  sorted_array = sorted(array, key=lambda x: x['filename'])
@@ -494,18 +519,26 @@ def prepare_html_subpages(images_html, output_dir, options=None):
  images_html = images_html_sorted

  # Optionally sort by confidence before writing to html
- if options.sort_html_by_confidence:
+ elif options.html_sort_order == 'confidence':
  images_html_sorted = {}
  for res, array in images_html.items():

  if not all(['max_conf' in d for d in array]):
- print("Warning: some elements in the {} page don't have confidence values, can't sort by confidence".format(
- res))
+ print("Warning: some elements in the {} page don't have confidence values, can't sort by confidence".format(res))
  else:
  sorted_array = sorted(array, key=lambda x: x['max_conf'], reverse=True)
  images_html_sorted[res] = sorted_array
  images_html = images_html_sorted

+ else:
+ assert options.html_sort_order == 'random',\
+ 'Unrecognized sort order {}'.format(options.html_sort_order)
+ images_html_sorted = {}
+ for res, array in images_html.items():
+ sorted_array = random.sample(array,len(array))
+ images_html_sorted[res] = sorted_array
+ images_html = images_html_sorted
+
  # Write the individual HTML files
  for res, array in images_html.items():

@@ -513,24 +546,81 @@ def prepare_html_subpages(images_html, output_dir, options=None):
  html_image_list_options['maxFiguresPerHtmlFile'] = options.max_figures_per_html_file
  html_image_list_options['headerHtml'] = '<h1>{}</h1>'.format(res.upper())

- write_html_image_list(
- filename=os.path.join(output_dir, '{}.html'.format(res)),
- images=array,
- options=html_image_list_options)
+ # Don't write empty pages
+ if len(array) == 0:
+ continue
+ else:
+ write_html_image_list(
+ filename=os.path.join(output_dir, '{}.html'.format(res)),
+ images=array,
+ options=html_image_list_options)

  return image_counts

  # ...prepare_html_subpages()

- # Get unique categories above the threshold for this image
- def get_positive_categories(detections,options):
+
+ # Determine the confidence threshold we should use for a specific category name
+ def get_threshold_for_category_name(category_name,options):
+
+ if isinstance(options.confidence_threshold,float):
+ return options.confidence_threshold
+ else:
+ assert isinstance(options.confidence_threshold,dict), \
+ 'confidence_threshold must either be a float or a dict'
+
+ if category_name in options.confidence_threshold:
+
+ return options.confidence_threshold[category_name]
+
+ else:
+ assert 'default' in options.confidence_threshold, \
+ 'category {} not in confidence_threshold dict, and no default supplied'.format(
+ category_name)
+ return options.confidence_threshold['default']
+
+
+ # Determine the confidence threshold we should use for a specific category ID
+ #
+ # detection_categories is a dict mapping category IDs to names.
+ def get_threshold_for_category_id(category_id,options,detection_categories):
+
+ if isinstance(options.confidence_threshold,float):
+ return options.confidence_threshold
+
+ assert category_id in detection_categories, \
+ 'Invalid category ID {}'.format(category_id)
+
+ category_name = detection_categories[category_id]
+
+ return get_threshold_for_category_name(category_name,options)
+
+
+ # Get a sorted list of unique categories (as string IDs) above the threshold for this image
+ #
+ # "detection_categories" is a dict mapping category IDs to names.
+ def get_positive_categories(detections,options,detection_categories):
  positive_categories = set()
  for d in detections:
- if d['conf'] >= options.confidence_threshold:
+ threshold = get_threshold_for_category_id(d['category'], options, detection_categories)
+ if d['conf'] >= threshold:
  positive_categories.add(d['category'])
  return sorted(positive_categories)


+ # Determine whether any positive detections are present in the detection list
+ # [detections].
+ def has_positive_detection(detections,options,detection_categories):
+
+ found_positive_detection = False
+ for d in detections:
+ threshold = get_threshold_for_category_id(d['category'], options, detection_categories)
+ if d['conf'] >= threshold:
+ found_positive_detection = True
+ break
+ return found_positive_detection
+
+
  # Render an image (with no ground truth information)
  #
  # Returns a list of rendering structs, where the first item is a category (e.g. "detections_animal"),
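
The threshold helpers added above are nested inside process_batch_results, so they aren't importable on their own; the standalone sketch below just restates the lookup logic on a hand-written detection list to show how a dict-valued threshold resolves per category. The category map and detections are made up ("1"/"2"/"3" follow the usual MegaDetector numbering).

    # Standalone restatement of the per-category threshold lookup added above
    detection_categories = {'1': 'animal', '2': 'person', '3': 'vehicle'}  # example map
    confidence_threshold = {'animal': 0.15, 'default': 0.2}                # example thresholds

    def threshold_for_category_id(category_id):
        if isinstance(confidence_threshold, float):
            return confidence_threshold
        category_name = detection_categories[category_id]
        # Fall back to the "default" entry for categories not listed explicitly
        return confidence_threshold.get(category_name, confidence_threshold['default'])

    detections = [{'category': '1', 'conf': 0.18},   # animal: above its 0.15 threshold
                  {'category': '2', 'conf': 0.18}]   # person: below the 0.2 default

    positive_categories = sorted({d['category'] for d in detections
                                  if d['conf'] >= threshold_for_category_id(d['category'])})
    print(positive_categories)  # ['1']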
@@ -560,8 +650,12 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,
  max_conf = file_info[1]
  detections = file_info[2]

+ # Determine whether any positive detections are present (using a threshold that
+ # may vary by category)
+ found_positive_detection = has_positive_detection(detections,options,detection_categories)
+
  detection_status = DetectionStatus.DS_UNASSIGNED
- if max_conf >= options.confidence_threshold:
+ if found_positive_detection:
  detection_status = DetectionStatus.DS_POSITIVE
  else:
  if options.include_almost_detections:
@@ -574,7 +668,7 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,

  if detection_status == DetectionStatus.DS_POSITIVE:
  if options.separate_detections_by_category:
- positive_categories = tuple(get_positive_categories(detections,options))
+ positive_categories = tuple(get_positive_categories(detections,options,detection_categories))
  if positive_categories not in detection_categories_to_results_name:
  raise ValueError('Error: {} not in category mapping (file {})'.format(
  str(positive_categories),image_relative_path))
@@ -690,7 +784,7 @@ def render_image_with_gt(file_info,ground_truth_indexed_db,
  f'ground truth status (status: {gt_status}, classes: {gt_class_summary})')
  return None

- detected = max_conf > options.confidence_threshold
+ detected = has_positive_detection(detections, options, detection_categories)

  if gt_presence and detected:
  if '_classification_accuracy' not in image.keys():
@@ -753,6 +847,10 @@ def process_batch_results(options: PostProcessingOptions

  ground_truth_indexed_db = None

+ if (options.ground_truth_json_file is not None):
+ assert (options.confidence_threshold is None) or (isinstance(confidence_threshold,float)), \
+ 'Variable confidence thresholds are not supported when supplying ground truth'
+
  if (options.ground_truth_json_file is not None) and (len(options.ground_truth_json_file) > 0):

  if options.separate_detections_by_category:
@@ -802,13 +900,13 @@ def process_batch_results(options: PostProcessingOptions
  if options.almost_detection_confidence_threshold < 0:
  options.almost_detection_confidence_threshold = 0

- # Remove failed rows
+ # Remove rows with inference failures (typically due to corrupt images)
  n_failures = 0
  if 'failure' in detections_df.columns:
  n_failures = detections_df['failure'].count()
  print('Ignoring {} failed images'.format(n_failures))
  # Explicitly forcing a copy() operation here to suppress "trying to be set
- # on a copy" # warnings (and associated risks) below.
+ # on a copy" warnings (and associated risks) below.
  detections_df = detections_df[detections_df['failure'].isna()].copy()

  assert other_fields is not None
@@ -823,33 +921,28 @@ def process_batch_results(options: PostProcessingOptions
  for k, v in classification_categories.items()
  }

- # Add column 'pred_detection_label' to indicate predicted detection status,
- # not separating out the classes
- det_status = 'pred_detection_label'
- if options.include_almost_detections:
- detections_df[det_status] = DetectionStatus.DS_ALMOST
- confidences = detections_df['max_detection_conf']
-
- pos_mask = (confidences >= options.confidence_threshold)
- detections_df.loc[pos_mask, det_status] = DetectionStatus.DS_POSITIVE
-
- neg_mask = (confidences < options.almost_detection_confidence_threshold)
- detections_df.loc[neg_mask, det_status] = DetectionStatus.DS_NEGATIVE
- else:
- detections_df[det_status] = np.where(
- detections_df['max_detection_conf'] >= options.confidence_threshold,
- DetectionStatus.DS_POSITIVE, DetectionStatus.DS_NEGATIVE)
-
- n_positives = sum(detections_df[det_status] == DetectionStatus.DS_POSITIVE)
+ # Count detections and almost-detections for reporting purposes
+ n_positives = 0
+ n_almosts = 0
+
+ for i_row,row in tqdm(detections_df.iterrows(),total=len(detections_df)):
+
+ detections = row['detections']
+ max_conf = row['max_detection_conf']
+ if has_positive_detection(detections, options, detection_categories):
+ n_positives += 1
+ elif (options.almost_detection_confidence_threshold is not None) and \
+ (max_conf >= options.almost_detection_confidence_threshold):
+ n_almosts += 1
+

  print(f'Finished loading and preprocessing {len(detections_df)} rows '
  f'from detector output, predicted {n_positives} positives.')

  if options.include_almost_detections:
- n_almosts = sum(detections_df[det_status] == DetectionStatus.DS_ALMOST)
  print('...and {} almost-positives'.format(n_almosts))


- ##%% Pull out descriptive metadata
+ ##%% Find descriptive metadata to include at the top of the page
  if options.job_name_string is not None:
  job_name_string = options.job_name_string

@@ -890,7 +983,7 @@ def process_batch_results(options: PostProcessingOptions
  print('Trimmed detection results to {} files'.format(len(detector_files)))


- ##%% Sample images for visualization
+ ##%% (Optionally) sample from the full set of images

  images_to_visualize = detections_df

@@ -994,7 +1087,7 @@ def process_batch_results(options: PostProcessingOptions
  (precision_at_confidence_threshold + recall_at_confidence_threshold)

  print('At a confidence threshold of {:.1%}, precision={:.1%}, recall={:.1%}, f1={:.1%}'.format(
- options.confidence_threshold, precision_at_confidence_threshold,
+ str(options.confidence_threshold), precision_at_confidence_threshold,
  recall_at_confidence_threshold, f1))

  ##%% Collect classification results, if they exist
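
In the @@ -823 hunk above, the vectorized pred_detection_label column is replaced by an explicit per-row loop so that the positive/almost-positive counts can respect per-category thresholds. Below is a toy restatement of that counting pattern on a made-up two-row table; pandas is already used by this module, a fixed float threshold stands in for the per-category lookup, and tqdm is omitted for brevity.

    # Sketch of the per-row positive / almost-positive counting added above
    import pandas as pd

    threshold = 0.2          # stands in for the (possibly per-category) lookup
    almost_threshold = 0.05

    detections_df = pd.DataFrame({
        'detections': [[{'category': '1', 'conf': 0.8}],
                       [{'category': '2', 'conf': 0.1}]],
        'max_detection_conf': [0.8, 0.1],
    })

    n_positives = 0
    n_almosts = 0
    for _, row in detections_df.iterrows():
        if any(d['conf'] >= threshold for d in row['detections']):
            n_positives += 1
        elif row['max_detection_conf'] >= almost_threshold:
            n_almosts += 1

    print(n_positives, n_almosts)  # 1 1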
@@ -1200,7 +1293,7 @@ def process_batch_results(options: PostProcessingOptions
  elapsed = time.time() - start_time

  # Map all the rendering results in the list rendering_results into the
- # dictionary images_html
+ # dictionary images_html, which maps category names to lists of results
  image_rendered_count = 0
  for rendering_result in rendering_results:
  if rendering_result is None:
@@ -1250,7 +1343,7 @@ def process_batch_results(options: PostProcessingOptions
  </div>
  """.format(
  style_header,job_name_string,model_version_string,
- image_count, options.confidence_threshold,
+ image_count, str(options.confidence_threshold),
  all_tp_count, all_tp_count/total_count,
  image_counts['tn'], image_counts['tn']/total_count,
  image_counts['fp'], image_counts['fp']/total_count,
@@ -1264,7 +1357,7 @@ def process_batch_results(options: PostProcessingOptions
  <p><strong>Precision/recall summary for all {} images</strong></p><img src="{}"><br/>
  </div>
  """.format(
- options.confidence_threshold, precision_at_confidence_threshold, recall_at_confidence_threshold,
+ str(options.confidence_threshold), precision_at_confidence_threshold, recall_at_confidence_threshold,
  len(detections_df), pr_figure_relative_filename
  )

@@ -1330,41 +1423,67 @@ def process_batch_results(options: PostProcessingOptions
  # Accumulate html image structs (in the format expected by write_html_image_list)
  # for each category
  images_html = collections.defaultdict(list)
- images_html['non_detections']
+

  # Add default entries by accessing them for the first time

- # Maps detection categories - e.g. "human" - to result set names, e.g.
- # "detections_human"
+ # Maps sorted tuples of detection category IDs (string ints) - e.g. ("1"), ("1", "4", "7") - to
+ # result set names, e.g. "detections_human", "detections_cat_truck".
  detection_categories_to_results_name = {}
-
+
+ # Keep track of which categories are single-class (e.g. "animal") and which are
+ # combinations (e.g. "animal_vehicle")
+ detection_categories_to_category_count = {}
+
+ # For the creation of a "non-detections" category
+ images_html['non_detections']
+ detection_categories_to_category_count['non_detections'] = 0
+
+
  if not options.separate_detections_by_category:
+ # For the creation of a "detections" category
  images_html['detections']
+ detection_categories_to_category_count['detections'] = 0
  else:
- # Add a set of results for each category and combination of categories
- keys = detection_categories.keys()
- subsets = []
- for L in range(1, len(keys)+1):
- for subset in itertools.combinations(keys, L):
- subsets.append(subset)
- for subset in subsets:
- sorted_subset = tuple(sorted(subset))
+ # Add a set of results for each category and combination of categories, e.g.
+ # "detections_animal_vehicle". When we're using this script for non-MegaDetector
+ # results, this can generate lots of categories, e.g. detections_bear_bird_cat_dog_pig.
+ # We'll keep that huge set of combinations in this map, but we'll only write
+ # out links for the ones that are non-empty.
+ used_combinations = set()
+
+ # row = images_to_visualize.iloc[0]
+ for i_row, row in images_to_visualize.iterrows():
+ detections_this_row = row['detections']
+ above_threshold_category_ids_this_row = set()
+ for detection in detections_this_row:
+ threshold = get_threshold_for_category_id(detection['category'], options, detection_categories)
+ if detection['conf'] >= threshold:
+ above_threshold_category_ids_this_row.add(detection['category'])
+ if len(above_threshold_category_ids_this_row) == 0:
+ continue
+ sorted_categories_this_row = tuple(sorted(above_threshold_category_ids_this_row))
+ used_combinations.add(sorted_categories_this_row)
+
+ for sorted_subset in used_combinations:
+ assert len(sorted_subset) > 0
  results_name = 'detections'
  for category_id in sorted_subset:
  results_name = results_name + '_' + detection_categories[category_id]
  images_html[results_name]
  detection_categories_to_results_name[sorted_subset] = results_name
+ detection_categories_to_category_count[results_name] = len(sorted_subset)

  if options.include_almost_detections:
  images_html['almost_detections']
+ detection_categories_to_category_count['almost_detections'] = 0

  # Create output directories
  for res in images_html.keys():
  os.makedirs(os.path.join(output_dir, res), exist_ok=True)

  image_count = len(images_to_visualize)
- has_classification_info = False
-
+
  # Each element will be a list of 2-tuples, with elements [collection name,html info struct]
  rendering_results = []

@@ -1421,6 +1540,9 @@ def process_batch_results(options: PostProcessingOptions

  elapsed = time.time() - start_time

+ # Do we have classification results in addition to detection results?
+ has_classification_info = False
+
  # Map all the rendering results in the list rendering_results into the
  # dictionary images_html
  image_rendered_count = 0
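
The @@ -1330 hunk above stops enumerating every itertools.combinations subset of the category map and instead collects only the category combinations that actually occur above threshold, turning each sorted tuple of category IDs into a results-page name. A small sketch of that naming scheme follows; the category map and combinations are made up, and since the dict is built from a set, the printed order may vary.

    # Sketch of the result-set naming used above: each image contributes the sorted
    # tuple of its above-threshold category IDs, and each tuple becomes one page
    detection_categories = {'1': 'animal', '2': 'person', '3': 'vehicle'}  # example map

    used_combinations = {('1',), ('1', '3')}  # e.g. "animal only" and "animal + vehicle" images

    detection_categories_to_results_name = {}
    for sorted_subset in used_combinations:
        results_name = 'detections'
        for category_id in sorted_subset:
            results_name = results_name + '_' + detection_categories[category_id]
        detection_categories_to_results_name[sorted_subset] = results_name

    print(detection_categories_to_results_name)
    # {('1',): 'detections_animal', ('1', '3'): 'detections_animal_vehicle'} (order may vary)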
@@ -1435,7 +1557,7 @@ def process_batch_results(options: PostProcessingOptions

  # Prepare the individual html image files
  image_counts = prepare_html_subpages(images_html, output_dir, options)
-
+
  if image_rendered_count == 0:
  seconds_per_image = 0.0
  else:
@@ -1465,9 +1587,15 @@ def process_batch_results(options: PostProcessingOptions
  almost_detection_string = ' (&ldquo;almost detection&rdquo; threshold at {:.1%})'.format(
  options.almost_detection_confidence_threshold)

+ confidence_threshold_string = ''
+ if isinstance(options.confidence_threshold,float):
+ confidence_threshold_string = '{:.1%}'.format(options.confidence_threshold)
+ else:
+ confidence_threshold_string = str(options.confidence_threshold)
+
  index_page = """<html>\n{}\n<body>\n
  <h2>Visualization of results for {}</h2>\n
- <p>A sample of {} images (of {} total)FAILURE_PLACEHOLDER, annotated with detections above {:.1%} confidence{}.</p>\n
+ <p>A sample of {} images (of {} total)FAILURE_PLACEHOLDER, annotated with detections above confidence {}{}.</p>\n

  <div class="contentdiv">
  <p>Model version: {}</p>
@@ -1475,7 +1603,7 @@ def process_batch_results(options: PostProcessingOptions

  <h3>Sample images</h3>\n
  <div class="contentdiv">\n""".format(
- style_header, job_name_string, image_count, len(detections_df), options.confidence_threshold,
+ style_header, job_name_string, image_count, len(detections_df), confidence_threshold_string,
  almost_detection_string, model_version_string)

  failure_string = ''
@@ -1491,7 +1619,17 @@ def process_batch_results(options: PostProcessingOptions
  friendly_name = friendly_name.capitalize()
  return friendly_name

- for result_set_name in images_html.keys():
+ sorted_result_set_names = sorted(list(images_html.keys()))
+
+ result_set_name_to_count = {}
+ for result_set_name in sorted_result_set_names:
+ image_count = image_counts[result_set_name]
+ result_set_name_to_count[result_set_name] = image_count
+ sorted_result_set_names = sorted(sorted_result_set_names,
+ key=lambda x: result_set_name_to_count[x],
+ reverse=True)
+
+ for result_set_name in sorted_result_set_names:

  # Don't print classification classes here; we'll do that later with a slightly
  # different structure
@@ -1501,18 +1639,32 @@ def process_batch_results(options: PostProcessingOptions
  filename = result_set_name + '.html'
  label = result_set_name_to_friendly_name(result_set_name)
  image_count = image_counts[result_set_name]
+
+ # Don't include line items for empty multi-category pages
+ if image_count == 0 and \
+ detection_categories_to_category_count[result_set_name] > 1:
+ continue
+
  if total_images == 0:
  image_fraction = -1
  else:
  image_fraction = image_count / total_images
- index_page += '<a href="{}">{}</a> ({}, {:.1%})<br/>\n'.format(
- filename,label,image_count,image_fraction)
+
+ # Write the line item for this category, including a link only if the
+ # category is non-empty
+ if image_count == 0:
+ index_page += '{} ({}, {:.1%})<br/>\n'.format(
+ label,image_count,image_fraction)
+ else:
+ index_page += '<a href="{}">{}</a> ({}, {:.1%})<br/>\n'.format(
+ filename,label,image_count,image_fraction)

  index_page += '</div>\n'

  if has_classification_info:
  index_page += '<h3>Images of detected classes</h3>'
- index_page += '<p>The same image might appear under multiple classes if multiple species were detected.</p>\n'
+ index_page += '<p>The same image might appear under multiple classes ' + \
+ 'if multiple species were detected.</p>\n'
  index_page += '<p>Classifications with confidence less than {:.1%} confidence are considered "unreliable".</p>\n'.format(
  options.classification_confidence_threshold)
  index_page += '<div class="contentdiv">\n'
@@ -1616,8 +1768,11 @@ def main():
  '--include_almost_detections', action='store_true',
  help='Include a separate category for images just above a second confidence threshold')
  parser.add_argument(
- '--random_output_sort', action='store_true',
- help='Sort output randomly (defaults to sorting by filename)')
+ '--html_sort_order', type=str, default='filename',
+ help='Sort order for output pages, should be one of [filename,confidence,random] (defaults to filename)')
+ parser.add_argument(
+ '--sort_by_confidence', action='store_true',
+ help='Sort output in decreasing order by confidence (defaults to sorting by filename)')
  parser.add_argument(
  '--n_cores', type=int, default=1,
  help='Number of threads to use for rendering (default: 1)')
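
The old --random_output_sort flag is replaced above by --html_sort_order (plus a --sort_by_confidence convenience flag), and main() no longer derives sort_html_by_filename from the parsed args; the arguments are copied onto the options object via args_to_object. A minimal sketch of that flag-to-options flow follows, where copy_fields is a hypothetical stand-in for md_utils.ct_utils.args_to_object, whose exact semantics aren't shown in this diff.

    # Sketch: how a flag like --html_sort_order could end up on the options object
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--html_sort_order', type=str, default='filename',
                        help='One of [filename,confidence,random]')
    parser.add_argument('--include_almost_detections', action='store_true')
    args = parser.parse_args(['--html_sort_order', 'confidence'])

    class Options:
        html_sort_order = 'filename'
        include_almost_detections = False

    def copy_fields(args, obj):
        # Hypothetical stand-in for args_to_object: copy each parsed argument
        # onto the target object as an attribute
        for k, v in vars(args).items():
            setattr(obj, k, v)

    options = Options()
    copy_fields(args, options)
    print(options.html_sort_order)  # 'confidence'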
@@ -1633,13 +1788,17 @@ def main():
  '--open_output_file',
  action='store_true',
  help='Open the HTML output file when finished')
+ parser.add_argument(
+ '--max_figures_per_html_file',
+ type=int, default=None,
+ help='Maximum number of images to put on a single HTML page')

  if len(sys.argv[1:]) == 0:
  parser.print_help()
  parser.exit()

  args = parser.parse_args()
- args.sort_html_by_filename = (not args.random_output_sort)
+

  if args.n_cores != 1:
  assert (args.n_cores > 1), 'Illegal number of cores: {}'.format(args.n_cores)
@@ -1647,7 +1806,7 @@ def main():
  args.parallelize_rendering = True
  args.parallelize_rendering_n_cores = args.n_cores

- args_to_object(args, options)
+ args_to_object(args, options)

  if args.no_separate_detections_by_category:
  options.separate_detections_by_category = False