megadetector 5.0.5__py3-none-any.whl → 5.0.7__py3-none-any.whl

This diff shows the content of these publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

Files changed (132)
  1. api/batch_processing/data_preparation/manage_local_batch.py +302 -263
  2. api/batch_processing/data_preparation/manage_video_batch.py +81 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/categorize_detections_by_size.py +50 -19
  5. api/batch_processing/postprocessing/compare_batch_results.py +110 -60
  6. api/batch_processing/postprocessing/load_api_results.py +56 -70
  7. api/batch_processing/postprocessing/md_to_coco.py +1 -1
  8. api/batch_processing/postprocessing/md_to_labelme.py +2 -1
  9. api/batch_processing/postprocessing/postprocess_batch_results.py +240 -81
  10. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
  11. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  12. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  13. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +227 -75
  14. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  15. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  16. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +2 -2
  17. classification/prepare_classification_script.py +191 -191
  18. data_management/coco_to_yolo.py +68 -45
  19. data_management/databases/integrity_check_json_db.py +7 -5
  20. data_management/generate_crops_from_cct.py +3 -3
  21. data_management/get_image_sizes.py +8 -6
  22. data_management/importers/add_timestamps_to_icct.py +79 -0
  23. data_management/importers/animl_results_to_md_results.py +160 -0
  24. data_management/importers/auckland_doc_test_to_json.py +4 -4
  25. data_management/importers/auckland_doc_to_json.py +1 -1
  26. data_management/importers/awc_to_json.py +5 -5
  27. data_management/importers/bellevue_to_json.py +5 -5
  28. data_management/importers/carrizo_shrubfree_2018.py +5 -5
  29. data_management/importers/carrizo_trail_cam_2017.py +5 -5
  30. data_management/importers/cct_field_adjustments.py +2 -3
  31. data_management/importers/channel_islands_to_cct.py +4 -4
  32. data_management/importers/ena24_to_json.py +5 -5
  33. data_management/importers/helena_to_cct.py +10 -10
  34. data_management/importers/idaho-camera-traps.py +12 -12
  35. data_management/importers/idfg_iwildcam_lila_prep.py +8 -8
  36. data_management/importers/jb_csv_to_json.py +4 -4
  37. data_management/importers/missouri_to_json.py +1 -1
  38. data_management/importers/noaa_seals_2019.py +1 -1
  39. data_management/importers/pc_to_json.py +5 -5
  40. data_management/importers/prepare-noaa-fish-data-for-lila.py +4 -4
  41. data_management/importers/prepare_zsl_imerit.py +5 -5
  42. data_management/importers/rspb_to_json.py +4 -4
  43. data_management/importers/save_the_elephants_survey_A.py +5 -5
  44. data_management/importers/save_the_elephants_survey_B.py +6 -6
  45. data_management/importers/snapshot_safari_importer.py +9 -9
  46. data_management/importers/snapshot_serengeti_lila.py +9 -9
  47. data_management/importers/timelapse_csv_set_to_json.py +5 -7
  48. data_management/importers/ubc_to_json.py +4 -4
  49. data_management/importers/umn_to_json.py +4 -4
  50. data_management/importers/wellington_to_json.py +1 -1
  51. data_management/importers/wi_to_json.py +2 -2
  52. data_management/importers/zamba_results_to_md_results.py +181 -0
  53. data_management/labelme_to_coco.py +35 -7
  54. data_management/labelme_to_yolo.py +229 -0
  55. data_management/lila/add_locations_to_island_camera_traps.py +1 -1
  56. data_management/lila/add_locations_to_nacti.py +147 -0
  57. data_management/lila/create_lila_blank_set.py +474 -0
  58. data_management/lila/create_lila_test_set.py +2 -1
  59. data_management/lila/create_links_to_md_results_files.py +106 -0
  60. data_management/lila/download_lila_subset.py +46 -21
  61. data_management/lila/generate_lila_per_image_labels.py +23 -14
  62. data_management/lila/get_lila_annotation_counts.py +17 -11
  63. data_management/lila/lila_common.py +14 -11
  64. data_management/lila/test_lila_metadata_urls.py +116 -0
  65. data_management/ocr_tools.py +829 -0
  66. data_management/resize_coco_dataset.py +13 -11
  67. data_management/yolo_output_to_md_output.py +84 -12
  68. data_management/yolo_to_coco.py +38 -20
  69. detection/process_video.py +36 -14
  70. detection/pytorch_detector.py +23 -8
  71. detection/run_detector.py +76 -19
  72. detection/run_detector_batch.py +178 -63
  73. detection/run_inference_with_yolov5_val.py +326 -57
  74. detection/run_tiled_inference.py +153 -43
  75. detection/video_utils.py +34 -8
  76. md_utils/ct_utils.py +172 -1
  77. md_utils/md_tests.py +372 -51
  78. md_utils/path_utils.py +167 -39
  79. md_utils/process_utils.py +26 -7
  80. md_utils/split_locations_into_train_val.py +215 -0
  81. md_utils/string_utils.py +10 -0
  82. md_utils/url_utils.py +0 -2
  83. md_utils/write_html_image_list.py +9 -26
  84. md_visualization/plot_utils.py +12 -8
  85. md_visualization/visualization_utils.py +106 -7
  86. md_visualization/visualize_db.py +16 -8
  87. md_visualization/visualize_detector_output.py +208 -97
  88. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/METADATA +3 -6
  89. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/RECORD +98 -121
  90. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
  91. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  92. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  93. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  94. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  95. taxonomy_mapping/species_lookup.py +33 -13
  96. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  97. api/synchronous/api_core/yolov5/detect.py +0 -252
  98. api/synchronous/api_core/yolov5/export.py +0 -607
  99. api/synchronous/api_core/yolov5/hubconf.py +0 -146
  100. api/synchronous/api_core/yolov5/models/__init__.py +0 -0
  101. api/synchronous/api_core/yolov5/models/common.py +0 -738
  102. api/synchronous/api_core/yolov5/models/experimental.py +0 -104
  103. api/synchronous/api_core/yolov5/models/tf.py +0 -574
  104. api/synchronous/api_core/yolov5/models/yolo.py +0 -338
  105. api/synchronous/api_core/yolov5/train.py +0 -670
  106. api/synchronous/api_core/yolov5/utils/__init__.py +0 -36
  107. api/synchronous/api_core/yolov5/utils/activations.py +0 -103
  108. api/synchronous/api_core/yolov5/utils/augmentations.py +0 -284
  109. api/synchronous/api_core/yolov5/utils/autoanchor.py +0 -170
  110. api/synchronous/api_core/yolov5/utils/autobatch.py +0 -66
  111. api/synchronous/api_core/yolov5/utils/aws/__init__.py +0 -0
  112. api/synchronous/api_core/yolov5/utils/aws/resume.py +0 -40
  113. api/synchronous/api_core/yolov5/utils/benchmarks.py +0 -148
  114. api/synchronous/api_core/yolov5/utils/callbacks.py +0 -71
  115. api/synchronous/api_core/yolov5/utils/dataloaders.py +0 -1087
  116. api/synchronous/api_core/yolov5/utils/downloads.py +0 -178
  117. api/synchronous/api_core/yolov5/utils/flask_rest_api/example_request.py +0 -19
  118. api/synchronous/api_core/yolov5/utils/flask_rest_api/restapi.py +0 -46
  119. api/synchronous/api_core/yolov5/utils/general.py +0 -1018
  120. api/synchronous/api_core/yolov5/utils/loggers/__init__.py +0 -187
  121. api/synchronous/api_core/yolov5/utils/loggers/wandb/__init__.py +0 -0
  122. api/synchronous/api_core/yolov5/utils/loggers/wandb/log_dataset.py +0 -27
  123. api/synchronous/api_core/yolov5/utils/loggers/wandb/sweep.py +0 -41
  124. api/synchronous/api_core/yolov5/utils/loggers/wandb/wandb_utils.py +0 -577
  125. api/synchronous/api_core/yolov5/utils/loss.py +0 -234
  126. api/synchronous/api_core/yolov5/utils/metrics.py +0 -355
  127. api/synchronous/api_core/yolov5/utils/plots.py +0 -489
  128. api/synchronous/api_core/yolov5/utils/torch_utils.py +0 -314
  129. api/synchronous/api_core/yolov5/val.py +0 -394
  130. md_utils/matlab_porting_tools.py +0 -97
  131. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
  132. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0
@@ -23,13 +23,13 @@ import collections
  import copy
  import errno
  import io
- import itertools
  import os
  import sys
  import time
  import uuid
  import urllib
  import warnings
+ import random

  from typing import Any, Dict, Iterable, Optional, Tuple
  from enum import IntEnum
@@ -53,6 +53,7 @@ from md_utils import path_utils
  from data_management.cct_json_utils import (CameraTrapJsonUtils, IndexedJsonDb)
  from api.batch_processing.postprocessing.load_api_results import load_api_results
  from md_utils.ct_utils import args_to_object
+ from md_utils.ct_utils import invert_dictionary

  from detection.run_detector import get_typical_confidence_threshold_from_results

@@ -113,8 +114,18 @@ class PostProcessingOptions:
  # detections_animal, detections_person, detections_vehicle
  rendering_bypass_sets = []

- # By default, choose a confidence threshold based on the detector version
+ # If this is None, choose a confidence threshold based on the detector version.
+ #
+ # This can either be a float or a dictionary mapping category names (not IDs) to
+ # thresholds. The category "default" can be used to specify thresholds for
+ # other categories. Currently the use of a dict here is not supported when
+ # ground truth is supplied.
  confidence_threshold = None
+
+ # Confidence threshold to apply to classification (not detection) results
+ #
+ # Only a float is supported here (unlike the "confidence_threshold" parameter, which
+ # can be a dict).
  classification_confidence_threshold = 0.5

  # Used for summary statistics only
@@ -134,13 +145,9 @@ class PostProcessingOptions:
  job_name_string = None
  model_version_string = None

- # These should really be mutually exclusive, but I'm not enforcing this.
- #
- # Nothing bad happens if you set both to true; the confidence sort happens
- # second.
- sort_html_by_filename = True
- sort_html_by_confidence = False
-
+ # Sort order for the output, should be one of "filename", "confidence", or "random"
+ html_sort_order = 'filename'
+
  link_images_to_originals = True

  # Optionally separate detections into categories (animal/vehicle/human)
@@ -164,6 +171,9 @@ class PostProcessingOptions:
  #
  # Currently only supported when ground truth is unavailable
  include_almost_detections = False
+
+ # Only a float is supported here (unlike the "confidence_threshold" parameter, which
+ # can be a dict).
  almost_detection_confidence_threshold = None

  # Control rendering parallelization
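
The comments added above describe the two behavioral changes to PostProcessingOptions in this release: confidence_threshold may now be a dict keyed by category name (with an optional "default" entry), and the two boolean sort flags are replaced by html_sort_order. A minimal configuration sketch follows; the option fields shown are taken from this diff, while the category names and the commented-out input/output fields are illustrative assumptions.

    # Sketch: per-category detection thresholds and the new page sort order
    from api.batch_processing.postprocessing.postprocess_batch_results import (
        PostProcessingOptions, process_batch_results)

    options = PostProcessingOptions()

    # Keys are category *names*, not IDs; "default" covers any category
    # not listed explicitly.
    options.confidence_threshold = {'animal': 0.15, 'person': 0.35, 'default': 0.2}

    # These thresholds still take plain floats
    options.classification_confidence_threshold = 0.5
    options.include_almost_detections = True
    options.almost_detection_confidence_threshold = 0.05

    # Replaces the old sort_html_by_filename / sort_html_by_confidence flags
    options.html_sort_order = 'confidence'

    # Input/output fields are not shown in this diff; the names below are hypothetical
    # options.api_output_file = 'md_results.json'
    # options.image_base_dir = '/data/camera-traps'
    # options.output_dir = '/data/postprocessing-output'
    # process_batch_results(options)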
@@ -407,8 +417,7 @@ def render_bounding_boxes(
  image = None
  # return ''

- # Render images to a flat folder... we can use os.sep here because we've
- # already normalized paths
+ # Render images to a flat folder
  sample_name = res + '_' + path_utils.flatten_path(image_relative_path)
  fullpath = os.path.join(options.output_dir, res, sample_name)

@@ -429,12 +438,25 @@ def render_bounding_boxes(
  vis_utils.render_db_bounding_boxes(ground_truth_boxes, gt_classes, image,
  original_size=original_size,label_map=label_map,
  thickness=4,expansion=4)
+
+ # render_detection_bounding_boxes expects either a float or a dict mapping
+ # category IDs to names.
+ if isinstance(options.confidence_threshold,float):
+ rendering_confidence_threshold = options.confidence_threshold
+ else:
+ category_ids = set()
+ for d in detections:
+ category_ids.add(d['category'])
+ rendering_confidence_threshold = {}
+ for category_id in category_ids:
+ rendering_confidence_threshold[category_id] = \
+ get_threshold_for_category_id(category_id, options, detection_categories)

  vis_utils.render_detection_bounding_boxes(
  detections, image,
  label_map=detection_categories,
  classification_label_map=classification_categories,
- confidence_threshold=options.confidence_threshold,
+ confidence_threshold=rendering_confidence_threshold,
  thickness=options.line_thickness,
  expansion=options.box_expansion)

@@ -471,10 +493,13 @@ def render_bounding_boxes(

  def prepare_html_subpages(images_html, output_dir, options=None):
  """
- Write out a series of html image lists, e.g. the fp/tp/fn/tn pages.
+ Write out a series of html image lists, e.g. the "detections" or "non-detections"
+ pages.

- image_html is a dictionary mapping an html page name (e.g. "fp") to a list
- of image structs friendly to write_html_image_list
+ image_html is a dictionary mapping an html page name (e.g. "detections_animal") to
+ a list of image structs friendly to write_html_image_list.
+
+ Returns a dictionary mapping category names to image counts.
  """

  if options is None:
@@ -486,7 +511,7 @@ def prepare_html_subpages(images_html, output_dir, options=None):
  image_counts[res] = len(array)

  # Optionally sort by filename before writing to html
- if options.sort_html_by_filename:
+ if options.html_sort_order == 'filename':
  images_html_sorted = {}
  for res, array in images_html.items():
  sorted_array = sorted(array, key=lambda x: x['filename'])
@@ -494,18 +519,26 @@ def prepare_html_subpages(images_html, output_dir, options=None):
  images_html = images_html_sorted

  # Optionally sort by confidence before writing to html
- if options.sort_html_by_confidence:
+ elif options.html_sort_order == 'confidence':
  images_html_sorted = {}
  for res, array in images_html.items():

  if not all(['max_conf' in d for d in array]):
- print("Warning: some elements in the {} page don't have confidence values, can't sort by confidence".format(
- res))
+ print("Warning: some elements in the {} page don't have confidence values, can't sort by confidence".format(res))
  else:
  sorted_array = sorted(array, key=lambda x: x['max_conf'], reverse=True)
  images_html_sorted[res] = sorted_array
  images_html = images_html_sorted

+ else:
+ assert options.html_sort_order == 'random',\
+ 'Unrecognized sort order {}'.format(options.html_sort_order)
+ images_html_sorted = {}
+ for res, array in images_html.items():
+ sorted_array = random.sample(array,len(array))
+ images_html_sorted[res] = sorted_array
+ images_html = images_html_sorted
+
  # Write the individual HTML files
  for res, array in images_html.items():

@@ -513,24 +546,81 @@ def prepare_html_subpages(images_html, output_dir, options=None):
  html_image_list_options['maxFiguresPerHtmlFile'] = options.max_figures_per_html_file
  html_image_list_options['headerHtml'] = '<h1>{}</h1>'.format(res.upper())

- write_html_image_list(
- filename=os.path.join(output_dir, '{}.html'.format(res)),
- images=array,
- options=html_image_list_options)
+ # Don't write empty pages
+ if len(array) == 0:
+ continue
+ else:
+ write_html_image_list(
+ filename=os.path.join(output_dir, '{}.html'.format(res)),
+ images=array,
+ options=html_image_list_options)

  return image_counts

  # ...prepare_html_subpages()

- # Get unique categories above the threshold for this image
- def get_positive_categories(detections,options):
+
+ # Determine the confidence threshold we should use for a specific category name
+ def get_threshold_for_category_name(category_name,options):
+
+ if isinstance(options.confidence_threshold,float):
+ return options.confidence_threshold
+ else:
+ assert isinstance(options.confidence_threshold,dict), \
+ 'confidence_threshold must either be a float or a dict'
+
+ if category_name in options.confidence_threshold:
+
+ return options.confidence_threshold[category_name]
+
+ else:
+ assert 'default' in options.confidence_threshold, \
+ 'category {} not in confidence_threshold dict, and no default supplied'.format(
+ category_name)
+ return options.confidence_threshold['default']
+
+
+ # Determine the confidence threshold we should use for a specific category ID
+ #
+ # detection_categories is a dict mapping category IDs to names.
+ def get_threshold_for_category_id(category_id,options,detection_categories):
+
+ if isinstance(options.confidence_threshold,float):
+ return options.confidence_threshold
+
+ assert category_id in detection_categories, \
+ 'Invalid category ID {}'.format(category_id)
+
+ category_name = detection_categories[category_id]
+
+ return get_threshold_for_category_name(category_name,options)
+
+
+ # Get a sorted list of unique categories (as string IDs) above the threshold for this image
+ #
+ # "detection_categories" is a dict mapping category IDs to names.
+ def get_positive_categories(detections,options,detection_categories):
  positive_categories = set()
  for d in detections:
- if d['conf'] >= options.confidence_threshold:
+ threshold = get_threshold_for_category_id(d['category'], options, detection_categories)
+ if d['conf'] >= threshold:
  positive_categories.add(d['category'])
  return sorted(positive_categories)


+ # Determine whether any positive detections are present in the detection list
+ # [detections].
+ def has_positive_detection(detections,options,detection_categories):
+
+ found_positive_detection = False
+ for d in detections:
+ threshold = get_threshold_for_category_id(d['category'], options, detection_categories)
+ if d['conf'] >= threshold:
+ found_positive_detection = True
+ break
+ return found_positive_detection
+
+
  # Render an image (with no ground truth information)
  #
  # Returns a list of rendering structs, where the first item is a category (e.g. "detections_animal"),
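
The threshold helpers added above are nested inside process_batch_results, so they aren't importable on their own; the standalone sketch below just restates the lookup logic on a hand-written detection list to show how a dict-valued threshold resolves per category. The category map and detections are made up ("1"/"2"/"3" follow the usual MegaDetector numbering).

    # Standalone restatement of the per-category threshold lookup added above
    detection_categories = {'1': 'animal', '2': 'person', '3': 'vehicle'}  # example map
    confidence_threshold = {'animal': 0.15, 'default': 0.2}                # example thresholds

    def threshold_for_category_id(category_id):
        if isinstance(confidence_threshold, float):
            return confidence_threshold
        category_name = detection_categories[category_id]
        # Fall back to the "default" entry for categories not listed explicitly
        return confidence_threshold.get(category_name, confidence_threshold['default'])

    detections = [{'category': '1', 'conf': 0.18},   # animal: above its 0.15 threshold
                  {'category': '2', 'conf': 0.18}]   # person: below the 0.2 default

    positive_categories = sorted({d['category'] for d in detections
                                  if d['conf'] >= threshold_for_category_id(d['category'])})
    print(positive_categories)  # ['1']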
@@ -560,8 +650,12 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,
  max_conf = file_info[1]
  detections = file_info[2]

+ # Determine whether any positive detections are present (using a threshold that
+ # may vary by category)
+ found_positive_detection = has_positive_detection(detections,options,detection_categories)
+
  detection_status = DetectionStatus.DS_UNASSIGNED
- if max_conf >= options.confidence_threshold:
+ if found_positive_detection:
  detection_status = DetectionStatus.DS_POSITIVE
  else:
  if options.include_almost_detections:
@@ -574,7 +668,7 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,

  if detection_status == DetectionStatus.DS_POSITIVE:
  if options.separate_detections_by_category:
- positive_categories = tuple(get_positive_categories(detections,options))
+ positive_categories = tuple(get_positive_categories(detections,options,detection_categories))
  if positive_categories not in detection_categories_to_results_name:
  raise ValueError('Error: {} not in category mapping (file {})'.format(
  str(positive_categories),image_relative_path))
@@ -690,7 +784,7 @@ def render_image_with_gt(file_info,ground_truth_indexed_db,
  f'ground truth status (status: {gt_status}, classes: {gt_class_summary})')
  return None

- detected = max_conf > options.confidence_threshold
+ detected = has_positive_detection(detections, options, detection_categories)

  if gt_presence and detected:
  if '_classification_accuracy' not in image.keys():
@@ -753,6 +847,10 @@ def process_batch_results(options: PostProcessingOptions

  ground_truth_indexed_db = None

+ if (options.ground_truth_json_file is not None):
+ assert (options.confidence_threshold is None) or (isinstance(confidence_threshold,float)), \
+ 'Variable confidence thresholds are not supported when supplying ground truth'
+
  if (options.ground_truth_json_file is not None) and (len(options.ground_truth_json_file) > 0):

  if options.separate_detections_by_category:
@@ -802,13 +900,13 @@ def process_batch_results(options: PostProcessingOptions
  if options.almost_detection_confidence_threshold < 0:
  options.almost_detection_confidence_threshold = 0

- # Remove failed rows
+ # Remove rows with inference failures (typically due to corrupt images)
  n_failures = 0
  if 'failure' in detections_df.columns:
  n_failures = detections_df['failure'].count()
  print('Ignoring {} failed images'.format(n_failures))
  # Explicitly forcing a copy() operation here to suppress "trying to be set
- # on a copy" # warnings (and associated risks) below.
+ # on a copy" warnings (and associated risks) below.
  detections_df = detections_df[detections_df['failure'].isna()].copy()

  assert other_fields is not None
@@ -823,33 +921,28 @@ def process_batch_results(options: PostProcessingOptions
  for k, v in classification_categories.items()
  }

- # Add column 'pred_detection_label' to indicate predicted detection status,
- # not separating out the classes
- det_status = 'pred_detection_label'
- if options.include_almost_detections:
- detections_df[det_status] = DetectionStatus.DS_ALMOST
- confidences = detections_df['max_detection_conf']
-
- pos_mask = (confidences >= options.confidence_threshold)
- detections_df.loc[pos_mask, det_status] = DetectionStatus.DS_POSITIVE
-
- neg_mask = (confidences < options.almost_detection_confidence_threshold)
- detections_df.loc[neg_mask, det_status] = DetectionStatus.DS_NEGATIVE
- else:
- detections_df[det_status] = np.where(
- detections_df['max_detection_conf'] >= options.confidence_threshold,
- DetectionStatus.DS_POSITIVE, DetectionStatus.DS_NEGATIVE)
-
- n_positives = sum(detections_df[det_status] == DetectionStatus.DS_POSITIVE)
+ # Count detections and almost-detections for reporting purposes
+ n_positives = 0
+ n_almosts = 0
+
+ for i_row,row in tqdm(detections_df.iterrows(),total=len(detections_df)):
+
+ detections = row['detections']
+ max_conf = row['max_detection_conf']
+ if has_positive_detection(detections, options, detection_categories):
+ n_positives += 1
+ elif (options.almost_detection_confidence_threshold is not None) and \
+ (max_conf >= options.almost_detection_confidence_threshold):
+ n_almosts += 1
+

  print(f'Finished loading and preprocessing {len(detections_df)} rows '
  f'from detector output, predicted {n_positives} positives.')

  if options.include_almost_detections:
- n_almosts = sum(detections_df[det_status] == DetectionStatus.DS_ALMOST)
  print('...and {} almost-positives'.format(n_almosts))


- ##%% Pull out descriptive metadata
+ ##%% Find descriptive metadata to include at the top of the page
  if options.job_name_string is not None:
  job_name_string = options.job_name_string

@@ -890,7 +983,7 @@ def process_batch_results(options: PostProcessingOptions
  print('Trimmed detection results to {} files'.format(len(detector_files)))


- ##%% Sample images for visualization
+ ##%% (Optionally) sample from the full set of images

  images_to_visualize = detections_df

@@ -994,7 +1087,7 @@ def process_batch_results(options: PostProcessingOptions
  (precision_at_confidence_threshold + recall_at_confidence_threshold)

  print('At a confidence threshold of {:.1%}, precision={:.1%}, recall={:.1%}, f1={:.1%}'.format(
- options.confidence_threshold, precision_at_confidence_threshold,
+ str(options.confidence_threshold), precision_at_confidence_threshold,
  recall_at_confidence_threshold, f1))

  ##%% Collect classification results, if they exist
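
In the @@ -823 hunk above, the vectorized pred_detection_label column is replaced by an explicit per-row loop so that the positive/almost-positive counts can respect per-category thresholds. Below is a toy restatement of that counting pattern on a made-up two-row table; pandas is already used by this module, a fixed float threshold stands in for the per-category lookup, and tqdm is omitted for brevity.

    # Sketch of the per-row positive / almost-positive counting added above
    import pandas as pd

    threshold = 0.2          # stands in for the (possibly per-category) lookup
    almost_threshold = 0.05

    detections_df = pd.DataFrame({
        'detections': [[{'category': '1', 'conf': 0.8}],
                       [{'category': '2', 'conf': 0.1}]],
        'max_detection_conf': [0.8, 0.1],
    })

    n_positives = 0
    n_almosts = 0
    for _, row in detections_df.iterrows():
        if any(d['conf'] >= threshold for d in row['detections']):
            n_positives += 1
        elif row['max_detection_conf'] >= almost_threshold:
            n_almosts += 1

    print(n_positives, n_almosts)  # 1 1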
@@ -1200,7 +1293,7 @@ def process_batch_results(options: PostProcessingOptions
  elapsed = time.time() - start_time

  # Map all the rendering results in the list rendering_results into the
- # dictionary images_html
+ # dictionary images_html, which maps category names to lists of results
  image_rendered_count = 0
  for rendering_result in rendering_results:
  if rendering_result is None:
@@ -1250,7 +1343,7 @@ def process_batch_results(options: PostProcessingOptions
  </div>
  """.format(
  style_header,job_name_string,model_version_string,
- image_count, options.confidence_threshold,
+ image_count, str(options.confidence_threshold),
  all_tp_count, all_tp_count/total_count,
  image_counts['tn'], image_counts['tn']/total_count,
  image_counts['fp'], image_counts['fp']/total_count,
@@ -1264,7 +1357,7 @@ def process_batch_results(options: PostProcessingOptions
  <p><strong>Precision/recall summary for all {} images</strong></p><img src="{}"><br/>
  </div>
  """.format(
- options.confidence_threshold, precision_at_confidence_threshold, recall_at_confidence_threshold,
+ str(options.confidence_threshold), precision_at_confidence_threshold, recall_at_confidence_threshold,
  len(detections_df), pr_figure_relative_filename
  )

@@ -1330,41 +1423,67 @@ def process_batch_results(options: PostProcessingOptions
  # Accumulate html image structs (in the format expected by write_html_image_list)
  # for each category
  images_html = collections.defaultdict(list)
- images_html['non_detections']
+

  # Add default entries by accessing them for the first time

- # Maps detection categories - e.g. "human" - to result set names, e.g.
- # "detections_human"
+ # Maps sorted tuples of detection category IDs (string ints) - e.g. ("1"), ("1", "4", "7") - to
+ # result set names, e.g. "detections_human", "detections_cat_truck".
  detection_categories_to_results_name = {}
-
+
+ # Keep track of which categories are single-class (e.g. "animal") and which are
+ # combinations (e.g. "animal_vehicle")
+ detection_categories_to_category_count = {}
+
+ # For the creation of a "non-detections" category
+ images_html['non_detections']
+ detection_categories_to_category_count['non_detections'] = 0
+
+
  if not options.separate_detections_by_category:
+ # For the creation of a "detections" category
  images_html['detections']
+ detection_categories_to_category_count['detections'] = 0
  else:
- # Add a set of results for each category and combination of categories
- keys = detection_categories.keys()
- subsets = []
- for L in range(1, len(keys)+1):
- for subset in itertools.combinations(keys, L):
- subsets.append(subset)
- for subset in subsets:
- sorted_subset = tuple(sorted(subset))
+ # Add a set of results for each category and combination of categories, e.g.
+ # "detections_animal_vehicle". When we're using this script for non-MegaDetector
+ # results, this can generate lots of categories, e.g. detections_bear_bird_cat_dog_pig.
+ # We'll keep that huge set of combinations in this map, but we'll only write
+ # out links for the ones that are non-empty.
+ used_combinations = set()
+
+ # row = images_to_visualize.iloc[0]
+ for i_row, row in images_to_visualize.iterrows():
+ detections_this_row = row['detections']
+ above_threshold_category_ids_this_row = set()
+ for detection in detections_this_row:
+ threshold = get_threshold_for_category_id(detection['category'], options, detection_categories)
+ if detection['conf'] >= threshold:
+ above_threshold_category_ids_this_row.add(detection['category'])
+ if len(above_threshold_category_ids_this_row) == 0:
+ continue
+ sorted_categories_this_row = tuple(sorted(above_threshold_category_ids_this_row))
+ used_combinations.add(sorted_categories_this_row)
+
+ for sorted_subset in used_combinations:
+ assert len(sorted_subset) > 0
  results_name = 'detections'
  for category_id in sorted_subset:
  results_name = results_name + '_' + detection_categories[category_id]
  images_html[results_name]
  detection_categories_to_results_name[sorted_subset] = results_name
+ detection_categories_to_category_count[results_name] = len(sorted_subset)

  if options.include_almost_detections:
  images_html['almost_detections']
+ detection_categories_to_category_count['almost_detections'] = 0

  # Create output directories
  for res in images_html.keys():
  os.makedirs(os.path.join(output_dir, res), exist_ok=True)

  image_count = len(images_to_visualize)
- has_classification_info = False
-
+
  # Each element will be a list of 2-tuples, with elements [collection name,html info struct]
  rendering_results = []

@@ -1421,6 +1540,9 @@ def process_batch_results(options: PostProcessingOptions

  elapsed = time.time() - start_time

+ # Do we have classification results in addition to detection results?
+ has_classification_info = False
+
  # Map all the rendering results in the list rendering_results into the
  # dictionary images_html
  image_rendered_count = 0
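
The @@ -1330 hunk above stops enumerating every itertools.combinations subset of the category map and instead collects only the category combinations that actually occur above threshold, turning each sorted tuple of category IDs into a results-page name. A small sketch of that naming scheme follows; the category map and combinations are made up, and since the dict is built from a set, the printed order may vary.

    # Sketch of the result-set naming used above: each image contributes the sorted
    # tuple of its above-threshold category IDs, and each tuple becomes one page
    detection_categories = {'1': 'animal', '2': 'person', '3': 'vehicle'}  # example map

    used_combinations = {('1',), ('1', '3')}  # e.g. "animal only" and "animal + vehicle" images

    detection_categories_to_results_name = {}
    for sorted_subset in used_combinations:
        results_name = 'detections'
        for category_id in sorted_subset:
            results_name = results_name + '_' + detection_categories[category_id]
        detection_categories_to_results_name[sorted_subset] = results_name

    print(detection_categories_to_results_name)
    # {('1',): 'detections_animal', ('1', '3'): 'detections_animal_vehicle'} (order may vary)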
@@ -1435,7 +1557,7 @@ def process_batch_results(options: PostProcessingOptions

  # Prepare the individual html image files
  image_counts = prepare_html_subpages(images_html, output_dir, options)
-
+
  if image_rendered_count == 0:
  seconds_per_image = 0.0
  else:
@@ -1465,9 +1587,15 @@ def process_batch_results(options: PostProcessingOptions
  almost_detection_string = ' (&ldquo;almost detection&rdquo; threshold at {:.1%})'.format(
  options.almost_detection_confidence_threshold)

+ confidence_threshold_string = ''
+ if isinstance(options.confidence_threshold,float):
+ confidence_threshold_string = '{:.1%}'.format(options.confidence_threshold)
+ else:
+ confidence_threshold_string = str(options.confidence_threshold)
+
  index_page = """<html>\n{}\n<body>\n
  <h2>Visualization of results for {}</h2>\n
- <p>A sample of {} images (of {} total)FAILURE_PLACEHOLDER, annotated with detections above {:.1%} confidence{}.</p>\n
+ <p>A sample of {} images (of {} total)FAILURE_PLACEHOLDER, annotated with detections above confidence {}{}.</p>\n

  <div class="contentdiv">
  <p>Model version: {}</p>
@@ -1475,7 +1603,7 @@ def process_batch_results(options: PostProcessingOptions

  <h3>Sample images</h3>\n
  <div class="contentdiv">\n""".format(
- style_header, job_name_string, image_count, len(detections_df), options.confidence_threshold,
+ style_header, job_name_string, image_count, len(detections_df), confidence_threshold_string,
  almost_detection_string, model_version_string)

  failure_string = ''
@@ -1491,7 +1619,17 @@ def process_batch_results(options: PostProcessingOptions
  friendly_name = friendly_name.capitalize()
  return friendly_name

- for result_set_name in images_html.keys():
+ sorted_result_set_names = sorted(list(images_html.keys()))
+
+ result_set_name_to_count = {}
+ for result_set_name in sorted_result_set_names:
+ image_count = image_counts[result_set_name]
+ result_set_name_to_count[result_set_name] = image_count
+ sorted_result_set_names = sorted(sorted_result_set_names,
+ key=lambda x: result_set_name_to_count[x],
+ reverse=True)
+
+ for result_set_name in sorted_result_set_names:

  # Don't print classification classes here; we'll do that later with a slightly
  # different structure
@@ -1501,18 +1639,32 @@ def process_batch_results(options: PostProcessingOptions
  filename = result_set_name + '.html'
  label = result_set_name_to_friendly_name(result_set_name)
  image_count = image_counts[result_set_name]
+
+ # Don't include line items for empty multi-category pages
+ if image_count == 0 and \
+ detection_categories_to_category_count[result_set_name] > 1:
+ continue
+
  if total_images == 0:
  image_fraction = -1
  else:
  image_fraction = image_count / total_images
- index_page += '<a href="{}">{}</a> ({}, {:.1%})<br/>\n'.format(
- filename,label,image_count,image_fraction)
+
+ # Write the line item for this category, including a link only if the
+ # category is non-empty
+ if image_count == 0:
+ index_page += '{} ({}, {:.1%})<br/>\n'.format(
+ label,image_count,image_fraction)
+ else:
+ index_page += '<a href="{}">{}</a> ({}, {:.1%})<br/>\n'.format(
+ filename,label,image_count,image_fraction)

  index_page += '</div>\n'

  if has_classification_info:
  index_page += '<h3>Images of detected classes</h3>'
- index_page += '<p>The same image might appear under multiple classes if multiple species were detected.</p>\n'
+ index_page += '<p>The same image might appear under multiple classes ' + \
+ 'if multiple species were detected.</p>\n'
  index_page += '<p>Classifications with confidence less than {:.1%} confidence are considered "unreliable".</p>\n'.format(
  options.classification_confidence_threshold)
  index_page += '<div class="contentdiv">\n'
@@ -1616,8 +1768,11 @@ def main():
  '--include_almost_detections', action='store_true',
  help='Include a separate category for images just above a second confidence threshold')
  parser.add_argument(
- '--random_output_sort', action='store_true',
- help='Sort output randomly (defaults to sorting by filename)')
+ '--html_sort_order', type=str, default='filename',
+ help='Sort order for output pages, should be one of [filename,confidence,random] (defaults to filename)')
+ parser.add_argument(
+ '--sort_by_confidence', action='store_true',
+ help='Sort output in decreasing order by confidence (defaults to sorting by filename)')
  parser.add_argument(
  '--n_cores', type=int, default=1,
  help='Number of threads to use for rendering (default: 1)')
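
The old --random_output_sort flag is replaced above by --html_sort_order (plus a --sort_by_confidence convenience flag), and main() no longer derives sort_html_by_filename from the parsed args; the arguments are copied onto the options object via args_to_object. A minimal sketch of that flag-to-options flow follows, where copy_fields is a hypothetical stand-in for md_utils.ct_utils.args_to_object, whose exact semantics aren't shown in this diff.

    # Sketch: how a flag like --html_sort_order could end up on the options object
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--html_sort_order', type=str, default='filename',
                        help='One of [filename,confidence,random]')
    parser.add_argument('--include_almost_detections', action='store_true')
    args = parser.parse_args(['--html_sort_order', 'confidence'])

    class Options:
        html_sort_order = 'filename'
        include_almost_detections = False

    def copy_fields(args, obj):
        # Hypothetical stand-in for args_to_object: copy each parsed argument
        # onto the target object as an attribute
        for k, v in vars(args).items():
            setattr(obj, k, v)

    options = Options()
    copy_fields(args, options)
    print(options.html_sort_order)  # 'confidence'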
@@ -1633,13 +1788,17 @@ def main():
  '--open_output_file',
  action='store_true',
  help='Open the HTML output file when finished')
+ parser.add_argument(
+ '--max_figures_per_html_file',
+ type=int, default=None,
+ help='Maximum number of images to put on a single HTML page')

  if len(sys.argv[1:]) == 0:
  parser.print_help()
  parser.exit()

  args = parser.parse_args()
- args.sort_html_by_filename = (not args.random_output_sort)
+

  if args.n_cores != 1:
  assert (args.n_cores > 1), 'Illegal number of cores: {}'.format(args.n_cores)
@@ -1647,7 +1806,7 @@ def main():
  args.parallelize_rendering = True
  args.parallelize_rendering_n_cores = args.n_cores

- args_to_object(args, options)
+ args_to_object(args, options)

  if args.no_separate_detections_by_category:
  options.separate_detections_by_category = False