megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (191)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,20 +1,21 @@
- ########
- #
- # postprocess_batch_results.py
- #
- # Given a .json or .csv file representing the output from the batch detection API,
- # do one or more of the following:
- #
- # * Evaluate detector precision/recall, optionally rendering results (requires
- # ground truth)
- # * Sample true/false positives/negatives and render to HTML (requires ground
- # truth)
- # * Sample detections/non-detections and render to HTML (when ground truth isn't
- # available)
- #
- # Ground truth, if available, must be in the COCO Camera Traps format.
- #
- ########
+ """
+
+ postprocess_batch_results.py
+
+ Given a .json or .csv file containing MD results, do one or more of the following:
+
+ * Sample detections/non-detections and render to HTML (when ground truth isn't
+ available) (this is 99.9% of what this module is for)
+ * Evaluate detector precision/recall, optionally rendering results (requires
+ ground truth)
+ * Sample true/false positives/negatives and render to HTML (requires ground
+ truth)
+
+ Ground truth, if available, must be in COCO Camera Traps format:
+
+ https://github.com/agentmorris/MegaDetector/blob/main/data_management/README.md#coco-camera-traps-format
+
+ """

  #%% Constants and imports

@@ -27,11 +28,9 @@ import os
  import sys
  import time
  import uuid
- import urllib
  import warnings
  import random

- from typing import Any, Dict, Iterable, Optional, Tuple
  from enum import IntEnum
  from multiprocessing.pool import ThreadPool
  from multiprocessing.pool import Pool
@@ -52,8 +51,7 @@ from md_utils.write_html_image_list import write_html_image_list
  from md_utils import path_utils
  from data_management.cct_json_utils import (CameraTrapJsonUtils, IndexedJsonDb)
  from api.batch_processing.postprocessing.load_api_results import load_api_results
- from md_utils.ct_utils import args_to_object
- from md_utils.ct_utils import invert_dictionary
+ from md_utils.ct_utils import args_to_object, sets_overlap

  from detection.run_detector import get_typical_confidence_threshold_from_results

@@ -65,136 +63,163 @@ warnings.filterwarnings('ignore', '(Possibly )?corrupt EXIF data', UserWarning)
  DEFAULT_NEGATIVE_CLASSES = ['empty']
  DEFAULT_UNKNOWN_CLASSES = ['unknown', 'unlabeled', 'ambiguous']

-
- def has_overlap(set1: Iterable, set2: Iterable) -> bool:
- """
- Check whether two sets overlap.
- """
-
- return not set(set1).isdisjoint(set(set2))
-
-
  # Make sure there is no overlap between the two sets, because this will cause
  # issues in the code
- assert not has_overlap(DEFAULT_NEGATIVE_CLASSES, DEFAULT_UNKNOWN_CLASSES), (
+ assert not sets_overlap(DEFAULT_NEGATIVE_CLASSES, DEFAULT_UNKNOWN_CLASSES), (
  'Default negative and unknown classes cannot overlap.')


  class PostProcessingOptions:
-
+ """
+ Options used to parameterize process_batch_results().
+ """
+
  ### Required inputs

- api_output_file = ''
+ #: MD results .json file to process
+ md_results_file = ''
+
+ #: Folder to which we should write HTML output
  output_dir = ''

  ### Options

- # Can be a folder or a SAS URL
+ #: Folder where images live (filenames in [md_results_file] should be relative to this folder)
  image_base_dir = '.'

- ground_truth_json_file = ''
-
  ## These apply only when we're doing ground-truth comparisons

- # Classes we'll treat as negative
- #
- # Include the token "#NO_LABELS#" to indicate that an image with no annotations
- # should be considered empty.
+ #: Optional .json file containing ground truth information
+ ground_truth_json_file = ''
+
+ #: Classes we'll treat as negative
+ #:
+ #: Include the token "#NO_LABELS#" to indicate that an image with no annotations
+ #: should be considered empty.
  negative_classes = DEFAULT_NEGATIVE_CLASSES

- # Classes we'll treat as neither positive nor negative
+ #: Classes we'll treat as neither positive nor negative
  unlabeled_classes = DEFAULT_UNKNOWN_CLASSES

- # A list of output sets that we should count, but not render images for.
- #
- # Typically used to preview sets with lots of empties, where you don't want to
- # subset but also don't want to render 100,000 empty images.
- #
- # detections, non_detections
- # detections_animal, detections_person, detections_vehicle
+ #: A list of output sets that we should count, but not render images for.
+ #:
+ #: Typically used to preview sets with lots of empties, where you don't want to
+ #: subset but also don't want to render 100,000 empty images.
+ #:
+ #: detections, non_detections
+ #: detections_animal, detections_person, detections_vehicle
  rendering_bypass_sets = []

- # If this is None, choose a confidence threshold based on the detector version.
- #
- # This can either be a float or a dictionary mapping category names (not IDs) to
- # thresholds. The category "default" can be used to specify thresholds for
- # other categories. Currently the use of a dict here is not supported when
- # ground truth is supplied.
+ #: If this is None, choose a confidence threshold based on the detector version.
+ #:
+ #: This can either be a float or a dictionary mapping category names (not IDs) to
+ #: thresholds. The category "default" can be used to specify thresholds for
+ #: other categories. Currently the use of a dict here is not supported when
+ #: ground truth is supplied.
  confidence_threshold = None

- # Confidence threshold to apply to classification (not detection) results
- #
- # Only a float is supported here (unlike the "confidence_threshold" parameter, which
- # can be a dict).
+ #: Confidence threshold to apply to classification (not detection) results
+ #:
+ #: Only a float is supported here (unlike the "confidence_threshold" parameter, which
+ #: can be a dict).
  classification_confidence_threshold = 0.5

- # Used for summary statistics only
+ #: Used for summary statistics only
  target_recall = 0.9

- # Number of images to sample, -1 for "all images"
+ #: Number of images to sample, -1 for "all images"
  num_images_to_sample = 500

- # Random seed for sampling, or None
- sample_seed: Optional[int] = 0 # None
+ #: Random seed for sampling, or None
+ sample_seed = 0 # None

+ #: Image width for images in the HTML output
  viz_target_width = 800

+ #: Line width (in pixels) for rendering detections
  line_thickness = 4
+
+ #: Box expansion (in pixels) for rendering detections
  box_expansion = 0

+ #: Job name to include in big letters in the output HTML
  job_name_string = None
+
+ #: Model version string to include in the output HTML
  model_version_string = None

- # Sort order for the output, should be one of "filename", "confidence", or "random"
+ #: Sort order for the output, should be one of "filename", "confidence", or "random"
  html_sort_order = 'filename'

+ #: If True, images in the output HTML will be links back to the original images
  link_images_to_originals = True

- # Optionally separate detections into categories (animal/vehicle/human)
- #
- # Currently only supported when ground truth is unavailable
+ #: Optionally separate detections into categories (animal/vehicle/human)
+ #:
+ #: Currently only supported when ground truth is unavailable
  separate_detections_by_category = True

- # Optionally replace one or more strings in filenames with other strings;
- # useful for taking a set of results generated for one folder structure
- # and applying them to a slightly different folder structure.
+ #: Optionally replace one or more strings in filenames with other strings;
+ #: useful for taking a set of results generated for one folder structure
+ #: and applying them to a slightly different folder structure.
  api_output_filename_replacements = {}
+
+ #: Optionally replace one or more strings in filenames with other strings;
+ #: useful for taking a set of results generated for one folder structure
+ #: and applying them to a slightly different folder structure.
  ground_truth_filename_replacements = {}

- # Allow bypassing API output loading when operating on previously-loaded
- # results
- api_detection_results: Optional[pd.DataFrame] = None
- api_other_fields: Optional[Dict[str, Any]] = None
-
- # Should we also split out a separate report about the detections that were
- # just below our main confidence threshold?
- #
- # Currently only supported when ground truth is unavailable
+ #: Allow bypassing API output loading when operating on previously-loaded
+ #: results. If present, this is a Pandas DataFrame. Almost never useful.
+ api_detection_results = None
+
+ #: Allow bypassing API output loading when operating on previously-loaded
+ #: results. If present, this is a str --> obj dict. Almost never useful.
+ api_other_fields = None
+
+ #: Should we also split out a separate report about the detections that were
+ #: just below our main confidence threshold?
+ #:
+ #: Currently only supported when ground truth is unavailable.
  include_almost_detections = False

- # Only a float is supported here (unlike the "confidence_threshold" parameter, which
- # can be a dict).
+ #: Only a float is supported here (unlike the "confidence_threshold" parameter, which
+ #: can be a dict).
  almost_detection_confidence_threshold = None

- # Control rendering parallelization
- parallelize_rendering_n_cores: Optional[int] = 100
- parallelize_rendering_with_threads = True
+ #: Enable/disable rendering parallelization
  parallelize_rendering = False

+ #: Number of threads/processes to use for rendering parallelization
+ parallelize_rendering_n_cores = 25
+
+ #: Whether to use threads (True) or processes (False) for rendering parallelization
+ parallelize_rendering_with_threads = True
+
+ #: When classification results are present, should be sort alphabetically by class name (False)
+ #: or in descending order by frequency (True)?
  sort_classification_results_by_count = False

- # Should we split individual pages up into smaller pages if there are more than
- # N images?
+ #: Should we split individual pages up into smaller pages if there are more than
+ #: N images?
  max_figures_per_html_file = None

  # ...PostProcessingOptions


  class PostProcessingResults:
-
+ """
+ Return format from process_batch_results
+ """
+
+ #: HTML file to which preview information was written
  output_html_file = ''
- api_detection_results: Optional[pd.DataFrame] = None
- api_other_fields: Optional[Dict[str, Any]] = None
+
+ #: Pandas Dataframe containing detection results
+ api_detection_results = None
+
+ #: str --> obj dictionary containing other information loaded from the results file
+ api_other_fields = None


  ##%% Helper classes and functions
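The hunk above converts the inline comments on PostProcessingOptions into #:-style docstrings and re-documents several fields. As a quick orientation, here is a minimal configuration sketch that uses only fields shown in that hunk; the values, paths, and import path (as packaged under the wheel's api top-level package) are illustrative rather than authoritative:

    from api.batch_processing.postprocessing.postprocess_batch_results import PostProcessingOptions

    options = PostProcessingOptions()

    # Count these output sets, but skip rendering their images (set names per the hunk above)
    options.rendering_bypass_sets = ['non_detections']

    # Sample 1000 images with a fixed seed (the documented defaults are 500 and 0)
    options.num_images_to_sample = 1000
    options.sample_seed = 0

    # Either a float or a dict mapping category names to thresholds;
    # "default" covers anything not listed (dicts are not supported with ground truth)
    options.confidence_threshold = {'vehicle': 0.4, 'default': 0.2}

    # Rendering parallelization; 25 worker threads is the default documented in this version
    options.parallelize_rendering = True
    options.parallelize_rendering_n_cores = 25
    options.parallelize_rendering_with_threads = True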
@@ -203,6 +228,8 @@ class DetectionStatus(IntEnum):
  """
  Flags used to mark images as positive or negative for P/R analysis
  (according to ground truth and/or detector output)
+
+ :meta private:
  """

  DS_NEGATIVE = 0
@@ -225,11 +252,9 @@ class DetectionStatus(IntEnum):
  DS_ALMOST = 5


- def mark_detection_status(
- indexed_db: IndexedJsonDb,
- negative_classes: Iterable[str] = DEFAULT_NEGATIVE_CLASSES,
- unknown_classes: Iterable[str] = DEFAULT_UNKNOWN_CLASSES
- ) -> Tuple[int, int, int, int]:
+ def _mark_detection_status(indexed_db,
+ negative_classes=DEFAULT_NEGATIVE_CLASSES,
+ unknown_classes=DEFAULT_UNKNOWN_CLASSES):
  """
  For each image in indexed_db.db['images'], add a '_detection_status' field
  to indicate whether to treat this image as positive, negative, ambiguous,
@@ -261,8 +286,8 @@ def mark_detection_status(
  # - unknown / unassigned-type labels
  # - negative-type labels
  # - positive labels (i.e., labels that are neither unknown nor negative)
- has_unknown_labels = has_overlap(category_names, unknown_classes)
- has_negative_labels = has_overlap(category_names, negative_classes)
+ has_unknown_labels = sets_overlap(category_names, unknown_classes)
+ has_negative_labels = sets_overlap(category_names, negative_classes)
  has_positive_labels = 0 < len(category_names - (unknown_classes | negative_classes))
  # assert has_unknown_labels is False, '{} has unknown labels'.format(annotations)

@@ -317,23 +342,27 @@

  return n_negative, n_positive, n_unknown, n_ambiguous

- # ...mark_detection_status()
+ # ..._mark_detection_status()


- def is_sas_url(s: str) -> bool:
+ def is_sas_url(s) -> bool:
  """
  Placeholder for a more robust way to verify that a link is a SAS URL.
  99.999% of the time this will suffice for what we're using it for right now.
+
+ :meta private:
  """

  return (s.startswith(('http://', 'https://')) and ('core.windows.net' in s)
  and ('?' in s))


- def relative_sas_url(folder_url: str, relative_path: str) -> Optional[str]:
+ def relative_sas_url(folder_url, relative_path):
  """
  Given a container-level or folder-level SAS URL, create a SAS URL to the
  specified relative path.
+
+ :meta private:
  """

  relative_path = relative_path.replace('%','%25')
@@ -351,7 +380,7 @@ def relative_sas_url(folder_url: str, relative_path: str) -> Optional[str]:
  return tokens[0] + relative_path + '?' + tokens[1]


- def render_bounding_boxes(
+ def _render_bounding_boxes(
  image_base_dir,
  image_relative_path,
  display_name,
@@ -363,6 +392,9 @@ options=None):
  options=None):
  """
  Renders detection bounding boxes on a single image.
+
+ This is an internal function; if you want tools for rendering boxes on images, see
+ md_visualization.visualization_utils.

  The source image is:

@@ -381,6 +413,8 @@

  Returns the html info struct for this image in the format that's used for
  write_html_image_list.
+
+ :meta private:
  """

  if options is None:
@@ -450,7 +484,7 @@ def render_bounding_boxes(
  rendering_confidence_threshold = {}
  for category_id in category_ids:
  rendering_confidence_threshold[category_id] = \
- get_threshold_for_category_id(category_id, options, detection_categories)
+ _get_threshold_for_category_id(category_id, options, detection_categories)

  vis_utils.render_detection_bounding_boxes(
  detections, image,
@@ -484,14 +518,21 @@

  # Optionally add links back to the original images
  if options.link_images_to_originals and (image_full_path is not None):
- info['linkTarget'] = urllib.parse.quote(image_full_path)
+
+ # Handling special characters in links has been pushed down into
+ # write_html_image_list
+ #
+ # link_target = image_full_path.replace('\\','/')
+ # link_target = urllib.parse.quote(link_target)
+ link_target = image_full_path
+ info['linkTarget'] = link_target

  return info

- # ...render_bounding_boxes
+ # ..._render_bounding_boxes


- def prepare_html_subpages(images_html, output_dir, options=None):
+ def _prepare_html_subpages(images_html, output_dir, options=None):
  """
  Write out a series of html image lists, e.g. the "detections" or "non-detections"
  pages.
@@ -557,11 +598,13 @@ def prepare_html_subpages(images_html, output_dir, options=None):

  return image_counts

- # ...prepare_html_subpages()
+ # ..._prepare_html_subpages()


- # Determine the confidence threshold we should use for a specific category name
- def get_threshold_for_category_name(category_name,options):
+ def _get_threshold_for_category_name(category_name,options):
+ """
+ Determines the confidence threshold we should use for a specific category name.
+ """

  if isinstance(options.confidence_threshold,float):
  return options.confidence_threshold
@@ -580,10 +623,12 @@ def get_threshold_for_category_name(category_name,options):
  return options.confidence_threshold['default']


- # Determine the confidence threshold we should use for a specific category ID
- #
- # detection_categories is a dict mapping category IDs to names.
- def get_threshold_for_category_id(category_id,options,detection_categories):
+ def _get_threshold_for_category_id(category_id,options,detection_categories):
+ """
+ Determines the confidence threshold we should use for a specific category ID.
+
+ [detection_categories] is a dict mapping category IDs to names.
+ """

  if isinstance(options.confidence_threshold,float):
  return options.confidence_threshold
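The pair of helpers above is where a dict-valued confidence_threshold gets resolved per category. A standalone sketch of the same lookup logic (hypothetical function name, mirroring _get_threshold_for_category_name as shown in this diff):

    def resolve_threshold(confidence_threshold, category_name):
        # A single float applies to every category
        if isinstance(confidence_threshold, float):
            return confidence_threshold
        # Otherwise it's a dict mapping category names to thresholds
        if category_name in confidence_threshold:
            return confidence_threshold[category_name]
        # "default" covers any category not listed explicitly
        return confidence_threshold['default']

    assert resolve_threshold(0.2, 'animal') == 0.2
    assert resolve_threshold({'person': 0.5, 'default': 0.2}, 'animal') == 0.2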
@@ -593,66 +638,73 @@ def get_threshold_for_category_id(category_id,options,detection_categories):

  category_name = detection_categories[category_id]

- return get_threshold_for_category_name(category_name,options)
+ return _get_threshold_for_category_name(category_name,options)
+
+
+ def _get_positive_categories(detections,options,detection_categories):
+ """
+ Gets a sorted list of unique categories (as string IDs) above the threshold for this image

+ [detection_categories] is a dict mapping category IDs to names.
+ """

- # Get a sorted list of unique categories (as string IDs) above the threshold for this image
- #
- # "detection_categories" is a dict mapping category IDs to names.
- def get_positive_categories(detections,options,detection_categories):
  positive_categories = set()
  for d in detections:
- threshold = get_threshold_for_category_id(d['category'], options, detection_categories)
+ threshold = _get_threshold_for_category_id(d['category'], options, detection_categories)
  if d['conf'] >= threshold:
  positive_categories.add(d['category'])
  return sorted(positive_categories)


- # Determine whether any positive detections are present in the detection list
- # [detections].
- def has_positive_detection(detections,options,detection_categories):
+ def _has_positive_detection(detections,options,detection_categories):
+ """
+ Determines whether any positive detections are present in the detection list
+ [detections].
+ """

  found_positive_detection = False
  for d in detections:
- threshold = get_threshold_for_category_id(d['category'], options, detection_categories)
+ threshold = _get_threshold_for_category_id(d['category'], options, detection_categories)
  if d['conf'] >= threshold:
  found_positive_detection = True
  break
  return found_positive_detection


- # Render an image (with no ground truth information)
- #
- # Returns a list of rendering structs, where the first item is a category (e.g. "detections_animal"),
- # and the second is a dict of information needed for rendering. E.g.:
- #
- # [['detections_animal',
- # {
- # 'filename': 'detections_animal/detections_animal_blah~01060415.JPG',
- # 'title': '<b>Result type</b>: detections_animal,
- # <b>Image</b>: blah\\01060415.JPG,
- # <b>Max conf</b>: 0.897',
- # 'textStyle': 'font-family:verdana,arial,calibri;font-size:80%;text-align:left;margin-top:20;margin-bottom:5',
- # 'linkTarget': 'full_path_to_%5C01060415.JPG'
- # }]]
- #
- # When no classification data is present, this list will always be length-1. When
- # classification data is present, an image may appear in multiple categories.
- #
- # Populates the 'max_conf' field of the first element of the list.
- #
- # Returns None if there are any errors.
- def render_image_no_gt(file_info,detection_categories_to_results_name,
+ def _render_image_no_gt(file_info,detection_categories_to_results_name,
  detection_categories,classification_categories,
  options):
-
+ """
+ Renders an image (with no ground truth information)
+
+ Returns a list of rendering structs, where the first item is a category (e.g. "detections_animal"),
+ and the second is a dict of information needed for rendering. E.g.:
+
+ [['detections_animal',
+ {
+ 'filename': 'detections_animal/detections_animal_blah~01060415.JPG',
+ 'title': '<b>Result type</b>: detections_animal,
+ <b>Image</b>: blah\\01060415.JPG,
+ <b>Max conf</b>: 0.897',
+ 'textStyle': 'font-family:verdana,arial,calibri;font-size:80%;text-align:left;margin-top:20;margin-bottom:5',
+ 'linkTarget': 'full_path_to_%5C01060415.JPG'
+ }]]
+
+ When no classification data is present, this list will always be length-1. When
+ classification data is present, an image may appear in multiple categories.
+
+ Populates the 'max_conf' field of the first element of the list.
+
+ Returns None if there are any errors.
+ """
+
  image_relative_path = file_info[0]
  max_conf = file_info[1]
  detections = file_info[2]

  # Determine whether any positive detections are present (using a threshold that
  # may vary by category)
- found_positive_detection = has_positive_detection(detections,options,detection_categories)
+ found_positive_detection = _has_positive_detection(detections,options,detection_categories)

  detection_status = DetectionStatus.DS_UNASSIGNED
  if found_positive_detection:
@@ -668,7 +720,7 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,

  if detection_status == DetectionStatus.DS_POSITIVE:
  if options.separate_detections_by_category:
- positive_categories = tuple(get_positive_categories(detections,options,detection_categories))
+ positive_categories = tuple(_get_positive_categories(detections,options,detection_categories))
  if positive_categories not in detection_categories_to_results_name:
  raise ValueError('Error: {} not in category mapping (file {})'.format(
  str(positive_categories),image_relative_path))
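_get_positive_categories() and _has_positive_detection() above iterate over the per-image 'detections' list from the MD results file, where each entry carries a string category ID and a confidence. A small illustration with made-up values (category IDs '1'/'2'/'3' follow the usual MD animal/person/vehicle convention):

    # Hypothetical per-image detection list in MD results format
    detections = [
        {'category': '1', 'conf': 0.92, 'bbox': [0.10, 0.20, 0.30, 0.25]},  # animal, above threshold
        {'category': '2', 'conf': 0.08, 'bbox': [0.55, 0.40, 0.10, 0.20]},  # person, below threshold
    ]

    # Per-category-ID thresholds, analogous to what _get_threshold_for_category_id returns
    thresholds = {'1': 0.2, '2': 0.2, '3': 0.2}

    # Same idea as _get_positive_categories(): categories with at least one
    # detection at or above their threshold, returned as sorted string IDs
    positive_categories = sorted({d['category'] for d in detections
                                  if d['conf'] >= thresholds[d['category']]})
    print(positive_categories)  # ['1']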
@@ -690,7 +742,7 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,
  rendering_options.confidence_threshold = \
  rendering_options.almost_detection_confidence_threshold

- rendered_image_html_info = render_bounding_boxes(
+ rendered_image_html_info = _render_bounding_boxes(
  image_base_dir=options.image_base_dir,
  image_relative_path=image_relative_path,
  display_name=display_name,
@@ -738,18 +790,20 @@

  image_result[0][1]['max_conf'] = max_conf

- # ...if we got valid rendering info back from render_bounding_boxes()
+ # ...if we got valid rendering info back from _render_bounding_boxes()

  return image_result

- # ...def render_image_no_gt()
+ # ...def _render_image_no_gt()


- # Render an image with ground truth information. See render_image_no_gt for return
- # data format.
- def render_image_with_gt(file_info,ground_truth_indexed_db,
+ def _render_image_with_gt(file_info,ground_truth_indexed_db,
  detection_categories,classification_categories,options):
-
+ """
+ Render an image with ground truth information. See _render_image_no_gt for return
+ data format.
+ """
+
  image_relative_path = file_info[0]
  max_conf = file_info[1]
  detections = file_info[2]
@@ -775,7 +829,7 @@ def render_image_with_gt(file_info,ground_truth_indexed_db,

  gt_presence = bool(gt_status)

- gt_classes = CameraTrapJsonUtils.annotations_to_classnames(
+ gt_classes = CameraTrapJsonUtils.annotations_to_class_names(
  annotations, ground_truth_indexed_db.cat_id_to_name)
  gt_class_summary = ','.join(gt_classes)

@@ -784,7 +838,7 @@ def render_image_with_gt(file_info,ground_truth_indexed_db,
  f'ground truth status (status: {gt_status}, classes: {gt_class_summary})')
  return None

- detected = has_positive_detection(detections, options, detection_categories)
+ detected = _has_positive_detection(detections, options, detection_categories)

  if gt_presence and detected:
  if '_classification_accuracy' not in image.keys():
@@ -804,7 +858,7 @@ def render_image_with_gt(file_info,ground_truth_indexed_db,
  res.upper(), str(gt_presence), gt_class_summary,
  max_conf * 100, image_relative_path)

- rendered_image_html_info = render_bounding_boxes(
+ rendered_image_html_info = _render_bounding_boxes(
  image_base_dir=options.image_base_dir,
  image_relative_path=image_relative_path,
  display_name=display_name,
@@ -823,14 +877,35 @@

  return image_result

- # ...def render_image_with_gt()
+ # ...def _render_image_with_gt()


  #%% Main function

- def process_batch_results(options: PostProcessingOptions
- ) -> PostProcessingResults:
+ def process_batch_results(options):
+
+ """
+ Given a .json or .csv file containing MD results, do one or more of the following:
+
+ * Sample detections/non-detections and render to HTML (when ground truth isn't
+ available) (this is 99.9% of what this module is for)
+ * Evaluate detector precision/recall, optionally rendering results (requires
+ ground truth)
+ * Sample true/false positives/negatives and render to HTML (requires ground
+ truth)

+ Ground truth, if available, must be in COCO Camera Traps format:
+
+ https://github.com/agentmorris/MegaDetector/blob/main/data_management/README.md#coco-camera-traps-format
+
+ Args:
+ options (PostProcessingOptions): everything we need to render a preview/analysis for
+ this set of results; see the PostProcessingOptions class for details.
+
+ Returns:
+ PostProcessingResults: information about the results/preview, most importantly the HTML filename
+ of the output. See the PostProcessingResults class for details.
+ """
  ppresults = PostProcessingResults()

  ##%% Expand some options for convenience
@@ -847,8 +922,8 @@ def process_batch_results(options: PostProcessingOptions

  ground_truth_indexed_db = None

- if (options.ground_truth_json_file is not None):
- assert (options.confidence_threshold is None) or (isinstance(confidence_threshold,float)), \
+ if (options.ground_truth_json_file is not None) and (len(options.ground_truth_json_file) > 0):
+ assert (options.confidence_threshold is None) or (isinstance(options.confidence_threshold,float)), \
  'Variable confidence thresholds are not supported when supplying ground truth'

  if (options.ground_truth_json_file is not None) and (len(options.ground_truth_json_file) > 0):
@@ -863,7 +938,7 @@ def process_batch_results(options: PostProcessingOptions
  filename_replacements=options.ground_truth_filename_replacements)

  # Mark images in the ground truth as positive or negative
- n_negative, n_positive, n_unknown, n_ambiguous = mark_detection_status(
+ n_negative, n_positive, n_unknown, n_ambiguous = _mark_detection_status(
  ground_truth_indexed_db, negative_classes=options.negative_classes,
  unknown_classes=options.unlabeled_classes)
  print(f'Finished loading and indexing ground truth: {n_negative} '
@@ -876,7 +951,7 @@ def process_batch_results(options: PostProcessingOptions
  # If the caller hasn't supplied results, load them
  if options.api_detection_results is None:
  detections_df, other_fields = load_api_results(
- options.api_output_file, normalize_paths=True,
+ options.api_output_file, force_forward_slashes=True,
  filename_replacements=options.api_output_filename_replacements)
  ppresults.api_detection_results = detections_df
  ppresults.api_other_fields = other_fields
@@ -895,7 +970,10 @@ def process_batch_results(options: PostProcessingOptions
  print('Choosing default confidence threshold of {} based on MD version'.format(
  options.confidence_threshold))

- if options.almost_detection_confidence_threshold is None:
+ if options.almost_detection_confidence_threshold is None and options.include_almost_detections:
+ assert isinstance(options.confidence_threshold,float), \
+ 'If you are using a dictionary of confidence thresholds and almost-detections are enabled, ' + \
+ 'you need to supply a threshold for almost detections.'
  options.almost_detection_confidence_threshold = options.confidence_threshold - 0.05
  if options.almost_detection_confidence_threshold < 0:
  options.almost_detection_confidence_threshold = 0
@@ -929,7 +1007,7 @@ def process_batch_results(options: PostProcessingOptions

  detections = row['detections']
  max_conf = row['max_detection_conf']
- if has_positive_detection(detections, options, detection_categories):
+ if _has_positive_detection(detections, options, detection_categories):
  n_positives += 1
  elif (options.almost_detection_confidence_threshold is not None) and \
  (max_conf >= options.almost_detection_confidence_threshold):
@@ -1087,7 +1165,7 @@ def process_batch_results(options: PostProcessingOptions
  (precision_at_confidence_threshold + recall_at_confidence_threshold)

  print('At a confidence threshold of {:.1%}, precision={:.1%}, recall={:.1%}, f1={:.1%}'.format(
- str(options.confidence_threshold), precision_at_confidence_threshold,
+ options.confidence_threshold, precision_at_confidence_threshold,
  recall_at_confidence_threshold, f1))

  ##%% Collect classification results, if they exist
@@ -1279,7 +1357,7 @@ def process_batch_results(options: PostProcessingOptions
  worker_string))

  rendering_results = list(tqdm(pool.imap(
- partial(render_image_with_gt,
+ partial(_render_image_with_gt,
  ground_truth_indexed_db=ground_truth_indexed_db,
  detection_categories=detection_categories,
  classification_categories=classification_categories,
@@ -1287,9 +1365,10 @@ def process_batch_results(options: PostProcessingOptions
  files_to_render), total=len(files_to_render)))
  else:
  for file_info in tqdm(files_to_render):
- rendering_results.append(render_image_with_gt(
+ rendering_results.append(_render_image_with_gt(
  file_info,ground_truth_indexed_db,
- detection_categories,classification_categories))
+ detection_categories,classification_categories,
+ options=options))
  elapsed = time.time() - start_time

  # Map all the rendering results in the list rendering_results into the
@@ -1303,7 +1382,7 @@ def process_batch_results(options: PostProcessingOptions
  images_html[assignment[0]].append(assignment[1])

  # Prepare the individual html image files
- image_counts = prepare_html_subpages(images_html, output_dir, options)
+ image_counts = _prepare_html_subpages(images_html, output_dir, options)

  print('{} images rendered (of {})'.format(image_rendered_count,image_count))

@@ -1319,6 +1398,12 @@ def process_batch_results(options: PostProcessingOptions
  image_counts['tp']
  )

+ confidence_threshold_string = ''
+ if isinstance(options.confidence_threshold,float):
+ confidence_threshold_string = '{:.2%}'.format(options.confidence_threshold)
+ else:
+ confidence_threshold_string = str(options.confidence_threshold)
+
  index_page = """<html>
  {}
  <body>
@@ -1333,7 +1418,7 @@ def process_batch_results(options: PostProcessingOptions

  <h3>Sample images</h3>
  <div class="contentdiv">
- <p>A sample of {} images, annotated with detections above {:.1%} confidence.</p>
+ <p>A sample of {} images, annotated with detections above confidence {}.</p>
  <a href="tp.html">True positives (TP)</a> ({}) ({:0.1%})<br/>
  CLASSIFICATION_PLACEHOLDER_1
  <a href="tn.html">True negatives (TN)</a> ({}) ({:0.1%})<br/>
@@ -1343,7 +1428,7 @@ def process_batch_results(options: PostProcessingOptions
  </div>
  """.format(
  style_header,job_name_string,model_version_string,
- image_count, str(options.confidence_threshold),
+ image_count, confidence_threshold_string,
  all_tp_count, all_tp_count/total_count,
  image_counts['tn'], image_counts['tn']/total_count,
  image_counts['fp'], image_counts['fp']/total_count,
@@ -1353,11 +1438,11 @@ def process_batch_results(options: PostProcessingOptions
  index_page += """
  <h3>Detection results</h3>
  <div class="contentdiv">
- <p>At a confidence threshold of {:0.1%}, precision={:0.1%}, recall={:0.1%}</p>
+ <p>At a confidence threshold of {}, precision={:0.1%}, recall={:0.1%}</p>
  <p><strong>Precision/recall summary for all {} images</strong></p><img src="{}"><br/>
  </div>
  """.format(
- str(options.confidence_threshold), precision_at_confidence_threshold, recall_at_confidence_threshold,
+ confidence_threshold_string, precision_at_confidence_threshold, recall_at_confidence_threshold,
  len(detections_df), pr_figure_relative_filename
  )

@@ -1457,7 +1542,7 @@ def process_batch_results(options: PostProcessingOptions
  detections_this_row = row['detections']
  above_threshold_category_ids_this_row = set()
  for detection in detections_this_row:
- threshold = get_threshold_for_category_id(detection['category'], options, detection_categories)
+ threshold = _get_threshold_for_category_id(detection['category'], options, detection_categories)
  if detection['conf'] >= threshold:
  above_threshold_category_ids_this_row.add(detection['category'])
  if len(above_threshold_category_ids_this_row) == 0:
@@ -1520,11 +1605,11 @@ def process_batch_results(options: PostProcessingOptions
  print('Rendering images with {} {}'.format(options.parallelize_rendering_n_cores,
  worker_string))

- # render_image_no_gt(file_info,detection_categories_to_results_name,
+ # _render_image_no_gt(file_info,detection_categories_to_results_name,
  # detection_categories,classification_categories)

  rendering_results = list(tqdm(pool.imap(
- partial(render_image_no_gt,
+ partial(_render_image_no_gt,
  detection_categories_to_results_name=detection_categories_to_results_name,
  detection_categories=detection_categories,
  classification_categories=classification_categories,
@@ -1532,7 +1617,7 @@ def process_batch_results(options: PostProcessingOptions
  files_to_render), total=len(files_to_render)))
  else:
  for file_info in tqdm(files_to_render):
- rendering_results.append(render_image_no_gt(file_info,
+ rendering_results.append(_render_image_no_gt(file_info,
  detection_categories_to_results_name,
  detection_categories,
  classification_categories,
@@ -1556,7 +1641,7 @@ def process_batch_results(options: PostProcessingOptions
  images_html[assignment[0]].append(assignment[1])

  # Prepare the individual html image files
- image_counts = prepare_html_subpages(images_html, output_dir, options)
+ image_counts = _prepare_html_subpages(images_html, output_dir, options)

  if image_rendered_count == 0:
  seconds_per_image = 0.0
@@ -1589,7 +1674,7 @@ def process_batch_results(options: PostProcessingOptions

  confidence_threshold_string = ''
  if isinstance(options.confidence_threshold,float):
- confidence_threshold_string = '{:.1%}'.format(options.confidence_threshold)
+ confidence_threshold_string = '{:.2%}'.format(options.confidence_threshold)
  else:
  confidence_threshold_string = str(options.confidence_threshold)

@@ -1711,18 +1796,17 @@ if False:

  #%%

- base_dir = r'G:\temp\md'
+ base_dir = r'g:\temp'
  options = PostProcessingOptions()
  options.image_base_dir = base_dir
- options.output_dir = os.path.join(base_dir, 'postprocessing')
- options.api_output_filename_replacements = {} # {'20190430cameratraps\\':''}
- options.ground_truth_filename_replacements = {} # {'\\data\\blob\\':''}
+ options.output_dir = os.path.join(base_dir, 'preview')
  options.api_output_file = os.path.join(base_dir, 'results.json')
- options.ground_truth_json_file = os.path.join(base_dir, 'gt.json')
- # options.unlabeled_classes = ['human']
+ options.confidence_threshold = {'person':0.5,'animal':0.5,'vehicle':0.01}
+ options.include_almost_detections = True
+ options.almost_detection_confidence_threshold = 0.001

  ppresults = process_batch_results(options)
- # os.start(ppresults.output_html_file)
+ # from md_utils.path_utils import open_file; open_file(ppresults.output_html_file)


  #%% Command-line driver
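For reference, a non-interactive version of the demo cell shown in the last hunk might look like the following; the base path is a placeholder, and the attribute names simply mirror that demo:

    import os

    from api.batch_processing.postprocessing.postprocess_batch_results import (
        PostProcessingOptions, process_batch_results)
    from md_utils.path_utils import open_file

    base_dir = '/path/to/job'  # placeholder

    options = PostProcessingOptions()
    options.image_base_dir = base_dir
    options.output_dir = os.path.join(base_dir, 'preview')
    options.api_output_file = os.path.join(base_dir, 'results.json')
    options.confidence_threshold = {'person': 0.5, 'animal': 0.5, 'vehicle': 0.01}
    options.include_almost_detections = True
    options.almost_detection_confidence_threshold = 0.001

    ppresults = process_batch_results(options)
    open_file(ppresults.output_html_file)  # open the generated HTML preview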