PyPI - megadetector - Versions diffs - 5.0.23__py3-none-any.whl → 5.0.25__py3-none-any.whl - Mend

megadetector 5.0.23__py3-none-any.whl → 5.0.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (42) hide show

megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +2 -3
megadetector/classification/merge_classification_detection_output.py +2 -2
megadetector/data_management/coco_to_labelme.py +2 -1
megadetector/data_management/databases/integrity_check_json_db.py +15 -14
megadetector/data_management/databases/subset_json_db.py +49 -21
megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
megadetector/data_management/mewc_to_md.py +340 -0
megadetector/data_management/speciesnet_to_md.py +41 -0
megadetector/data_management/yolo_output_to_md_output.py +15 -8
megadetector/detection/process_video.py +24 -7
megadetector/detection/pytorch_detector.py +841 -160
megadetector/detection/run_detector.py +341 -146
megadetector/detection/run_detector_batch.py +307 -70
megadetector/detection/run_inference_with_yolov5_val.py +61 -4
megadetector/detection/tf_detector.py +6 -1
megadetector/postprocessing/{combine_api_outputs.py → combine_batch_outputs.py} +10 -13
megadetector/postprocessing/compare_batch_results.py +236 -7
megadetector/postprocessing/create_crop_folder.py +358 -0
megadetector/postprocessing/md_to_labelme.py +7 -7
megadetector/postprocessing/md_to_wi.py +40 -0
megadetector/postprocessing/merge_detections.py +1 -1
megadetector/postprocessing/postprocess_batch_results.py +12 -5
megadetector/postprocessing/separate_detections_into_folders.py +32 -4
megadetector/postprocessing/validate_batch_results.py +9 -4
megadetector/utils/ct_utils.py +236 -45
megadetector/utils/directory_listing.py +3 -3
megadetector/utils/gpu_test.py +125 -0
megadetector/utils/md_tests.py +455 -116
megadetector/utils/path_utils.py +43 -2
megadetector/utils/wi_utils.py +2691 -0
megadetector/visualization/visualization_utils.py +95 -18
megadetector/visualization/visualize_db.py +25 -7
megadetector/visualization/visualize_detector_output.py +60 -13
{megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/METADATA +11 -23
{megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/RECORD +39 -36
{megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/WHEEL +1 -1
megadetector/detection/detector_training/__init__.py +0 -0
megadetector/detection/detector_training/model_main_tf2.py +0 -114
megadetector/utils/torch_test.py +0 -32
{megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/LICENSE +0 -0
{megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/top_level.txt +0 -0

megadetector/detection/run_inference_with_yolov5_val.py CHANGED Viewed

@@ -57,7 +57,7 @@ from megadetector.utils.ct_utils import is_iterable, split_list_into_fixed_size_
 from megadetector.utils.path_utils import path_is_abs
 from megadetector.data_management import yolo_output_to_md_output
 from megadetector.detection.run_detector import try_download_known_detector
-from megadetector.postprocessing.combine_api_outputs import combine_api_output_files
+from megadetector.postprocessing.combine_batch_outputs import combine_batch_output_files
 default_image_size_with_augmentation = int(1280 * 1.3)
 default_image_size_with_no_augmentation = 1280
@@ -214,6 +214,64 @@ def _clean_up_temporary_folders(options,
         print('Warning: using temporary YOLO results folder {}, but not removing it'.format(
             yolo_results_folder))
def get_stats_for_category(filename,category='all'):
    """
    Retrieve statistics for a category from the YOLO console output
    stored in [filename].

    A results row is expected to look like:

        <category> <n_images> <n_labels> <P> <R> <mAP50> <mAP50-95>

    The first matching row in the file is returned.

    Args:
        filename (str): a text file containing console output from a YOLO val run
        category (optional, str): a category name; matched exactly against the first
            whitespace-delimited token on each line

    Returns:
        dict: a dict with fields category, n_images, n_labels, P, R, mAP50, and mAP50-95,
        or None if no results row for [category] was found
    """

    # A results row is the category name followed by six numeric fields
    n_expected_tokens = 7

    # This is just a hedge to make sure there isn't some YOLO version floating
    # around that used different IoU thresholds in the console output.
    found_map50 = False
    found_map5095 = False

    with open(filename,'r',encoding='utf-8') as f:

        for line in f:

            s = line.strip()
            s_lower = s.lower()

            if ' map50 ' in s_lower or ' map@.5 ' in s_lower:
                found_map50 = True
            if 'map50-95' in s_lower or 'map@.5:.95' in s_lower:
                found_map5095 = True

            tokens = s.split()

            # Require an exact match on the category token; a prefix match would
            # confuse (e.g.) "animal" with "animals".
            if len(tokens) != n_expected_tokens or tokens[0] != category:
                continue

            # The column header has to appear before the first results row, otherwise
            # we can't be sure which metrics the numeric columns represent.
            assert found_map50 and found_map5095, \
                'Parsing error in YOLO console output file {}'.format(filename)

            return {
                'category': category,
                'n_images': int(tokens[1]),
                'n_labels': int(tokens[2]),
                'P': float(tokens[3]),
                'R': float(tokens[4]),
                'mAP50': float(tokens[5]),
                'mAP50-95': float(tokens[6])
            }

        # ...for each line

    return None
 #%% Main function
@@ -478,7 +536,7 @@ def run_inference_with_yolo_val(options):
         # ...for each chunk
         # Merge
-        _ = combine_api_output_files(input_files=chunk_output_files,
+        _ = combine_batch_output_files(input_files=chunk_output_files,
                                  output_file=options.output_file,
                                  require_uniqueness=True,
                                  verbose=True)
@@ -644,8 +702,7 @@ def run_inference_with_yolo_val(options):
     assert len(category_ids) == 1 + category_ids[-1]
     yolo_dataset_file = os.path.join(yolo_results_folder,'dataset.yaml')
-    yolo_image_list_file = os.path.join(yolo_results_folder,'images.txt')
+    yolo_image_list_file = os.path.join(yolo_results_folder,'images.txt')
     with open(yolo_image_list_file,'w') as f:

megadetector/detection/tf_detector.py CHANGED Viewed

@@ -36,10 +36,15 @@ class TFDetector:
     BATCH_SIZE = 1
-    def __init__(self, model_path):
+    def __init__(self, model_path, detector_options=None):
         """
         Loads a model from [model_path] and starts a tf.Session with this graph. Obtains
         input and output tensor handles.
+        Args:
+            model_path (str): path to .pb file
+            detector_options (dict, optional): key-value pairs that control detector
+                options; currently not used by TFDetector
         """
         detection_graph = TFDetector.__load_model(model_path)

megadetector/postprocessing/{combine_api_outputs.py → combine_batch_outputs.py} RENAMED Viewed

@@ -1,8 +1,8 @@
 """
-combine_api_outputs.py
+combine_batch_outputs.py
-Merges two or more .json files in batch API output format, optionally
+Merges two or more .json files in MD output format, optionally
 writing the results to another .json file.
 * Concatenates image lists, erroring if images are not unique.
@@ -15,10 +15,7 @@ https://github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_pro
 Command-line use:
-combine_api_outputs input1.json input2.json ... inputN.json output.json
-Also see combine_api_shard_files() (not exposed via the command line yet) to
-combine the intermediate files created by the API.
+combine_batch_outputs input1.json input2.json ... inputN.json output.json
 This does no checking for redundancy; if you are looking to ensemble
 the results of multiple model versions, see merge_detections.py.
@@ -34,7 +31,7 @@ import json
 #%% Merge functions
-def combine_api_output_files(input_files,
+def combine_batch_output_files(input_files,
                              output_file=None,
                              require_uniqueness=True,
                              verbose=True):
@@ -64,7 +61,7 @@ def combine_api_output_files(input_files,
             input_dicts.append(json.load(f))
     print_if_verbose('Merging results')
-    merged_dict = combine_api_output_dictionaries(
+    merged_dict = combine_batch_output_dictionaries(
         input_dicts, require_uniqueness=require_uniqueness)
     print_if_verbose('Writing output to {}'.format(output_file))
@@ -75,7 +72,7 @@ def combine_api_output_files(input_files,
     return merged_dict
-def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
+def combine_batch_output_dictionaries(input_dicts, require_uniqueness=True):
     """
     Merges the list of MD results dictionaries [input_dicts] into a single dict.
     See module header comment for details on merge rules.
@@ -106,7 +103,7 @@ def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
         for k in input_dict:
             if k not in known_fields:
-                raise ValueError(f'Unrecognized API output field: {k}')
+                print(f'Warning: unrecognized batch output field: {k}')
         # Check compatibility of detection categories
         for cat_id in input_dict['detection_categories']:
@@ -157,7 +154,7 @@ def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
             assert info_compare['detector'] == info['detector'], (
                 'Incompatible detection versions in merging')
             assert info_compare['format_version'] == info['format_version'], (
-                'Incompatible API output versions in merging')
+                'Incompatible batch output versions in merging')
             if 'classifier' in info_compare:
                 if 'classifier' in info:
                     assert info['classifier'] == info_compare['classifier']
@@ -179,7 +176,7 @@ def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
                    'images': sorted_images}
     return merged_dict
-# ...combine_api_output_files()
+# ...combine_batch_output_files()
 def combine_api_shard_files(input_files, output_file=None):
@@ -243,7 +240,7 @@ def main():
         parser.exit()
     args = parser.parse_args()
-    combine_api_output_files(args.input_paths, args.output_path)
+    combine_batch_output_files(args.input_paths, args.output_path)
 if __name__ == '__main__':
     main()

megadetector/postprocessing/compare_batch_results.py CHANGED Viewed

@@ -138,6 +138,9 @@ class BatchComparisonOptions:
         #: List of filenames to include in the comparison, or None to use all files
         self.filenames_to_include = None
+        #: List of category names to include in the comparison, or None to use all categories
+        self.category_names_to_include = None
         #: Compare only detections/non-detections, ignore categories (still renders categories)
         self.class_agnostic_comparison = False
@@ -197,6 +200,10 @@ class BatchComparisonOptions:
         #: to describe images
         self.fn_to_display_fn = None
+        #: Should we run urllib.parse.quote() on paths before using them as links in the
+        #: output page?
+        self.parse_link_paths = True
 # ...class BatchComparisonOptions
@@ -982,7 +989,32 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
             if invalid_category_error:
                 continue
+            # Should we be restricting the comparison to only certain categories?
+            if options.category_names_to_include is not None:
+                # Just in case the user provided a single category instead of a list
+                if isinstance(options.category_names_to_include,str):
+                    options.category_names_to_include = [options.category_names_to_include]
+                category_name_to_id_a = invert_dictionary(detection_categories_a)
+                category_name_to_id_b = invert_dictionary(detection_categories_b)
+                category_ids_to_include_a = []
+                category_ids_to_include_b = []
+                for category_name in options.category_names_to_include:
+                    if category_name in category_name_to_id_a:
+                        category_ids_to_include_a.append(category_name_to_id_a[category_name])
+                    if category_name in category_name_to_id_b:
+                        category_ids_to_include_b.append(category_name_to_id_b[category_name])
+                # Restrict the categories we treat as above-threshold to the set we're supposed
+                # to be using
+                categories_above_threshold_a = [category_id for category_id in categories_above_threshold_a if \
+                                                category_id in category_ids_to_include_a]
+                categories_above_threshold_b = [category_id for category_id in categories_above_threshold_b if \
+                                                category_id in category_ids_to_include_b]
             detection_a = (len(categories_above_threshold_a) > 0)
             detection_b = (len(categories_above_threshold_b) > 0)
@@ -1213,9 +1245,6 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
             # ...def _categorize_image_with_image_level_gt(...)
-            # if 'val#human#human#HoSa#2021.006_na#2021#2021.006 (2021)#20210713' in im_a['file']:
-            #    import pdb; pdb.set_trace()
             # im_detection = im_a; category_id_to_threshold = category_id_to_threshold_a
             result_types_present_a = \
                 _categorize_image_with_image_level_gt(im_a,im_gt,annotations_gt,category_id_to_threshold_a)
@@ -1360,12 +1389,17 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
             title = display_path + ' (max conf {:.2f},{:.2f})'.format(max_conf_a,max_conf_b)
+            if options.parse_link_paths:
+                link_target_string = urllib.parse.quote(input_image_absolute_paths[i_fn])
+            else:
+                link_target_string = input_image_absolute_paths[i_fn]
             info = {
                 'filename': fn,
                 'title': title,
                 'textStyle': 'font-family:verdana,arial,calibri;font-size:' + \
                     '80%;text-align:left;margin-top:20;margin-bottom:5',
-                'linkTarget': urllib.parse.quote(input_image_absolute_paths[i_fn]),
+                'linkTarget': link_target_string,
                 'sort_conf':sort_conf
             }
@@ -1575,7 +1609,9 @@ def n_way_comparison(filenames,
     if model_names is not None:
         assert len(model_names) == len(filenames), \
             '[model_names] should be the same length as [filenames]'
+    options.pairwise_options = []
     # Choose all pairwise combinations of the files in [filenames]
     for i, j in itertools.combinations(list(range(0,len(filenames))),2):
@@ -1598,7 +1634,200 @@ def n_way_comparison(filenames,
     return compare_batch_results(options)
-# ...n_way_comparison()
+# ...def n_way_comparison(...)
def find_image_level_detections_above_threshold(results,threshold=0.2,category_names=None):
    """
    Returns images in the set of MD results [results] with detections above
    a threshold confidence level, optionally only counting certain categories.

    Images with no detections (including failed images, for which 'detections' is
    absent or None) are never returned.

    Args:
        results (str or dict): the set of results, either a .json filename or a results
            dict
        threshold (float, optional): the minimum confidence a detection needs for its
            image to be included in the output
        category_names (list or str, optional): the list of category names to consider (defaults
            to using all categories), or the name of a single category.  Names that are not
            present in [results] are skipped with a warning; at least one name has to be
            present.

    Returns:
        list: the images with above-threshold detections
    """

    if isinstance(results,str):
        with open(results,'r') as f:
            results = json.load(f)

    # None means "consider every category"
    category_ids_to_consider = None

    if category_names is not None:

        # Just in case the caller provided a single category instead of a list
        if isinstance(category_names,str):
            category_names = [category_names]

        category_id_to_name = results['detection_categories']
        category_name_to_id = invert_dictionary(category_id_to_name)

        category_ids_to_consider = set()

        for category_name in category_names:
            if category_name not in category_name_to_id:
                print('Warning: category {} not present in results'.format(category_name))
                continue
            category_ids_to_consider.add(category_name_to_id[category_name])

        assert len(category_ids_to_consider) > 0, \
            'Category name list did not map to any category IDs'

    images_above_threshold = []

    for im in results['images']:

        detections = im.get('detections',None)

        # Skip failed images and images with no detections at all
        if not detections:
            continue

        # Seed with zero so an image whose detections are all in other categories
        # still has a well-defined maximum.
        confidence_values_this_image = [0]

        for det in detections:
            if (category_ids_to_consider is not None) and \
               (det['category'] not in category_ids_to_consider):
                continue
            confidence_values_this_image.append(det['conf'])

        if max(confidence_values_this_image) >= threshold:
            images_above_threshold.append(im)

    # ...for each image

    return images_above_threshold

# ...def find_image_level_detections_above_threshold(...)
def find_equivalent_threshold(results_a,
                              results_b,
                              threshold_a=0.2,
                              category_names=None,
                              verbose=False):
    """
    Given two sets of detector results, finds the confidence threshold for results_b
    that produces the same fraction of *images* with detections as threshold_a does for
    results_a.  Uses all categories unless [category_names] is supplied.

    Each valid image contributes its maximum detection confidence (zero if it has no
    detections in the categories being considered).  Images without a 'detections' list
    (e.g. failed images) are ignored.

    Args:
        results_a (str or dict): the first set of results, either a .json filename or a results
            dict
        results_b (str or dict): the second set of results, either a .json filename or a results
            dict
        threshold_a (float, optional): the threshold used to determine the target number of
            detections in results_a
        category_names (list or str, optional): the list of category names to consider (defaults
            to using all categories), or the name of a single category.  Names missing from
            one of the result sets are skipped (with a warning) for that set; at least one
            name has to be present in each set.
        verbose (bool, optional): enable additional debug output

    Returns:
        float: the threshold that - when applied to results_b - produces (approximately) the
        same fraction of image-level detections that results from applying threshold_a
        to results_a.  If no image in results_a is above threshold_a, the highest image-level
        confidence in results_b is returned, i.e. the strictest threshold that can be
        expressed with an observed confidence value.

    Raises:
        ValueError: if either result set contains no valid (non-failed) images
    """

    def _load_results(results):
        """
        Load [results] from a .json file if it's a filename, otherwise return it as-is.
        """

        if isinstance(results,str):
            if verbose:
                print('Loading results from {}'.format(results))
            with open(results,'r') as f:
                results = json.load(f)
        return results

    def _category_names_to_ids(results,names,set_label):
        """
        Map the category names in [names] to the category IDs used in [results],
        skipping (with a warning) names that [results] doesn't know about.
        """

        category_name_to_id = invert_dictionary(results['detection_categories'])
        category_ids = set()
        for category_name in names:
            if category_name not in category_name_to_id:
                print('Warning: category {} not present in result set {}'.format(
                    category_name,set_label))
                continue
            category_ids.add(category_name_to_id[category_name])
        return category_ids

    def _get_confidence_values_for_results(images,category_ids_to_consider):
        """
        Return a list of the maximum confidence value for each image in [images].
        Returns zero confidence for images with no detections (or no detections
        in the specified categories).  Does not return anything for invalid images.
        """

        confidence_values = []

        for im in images:

            # Failed images don't count toward either the numerator or the denominator
            if ('detections' not in im) or (im['detections'] is None):
                continue

            confidence_values_this_image = \
                [det['conf'] for det in im['detections'] if \
                 (category_ids_to_consider is None) or \
                 (det['category'] in category_ids_to_consider)]

            if len(confidence_values_this_image) == 0:
                confidence_values.append(0)
            else:
                confidence_values.append(max(confidence_values_this_image))

        # ...for each image

        return confidence_values

    # ...def _get_confidence_values_for_results(...)

    results_a = _load_results(results_a)
    results_b = _load_results(results_b)

    # None means "consider every category"
    category_ids_to_consider_a = None
    category_ids_to_consider_b = None

    if category_names is not None:

        # Just in case the caller provided a single category instead of a list
        if isinstance(category_names,str):
            category_names = [category_names]

        category_ids_to_consider_a = _category_names_to_ids(results_a,category_names,'A')
        category_ids_to_consider_b = _category_names_to_ids(results_b,category_names,'B')

        assert len(category_ids_to_consider_a) > 0 and len(category_ids_to_consider_b) > 0, \
            'Category name list did not map to any category IDs in one or both detection sets'

    confidence_values_a = \
        _get_confidence_values_for_results(results_a['images'],category_ids_to_consider_a)

    if verbose:
        print('For result set A, considering {} of {} images'.format(
            len(confidence_values_a),len(results_a['images'])))

    confidence_values_b = \
        _get_confidence_values_for_results(results_b['images'],category_ids_to_consider_b)

    if verbose:
        print('For result set B, considering {} of {} images'.format(
            len(confidence_values_b),len(results_b['images'])))

    if len(confidence_values_a) == 0 or len(confidence_values_b) == 0:
        raise ValueError(
            'Cannot find an equivalent threshold: one or both result sets contain no valid images')

    confidence_values_a_above_threshold = [c for c in confidence_values_a if c >= threshold_a]

    confidence_values_b = sorted(confidence_values_b)

    target_detection_fraction = len(confidence_values_a_above_threshold) / len(confidence_values_a)

    detection_cutoff_index = round((1.0-target_detection_fraction) * len(confidence_values_b))

    # When the target fraction is (close to) zero, the cutoff index lands one past the end
    # of the list; fall back to the largest observed confidence value.
    detection_cutoff_index = min(detection_cutoff_index,len(confidence_values_b) - 1)

    threshold_b = confidence_values_b[detection_cutoff_index]

    if verbose:
        print('{} confidence values above threshold (A)'.format(len(confidence_values_a_above_threshold)))
        confidence_values_b_above_threshold = [c for c in confidence_values_b if c >= threshold_b]
        print('{} confidence values above threshold (B)'.format(len(confidence_values_b_above_threshold)))

    return threshold_b

# ...def find_equivalent_threshold(...)
 #%% Interactive driver

megadetector 5.0.23__py3-none-any.whl → 5.0.25__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.23__py3-none-any.whl → 5.0.25__py3-none-any.whl