PyPI - megadetector - Versions diffs - 5.0.23__py3-none-any.whl → 5.0.24__py3-none-any.whl - Mend

megadetector 5.0.23__py3-none-any.whl → 5.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (38) hide show

megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +2 -3
megadetector/classification/merge_classification_detection_output.py +2 -2
megadetector/data_management/coco_to_labelme.py +2 -1
megadetector/data_management/databases/integrity_check_json_db.py +15 -14
megadetector/data_management/databases/subset_json_db.py +49 -21
megadetector/data_management/mewc_to_md.py +340 -0
megadetector/data_management/wi_to_md.py +41 -0
megadetector/data_management/yolo_output_to_md_output.py +15 -8
megadetector/detection/process_video.py +24 -7
megadetector/detection/pytorch_detector.py +841 -160
megadetector/detection/run_detector.py +340 -146
megadetector/detection/run_detector_batch.py +304 -68
megadetector/detection/run_inference_with_yolov5_val.py +61 -4
megadetector/detection/tf_detector.py +6 -1
megadetector/postprocessing/{combine_api_outputs.py → combine_batch_outputs.py} +10 -13
megadetector/postprocessing/compare_batch_results.py +68 -6
megadetector/postprocessing/md_to_labelme.py +7 -7
megadetector/postprocessing/md_to_wi.py +40 -0
megadetector/postprocessing/merge_detections.py +1 -1
megadetector/postprocessing/postprocess_batch_results.py +10 -3
megadetector/postprocessing/separate_detections_into_folders.py +32 -4
megadetector/postprocessing/validate_batch_results.py +9 -4
megadetector/utils/ct_utils.py +165 -45
megadetector/utils/gpu_test.py +107 -0
megadetector/utils/md_tests.py +355 -108
megadetector/utils/path_utils.py +9 -2
megadetector/utils/wi_utils.py +1794 -0
megadetector/visualization/visualization_utils.py +82 -16
megadetector/visualization/visualize_db.py +25 -7
megadetector/visualization/visualize_detector_output.py +60 -13
{megadetector-5.0.23.dist-info → megadetector-5.0.24.dist-info}/METADATA +10 -24
{megadetector-5.0.23.dist-info → megadetector-5.0.24.dist-info}/RECORD +35 -33
megadetector/detection/detector_training/__init__.py +0 -0
megadetector/detection/detector_training/model_main_tf2.py +0 -114
megadetector/utils/torch_test.py +0 -32
{megadetector-5.0.23.dist-info → megadetector-5.0.24.dist-info}/LICENSE +0 -0
{megadetector-5.0.23.dist-info → megadetector-5.0.24.dist-info}/WHEEL +0 -0
{megadetector-5.0.23.dist-info → megadetector-5.0.24.dist-info}/top_level.txt +0 -0

megadetector/detection/run_inference_with_yolov5_val.py CHANGED Viewed

@@ -57,7 +57,7 @@ from megadetector.utils.ct_utils import is_iterable, split_list_into_fixed_size_
 from megadetector.utils.path_utils import path_is_abs
 from megadetector.data_management import yolo_output_to_md_output
 from megadetector.detection.run_detector import try_download_known_detector
-from megadetector.postprocessing.combine_api_outputs import combine_api_output_files
+from megadetector.postprocessing.combine_batch_outputs import combine_batch_output_files
 default_image_size_with_augmentation = int(1280 * 1.3)
 default_image_size_with_no_augmentation = 1280
@@ -214,6 +214,64 @@ def _clean_up_temporary_folders(options,
         print('Warning: using temporary YOLO results folder {}, but not removing it'.format(
             yolo_results_folder))
+def get_stats_for_category(filename,category='all'):
+    """
+    Retrieve statistics for a category from the YOLO console output
+    stored in [filename].
+    Only lines with exactly seven whitespace-delimited tokens, the first of which
+    is exactly [category], are treated as results lines; the first such line wins.
+    Args:
+        filename (str): a text file containing console output from a YOLO val run
+        category (optional, str): a category name
+    Returns:
+        dict: a dict with fields category, n_images, n_labels, P, R, mAP50, and
+            mAP50-95, or None if no results line was found for [category]
+    """
+    with open(filename,'r',encoding='utf-8') as f:
+        lines = f.readlines()
+    # This is just a hedge to make sure there isn't some YOLO version floating
+    # around that used different IoU thresholds in the console output.
+    found_map50 = False
+    found_map5095 = False
+    for line in lines:
+        s = line.strip()
+        s_lower = s.lower()
+        if ' map50 ' in s_lower or ' map@.5 ' in s_lower:
+            found_map50 = True
+        if 'map50-95' in s_lower or 'map@.5:.95' in s_lower:
+            found_map5095 = True
+        # split() with no separator collapses runs of whitespace and never
+        # produces empty tokens
+        tokens = s.split()
+        # Require an exact match on the first token, not just a prefix match on the
+        # line; otherwise a category like "animal_other" would be mistaken for "animal".
+        if len(tokens) != 7 or tokens[0] != category:
+            continue
+        assert found_map50 and found_map5095, \
+            'Parsing error in YOLO console output file {}'.format(filename)
+        to_return = {}
+        to_return['category'] = category
+        to_return['n_images'] = int(tokens[1])
+        to_return['n_labels'] = int(tokens[2])
+        to_return['P'] = float(tokens[3])
+        to_return['R'] = float(tokens[4])
+        to_return['mAP50'] = float(tokens[5])
+        to_return['mAP50-95'] = float(tokens[6])
+        return to_return
+    # ...for each line
+    # No results line matched [category]
+    return None
 #%% Main function
@@ -478,7 +536,7 @@ def run_inference_with_yolo_val(options):
         # ...for each chunk
         # Merge
-        _ = combine_api_output_files(input_files=chunk_output_files,
+        _ = combine_batch_output_files(input_files=chunk_output_files,
                                  output_file=options.output_file,
                                  require_uniqueness=True,
                                  verbose=True)
@@ -644,8 +702,7 @@ def run_inference_with_yolo_val(options):
     assert len(category_ids) == 1 + category_ids[-1]
     yolo_dataset_file = os.path.join(yolo_results_folder,'dataset.yaml')
-    yolo_image_list_file = os.path.join(yolo_results_folder,'images.txt')
+    yolo_image_list_file = os.path.join(yolo_results_folder,'images.txt')
     with open(yolo_image_list_file,'w') as f:

megadetector/detection/tf_detector.py CHANGED Viewed

@@ -36,10 +36,15 @@ class TFDetector:
     BATCH_SIZE = 1
-    def __init__(self, model_path):
+    def __init__(self, model_path, detector_options=None):
         """
         Loads a model from [model_path] and starts a tf.Session with this graph. Obtains
         input and output tensor handles.
+        Args:
+            model_path (str): path to .pb file
+            detector_options (dict, optional): key-value pairs that control detector
+                options; currently not used by TFDetector
         """
         detection_graph = TFDetector.__load_model(model_path)

megadetector/postprocessing/{combine_api_outputs.py → combine_batch_outputs.py} RENAMED Viewed

@@ -1,8 +1,8 @@
 """
-combine_api_outputs.py
+combine_batch_outputs.py
-Merges two or more .json files in batch API output format, optionally
+Merges two or more .json files in MD output format, optionally
 writing the results to another .json file.
 * Concatenates image lists, erroring if images are not unique.
@@ -15,10 +15,7 @@ https://github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_pro
 Command-line use:
-combine_api_outputs input1.json input2.json ... inputN.json output.json
-Also see combine_api_shard_files() (not exposed via the command line yet) to
-combine the intermediate files created by the API.
+combine_batch_outputs input1.json input2.json ... inputN.json output.json
 This does no checking for redundancy; if you are looking to ensemble
 the results of multiple model versions, see merge_detections.py.
@@ -34,7 +31,7 @@ import json
 #%% Merge functions
-def combine_api_output_files(input_files,
+def combine_batch_output_files(input_files,
                              output_file=None,
                              require_uniqueness=True,
                              verbose=True):
@@ -64,7 +61,7 @@ def combine_api_output_files(input_files,
             input_dicts.append(json.load(f))
     print_if_verbose('Merging results')
-    merged_dict = combine_api_output_dictionaries(
+    merged_dict = combine_batch_output_dictionaries(
         input_dicts, require_uniqueness=require_uniqueness)
     print_if_verbose('Writing output to {}'.format(output_file))
@@ -75,7 +72,7 @@ def combine_api_output_files(input_files,
     return merged_dict
-def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
+def combine_batch_output_dictionaries(input_dicts, require_uniqueness=True):
     """
     Merges the list of MD results dictionaries [input_dicts] into a single dict.
     See module header comment for details on merge rules.
@@ -106,7 +103,7 @@ def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
         for k in input_dict:
             if k not in known_fields:
-                raise ValueError(f'Unrecognized API output field: {k}')
+                print(f'Warning: unrecognized batch output field: {k}')
         # Check compatibility of detection categories
         for cat_id in input_dict['detection_categories']:
@@ -157,7 +154,7 @@ def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
             assert info_compare['detector'] == info['detector'], (
                 'Incompatible detection versions in merging')
             assert info_compare['format_version'] == info['format_version'], (
-                'Incompatible API output versions in merging')
+                'Incompatible batch output versions in merging')
             if 'classifier' in info_compare:
                 if 'classifier' in info:
                     assert info['classifier'] == info_compare['classifier']
@@ -179,7 +176,7 @@ def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
                    'images': sorted_images}
     return merged_dict
-# ...combine_api_output_files()
+# ...combine_batch_output_files()
 def combine_api_shard_files(input_files, output_file=None):
@@ -243,7 +240,7 @@ def main():
         parser.exit()
     args = parser.parse_args()
-    combine_api_output_files(args.input_paths, args.output_path)
+    combine_batch_output_files(args.input_paths, args.output_path)
 if __name__ == '__main__':
     main()

megadetector/postprocessing/compare_batch_results.py CHANGED Viewed

@@ -197,6 +197,10 @@ class BatchComparisonOptions:
         #: to describe images
         self.fn_to_display_fn = None
+        #: Should we run urllib.parse.quote() on paths before using them as links in the
+        #: output page?
+        self.parse_link_paths = True
 # ...class BatchComparisonOptions
@@ -1213,9 +1217,6 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
             # ...def _categorize_image_with_image_level_gt(...)
-            # if 'val#human#human#HoSa#2021.006_na#2021#2021.006 (2021)#20210713' in im_a['file']:
-            #    import pdb; pdb.set_trace()
             # im_detection = im_a; category_id_to_threshold = category_id_to_threshold_a
             result_types_present_a = \
                 _categorize_image_with_image_level_gt(im_a,im_gt,annotations_gt,category_id_to_threshold_a)
@@ -1360,12 +1361,17 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
             title = display_path + ' (max conf {:.2f},{:.2f})'.format(max_conf_a,max_conf_b)
+            if options.parse_link_paths:
+                link_target_string = urllib.parse.quote(input_image_absolute_paths[i_fn])
+            else:
+                link_target_string = input_image_absolute_paths[i_fn]
             info = {
                 'filename': fn,
                 'title': title,
                 'textStyle': 'font-family:verdana,arial,calibri;font-size:' + \
                     '80%;text-align:left;margin-top:20;margin-bottom:5',
-                'linkTarget': urllib.parse.quote(input_image_absolute_paths[i_fn]),
+                'linkTarget': link_target_string,
                 'sort_conf':sort_conf
             }
@@ -1575,7 +1581,9 @@ def n_way_comparison(filenames,
     if model_names is not None:
         assert len(model_names) == len(filenames), \
             '[model_names] should be the same length as [filenames]'
+    options.pairwise_options = []
     # Choose all pairwise combinations of the files in [filenames]
     for i, j in itertools.combinations(list(range(0,len(filenames))),2):
@@ -1598,7 +1606,61 @@ def n_way_comparison(filenames,
     return compare_batch_results(options)
-# ...n_way_comparison()
+# ...def n_way_comparison(...)
+def find_equivalent_threshold(results_a,results_b,threshold_a=0.2):
+    """
+    Given two sets of detector results, finds the confidence threshold for results_b
+    that produces the same fraction of *images* with detections as threshold_a does for
+    results_a.  Uses all categories.
+    Images whose 'detections' field is missing or None are ignored; images with an
+    empty detection list are treated as having a maximum confidence of 0.
+    Args:
+        results_a (str or dict): the first set of results, either a .json filename or a results
+            dict
+        results_b (str or dict): the second set of results, either a .json filename or a results
+            dict
+        threshold_a (float, optional): the threshold used to determine the target fraction of
+            images with detections in results_a
+    Returns:
+        float: the threshold that - when applied to results_b - produces (approximately) the
+            same fraction of images with detections that results from applying threshold_a
+            to results_a.  If that fraction is so small that the cutoff falls past the end of
+            the sorted confidence values, the highest per-image confidence in results_b is
+            returned.
+    Raises:
+        ValueError: if either set of results contains no images with a detections list
+    """
+    if isinstance(results_a,str):
+        with open(results_a,'r') as f:
+            results_a = json.load(f)
+    if isinstance(results_b,str):
+        with open(results_b,'r') as f:
+            results_b = json.load(f)
+    def get_confidence_values_for_results(images):
+        # One value per image: the maximum detection confidence, or 0 for an image
+        # with an empty detection list
+        confidence_values = []
+        for im in images:
+            detections = im.get('detections',None)
+            if detections is None:
+                continue
+            confidence_values.append(max((det['conf'] for det in detections),default=0))
+        return confidence_values
+    confidence_values_a = get_confidence_values_for_results(results_a['images'])
+    confidence_values_b = sorted(get_confidence_values_for_results(results_b['images']))
+    if len(confidence_values_a) == 0 or len(confidence_values_b) == 0:
+        raise ValueError(
+            'Cannot find an equivalent threshold: no images with detection lists in one or both result sets')
+    n_above_threshold_a = sum(1 for c in confidence_values_a if c >= threshold_a)
+    target_detection_fraction = n_above_threshold_a / len(confidence_values_a)
+    detection_cutoff_index = round((1.0-target_detection_fraction) * len(confidence_values_b))
+    # When (almost) no images in results_a are above threshold_a, the cutoff index lands
+    # one past the end of the list; clamp it to the last valid index.
+    detection_cutoff_index = min(detection_cutoff_index,len(confidence_values_b)-1)
+    threshold_b = confidence_values_b[detection_cutoff_index]
+    return threshold_b
+# ...def find_equivalent_threshold(...)
 #%% Interactive driver

megadetector/postprocessing/md_to_labelme.py CHANGED Viewed

@@ -25,8 +25,8 @@ from multiprocessing.pool import ThreadPool
 from functools import partial
 from megadetector.visualization.visualization_utils import open_image
-from megadetector.utils.ct_utils import truncate_float
-from megadetector.detection.run_detector import DEFAULT_DETECTOR_LABEL_MAP
+from megadetector.utils.ct_utils import round_float
+from megadetector.detection.run_detector import DEFAULT_DETECTOR_LABEL_MAP, FAILURE_IMAGE_OPEN
 output_precision = 3
 default_confidence_threshold = 0.15
@@ -92,10 +92,10 @@ def get_labelme_dict_for_image(im,image_base_name=None,category_id_to_name=None,
         # MD boxes are [x_min, y_min, width_of_box, height_of_box] (relative)
         #
         # labelme boxes are [[x0,y0],[x1,y1]] (absolute)
-        x0 = truncate_float(det['bbox'][0] * im['width'],output_precision)
-        y0 = truncate_float(det['bbox'][1] * im['height'],output_precision)
-        x1 = truncate_float(x0 + det['bbox'][2] * im['width'],output_precision)
-        y1 = truncate_float(y0 + det['bbox'][3] * im['height'],output_precision)
+        x0 = round_float(det['bbox'][0] * im['width'],output_precision)
+        y0 = round_float(det['bbox'][1] * im['height'],output_precision)
+        x1 = round_float(x0 + det['bbox'][2] * im['width'],output_precision)
+        y1 = round_float(y0 + det['bbox'][3] * im['height'],output_precision)
         shape['points'] = [[x0,y0],[x1,y1]]
         output_dict['shapes'].append(shape)
@@ -210,7 +210,7 @@ def md_to_labelme(results_file,image_base,confidence_threshold=None,
                     print('Warning: cannot open image {}, treating as a failure during inference'.format(
                         im_full_path))
                     if 'failure' not in im:
-                        im['failure'] = 'Failure image access'
+                        im['failure'] = FAILURE_IMAGE_OPEN
             # ...if we need to read w/h information

megadetector/postprocessing/md_to_wi.py ADDED Viewed

@@ -0,0 +1,40 @@
+"""
+md_to_wi.py
+Converts the MD .json format to the WI predictions.json format.
+"""
+#%% Imports and constants
+import sys
+import argparse
+from megadetector.utils.wi_utils import generate_predictions_json_from_md_results
+#%% Command-line driver
+def main():
+    """
+    Command-line driver: converts an MD-formatted results file to a
+    predictions.json file, optionally prepending a base folder to each path.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('md_results_file', action='store', type=str,
+                        help='output file in MD format to convert')
+    parser.add_argument('predictions_json_file', action='store', type=str,
+                        help='.json file to write in predictions.json format')
+    parser.add_argument('--base_folder', action='store', type=str, default=None,
+                        help='folder name to prepend to each path in md_results_file, ' + \
+                             'to convert relative paths to absolute paths.')
+    # With no arguments at all, show usage rather than an argparse error
+    if len(sys.argv[1:]) == 0:
+        parser.print_help()
+        parser.exit()
+    args = parser.parse_args()
+    # Forward the user-supplied base folder; previously this was hard-coded to None,
+    # so --base_folder was silently ignored.
+    generate_predictions_json_from_md_results(args.md_results_file,
+                                              args.predictions_json_file,
+                                              base_folder=args.base_folder)
+if __name__ == '__main__':
+    main()

megadetector/postprocessing/merge_detections.py CHANGED Viewed

@@ -9,7 +9,7 @@ results file from MDv5a.
 Detection categories must be the same in both files; if you want to first remap
 one file's category mapping to be the same as another's, see remap_detection_categories.
-If you want to literally merge two .json files, see combine_api_outputs.py.
+If you want to literally merge two .json files, see combine_batch_outputs.py.
 """

megadetector/postprocessing/postprocess_batch_results.py CHANGED Viewed

@@ -211,6 +211,9 @@ class PostProcessingOptions:
         #    format('https://megadetector.readthedocs.io')
         self.footer_text = ''
+        #: Character encoding to use when writing the index HTML file
+        self.output_html_encoding = None
     # ...__init__()
 # ...PostProcessingOptions
@@ -778,7 +781,8 @@ def _render_image_no_gt(file_info,detection_categories_to_results_name,
             if det['conf'] > max_conf:
                 max_conf = det['conf']
-            if ('classifications' in det) and (len(det['classifications']) > 0):
+            if ('classifications' in det) and (len(det['classifications']) > 0) and \
+                (res != 'non_detections'):
                 # This is a list of [class,confidence] pairs, sorted by confidence
                 classifications = det['classifications']
@@ -1522,7 +1526,8 @@ def process_batch_results(options):
         # Close body and html tags
         index_page += '{}</body></html>'.format(options.footer_text)
         output_html_file = os.path.join(output_dir, 'index.html')
-        with open(output_html_file, 'w') as f:
+        with open(output_html_file, 'w',
+                  encoding=options.output_html_encoding) as f:
             f.write(index_page)
         print('Finished writing html to {}'.format(output_html_file))
@@ -1778,6 +1783,7 @@ def process_batch_results(options):
         index_page += '</div>\n'
         if has_classification_info:
             index_page += '<h3>Images of detected classes</h3>'
             index_page += '<p>The same image might appear under multiple classes ' + \
                 'if multiple species were detected.</p>\n'
@@ -1806,7 +1812,8 @@ def process_batch_results(options):
         index_page += '{}</body></html>'.format(options.footer_text)
         output_html_file = os.path.join(output_dir, 'index.html')
-        with open(output_html_file, 'w') as f:
+        with open(output_html_file, 'w',
+                  encoding=options.output_html_encoding) as f:
             f.write(index_page)
         print('Finished writing html to {}'.format(output_html_file))

megadetector/postprocessing/separate_detections_into_folders.py CHANGED Viewed

@@ -88,6 +88,7 @@ from tqdm import tqdm
 from megadetector.utils.ct_utils import args_to_object, is_float
 from megadetector.detection.run_detector import get_typical_confidence_threshold_from_results
 from megadetector.visualization import visualization_utils as vis_utils
+from megadetector.visualization.visualization_utils import blur_detections
 friendly_folder_names = {'animal':'animals','person':'people','vehicle':'vehicles'}
@@ -188,6 +189,11 @@ class SeparateDetectionsIntoFoldersOptions:
         #: Do not set explicitly; this gets loaded from [results_file]
         self.category_id_to_category_name = None
+        #: List of category names for which we should blur detections, most commonly ['person']
+        #:
+        #: Can also be a comma-separated list.
+        self.category_names_to_blur = None
     # ...__init__()
 # ...class SeparateDetectionsIntoFoldersOptions
@@ -369,10 +375,10 @@ def _process_detections(im,options):
         return
     # At this point, this image is getting copied; we may or may not also need to
-    # draw bounding boxes.
+    # draw bounding boxes or blur pixels.
-    # Do a simple copy operation if we don't need to render any boxes
-    if (not options.render_boxes) or \
+    # Do a simple copy operation if we don't need to manipulate the images (render boxes, blur pixels)
+    if (not options.render_boxes and (options.category_names_to_blur is None)) or \
         (categories_above_threshold is None) or \
         (len(categories_above_threshold) == 0):
@@ -386,6 +392,24 @@ def _process_detections(im,options):
         # Open the source image
         pil_image = vis_utils.load_image(source_path)
+        # Blur regions in the image if necessary
+        category_names_to_blur = options.category_names_to_blur
+        if category_names_to_blur is not None:
+            if isinstance(category_names_to_blur,str):
+                category_names_to_blur = category_names_to_blur.split(',')
+                category_names_to_blur = [s.strip() for s in category_names_to_blur]
+            detections_to_blur = []
+            for d in detections:
+                category_name = options.category_id_to_category_name[d['category']]
+                category_threshold = options.category_name_to_threshold[category_name]
+                if (d['conf'] >= category_threshold) and (category_name in category_names_to_blur):
+                    detections_to_blur.append(d)
+            if len(detections_to_blur) > 0:
+                blur_detections(pil_image,detections_to_blur)
         # Render bounding boxes for each category separately, because
         # we allow different thresholds for each category.
@@ -447,9 +471,11 @@ def separate_detections_into_folders(options):
     # Input validation
     # Currently we don't support moving (instead of copying) when we're also rendering
-    # bounding boxes.
+    # bounding boxes or blurring humans.
     assert not (options.render_boxes and options.move_images), \
         'Cannot specify both render_boxes and move_images'
+    assert not ((options.category_names_to_blur is not None) and options.move_images), \
+        'Cannot specify both category_names_to_blur and move_images'
     # Create output folder if necessary
     if (os.path.isdir(options.base_output_folder)) and \
@@ -687,6 +713,8 @@ def main():
                         help='Box expansion (in pixels) for rendering, only meaningful if ' + \
                              'using render_boxes (defaults to {})'.format(
                              default_box_expansion))
+    parser.add_argument('--category_names_to_blur', type=str, default=None,
+                        help='Comma-separated list of category names to blur (or a single category name, e.g. "person")')
     if len(sys.argv[1:])==0:
         parser.print_help()

megadetector/postprocessing/validate_batch_results.py CHANGED Viewed

@@ -50,6 +50,9 @@ class ValidateBatchResultsOptions:
         #: Enable additional debug output
         self.verbose = False
+        #: Should we raise errors immediately (vs. just catching and reporting)?
+        self.raise_errors = False
 # ...class ValidateBatchResultsOptions
@@ -71,8 +74,7 @@ def validate_batch_results(json_filename,options=None):
         the loaded data.  The "validation_results" dict contains fields called "errors", "warnings",
         and "filename".  "errors" and "warnings" are lists of strings, although "errors" will never
         be longer than N=1, since validation fails at the first error.
     """
     if options is None:
@@ -223,8 +225,11 @@ def validate_batch_results(json_filename,options=None):
                     'Warning: non-standard key {} present at file level'.format(k))
     except Exception as e:
-        validation_results['errors'].append(str(e))
+        if options.raise_errors:
+            raise
+        else:
+            validation_results['errors'].append(str(e))
     # ...try/except

megadetector 5.0.23__py3-none-any.whl → 5.0.24__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.23__py3-none-any.whl → 5.0.24__py3-none-any.whl