PyPI - megadetector - Versions diffs - 5.0.22__py3-none-any.whl → 5.0.24__py3-none-any.whl - Mend

megadetector 5.0.22__py3-none-any.whl → 5.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (38) hide show

megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +2 -3
megadetector/classification/merge_classification_detection_output.py +2 -2
megadetector/data_management/coco_to_labelme.py +2 -1
megadetector/data_management/databases/integrity_check_json_db.py +15 -14
megadetector/data_management/databases/subset_json_db.py +49 -21
megadetector/data_management/mewc_to_md.py +340 -0
megadetector/data_management/wi_to_md.py +41 -0
megadetector/data_management/yolo_output_to_md_output.py +15 -8
megadetector/detection/process_video.py +24 -7
megadetector/detection/pytorch_detector.py +841 -160
megadetector/detection/run_detector.py +340 -146
megadetector/detection/run_detector_batch.py +306 -70
megadetector/detection/run_inference_with_yolov5_val.py +61 -4
megadetector/detection/tf_detector.py +6 -1
megadetector/postprocessing/{combine_api_outputs.py → combine_batch_outputs.py} +10 -13
megadetector/postprocessing/compare_batch_results.py +68 -6
megadetector/postprocessing/md_to_labelme.py +7 -7
megadetector/postprocessing/md_to_wi.py +40 -0
megadetector/postprocessing/merge_detections.py +1 -1
megadetector/postprocessing/postprocess_batch_results.py +10 -3
megadetector/postprocessing/separate_detections_into_folders.py +32 -4
megadetector/postprocessing/validate_batch_results.py +9 -4
megadetector/utils/ct_utils.py +172 -57
megadetector/utils/gpu_test.py +107 -0
megadetector/utils/md_tests.py +363 -108
megadetector/utils/path_utils.py +9 -2
megadetector/utils/wi_utils.py +1794 -0
megadetector/visualization/visualization_utils.py +82 -16
megadetector/visualization/visualize_db.py +25 -7
megadetector/visualization/visualize_detector_output.py +60 -13
{megadetector-5.0.22.dist-info → megadetector-5.0.24.dist-info}/LICENSE +0 -0
{megadetector-5.0.22.dist-info → megadetector-5.0.24.dist-info}/METADATA +129 -143
{megadetector-5.0.22.dist-info → megadetector-5.0.24.dist-info}/RECORD +35 -33
{megadetector-5.0.22.dist-info → megadetector-5.0.24.dist-info}/top_level.txt +0 -0
megadetector/detection/detector_training/__init__.py +0 -0
megadetector/detection/detector_training/model_main_tf2.py +0 -114
megadetector/utils/torch_test.py +0 -32
{megadetector-5.0.22.dist-info → megadetector-5.0.24.dist-info}/WHEEL +0 -0

megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py CHANGED Viewed

@@ -14,9 +14,8 @@ import redis
 import argparse
 import PIL
-from io import BytesIO
-from detection.run_detector import load_detector, convert_to_tf_coords
+from detection.run_detector import load_detector
+from utils.ct_utils import convert_xywh_to_xyxy as convert_to_tf_coords
 import config
 import visualization.visualization_utils as vis_utils

megadetector/classification/merge_classification_detection_output.py CHANGED Viewed

@@ -70,7 +70,7 @@ from typing import Any
 import pandas as pd
 from tqdm import tqdm
-from megadetector.utils.ct_utils import truncate_float
+from megadetector.utils.ct_utils import round_float
 #%% Example usage
@@ -124,7 +124,7 @@ def row_to_classification_list(row: Mapping[str, Any],
         # filter out confidences below the threshold, and set precision to 4
         result = [
-            (k, truncate_float(conf, precision=4))
+            (k, round_float(conf, precision=4))
             for k, conf in result if conf >= threshold
         ]

megadetector/data_management/coco_to_labelme.py CHANGED Viewed

@@ -18,6 +18,7 @@ from tqdm import tqdm
 from collections import defaultdict
 from megadetector.visualization.visualization_utils import open_image
+from megadetector.detection.run_detector import FAILURE_IMAGE_OPEN
 #%% Functions
@@ -145,7 +146,7 @@ def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check
                 except Exception:
                     print('Warning: cannot open image {}'.format(im_full_path))
                     if 'failure' not in im:
-                        im['failure'] = 'Failure image access'
+                        im['failure'] = FAILURE_IMAGE_OPEN
             # ...if we need to read w/h information

megadetector/data_management/databases/integrity_check_json_db.py CHANGED Viewed

@@ -86,7 +86,7 @@ def _check_image_existence_and_size(image,options=None):
         options (IntegrityCheckOptions): parameters impacting validation
     Returns:
-        bool: whether this image passes validation
+        str: None if this image passes validation, otherwise an error string
     """
     if options is None:
@@ -96,23 +96,23 @@ def _check_image_existence_and_size(image,options=None):
     filePath = os.path.join(options.baseDir,image['file_name'])
     if not os.path.isfile(filePath):
-        # print('Image path {} does not exist'.format(filePath))
-        return False
+        s = 'Image path {} does not exist'.format(filePath)
+        return s
     if options.bCheckImageSizes:
         if not ('height' in image and 'width' in image):
-            print('Missing image size in {}'.format(filePath))
-            return False
+            s = 'Missing image size in {}'.format(filePath)
+            return s
         # width, height = Image.open(filePath).size
         pil_im = open_image(filePath)
         width,height = pil_im.size
         if (not (width == image['width'] and height == image['height'])):
-            print('Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
-                    image['id'], filePath, image['width'], image['height'], width, height))
-            return False
+            s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
+                    image['id'], filePath, image['width'], image['height'], width, height)
+            return s
-    return True
+    return None
 def integrity_check_json_db(jsonFile, options=None):
@@ -287,6 +287,7 @@ def integrity_check_json_db(jsonFile, options=None):
             if fn_relative not in image_paths_in_json:
                 unused_files.append(fn_relative)
+    # List of (filename,error_string) tuples
     validation_errors = []
     # If we're checking image existence but not image size, we don't need to read the images
@@ -298,8 +299,8 @@ def integrity_check_json_db(jsonFile, options=None):
         image_paths_relative_set = set(image_paths_relative)
         for im in images:
-            if im['file_name'] not in image_paths_relative_set:
-                validation_errors.append(im['file_name'])
+            if im['file_name'] not in image_paths_relative_set:
+                validation_errors.append((im['file_name'],'not found in relative path list'))
     # If we're checking image size, we need to read the images
     if options.bCheckImageSizes:
@@ -321,12 +322,12 @@ def integrity_check_json_db(jsonFile, options=None):
             results = tqdm(pool.imap(_check_image_existence_and_size, images), total=len(images))
         else:
             results = []
-            for im in tqdm(images):
+            for im in tqdm(images):
                 results.append(_check_image_existence_and_size(im,options))
         for i_image,result in enumerate(results):
-            if result is not None:
-                validation_errors.append(images[i_image]['file_name'])
+            if result is not None:
+                # validation_errors is a list of (filename,error_string) tuples; list.append()
+                # accepts exactly one argument, so the pair has to be wrapped in a tuple.
+                validation_errors.append((images[i_image]['file_name'],result))
     # ...for each image

megadetector/data_management/databases/subset_json_db.py CHANGED Viewed

@@ -12,16 +12,18 @@ subset_json_detector_output.py.
 #%% Constants and imports
+import os
 import sys
 import json
 import argparse
 from tqdm import tqdm
+from copy import copy
 #%% Functions
-def subset_json_db(input_json, query, output_json=None, ignore_case=False):
+def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbose=False):
     """
     Given a json file (or dictionary already loaded from a json file), produce a new
     database containing only the images whose filenames contain the string 'query',
@@ -29,54 +31,80 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False):
     Args:
         input_json (str): COCO Camera Traps .json file to load, or an already-loaded dict
-        query (str): string to query for, only include images in the output whose filenames
-            contain this string.
+        query (str or list): string to query for, only include images in the output whose filenames
+            contain this string.  If this is a list, test for exact matches.
         output_json (str, optional): file to write the resulting .json file to
         ignore_case (bool, optional): whether to perform a case-insensitive search for [query]
+        verbose (bool, optional): enable additional debug output
     Returns:
-        dict: possibly-modified CCT dictionary
+        dict: CCT dictionary containing a subset of the images and annotations in the input dict
     """
-    if ignore_case:
-        query = query.lower()
     # Load the input file if necessary
     if isinstance(input_json,str):
         print('Loading input .json...')
         with open(input_json, 'r') as f:
-            data = json.load(f)
+            input_data = json.load(f)
     else:
-        data = input_json
+        input_data = input_json
     # Find images matching the query
     images = []
-    image_ids = set()
-    for im in tqdm(data['images']):
-        fn = im['file_name']
+    if isinstance(query,str):
+        if ignore_case:
+            query = query.lower()
+        for im in tqdm(input_data['images']):
+            fn = im['file_name']
+            if ignore_case:
+                fn = fn.lower()
+            if query in fn:
+                images.append(im)
+    else:
+        query = set(query)
         if ignore_case:
-            fn = fn.lower()
-        if query in fn:
-            images.append(im)
-            image_ids.add(im['id'])
+            query = set([s.lower() for s in query])
+        for im in input_data['images']:
+            fn = im['file_name']
+            if ignore_case:
+                fn = fn.lower()
+            if fn in query:
+                images.append(im)
+    image_ids = set([im['id'] for im in images])
     # Find annotations referring to those images
     annotations = []
-    for ann in tqdm(data['annotations']):
+    for ann in input_data['annotations']:
         if ann['image_id'] in image_ids:
             annotations.append(ann)
-    output_data = data
+    output_data = copy(input_data)
     output_data['images'] = images
     output_data['annotations'] = annotations
     # Write the output file if requested
     if output_json is not None:
-        print('Writing output .json...')
-        json.dump(output_data,open(output_json,'w'),indent=1)
+        if verbose:
+            print('Writing output .json to {}'.format(output_json))
+        output_dir = os.path.dirname(output_json)
+        # os.path.dirname() returns '' when output_json is a bare filename, and
+        # os.makedirs('') raises FileNotFoundError, so only create a folder when there is one.
+        if len(output_dir) > 0:
+            os.makedirs(output_dir,exist_ok=True)
+        with open(output_json,'w') as f:
+            json.dump(output_data,f,indent=1)
+    if verbose:
+        print('Keeping {} of {} images, {} of {} annotations'.format(
+            len(output_data['images']),len(input_data['images']),
+            len(output_data['annotations']),len(input_data['annotations'])))
     return output_data

megadetector/data_management/mewc_to_md.py ADDED Viewed

@@ -0,0 +1,340 @@
+"""
+mewc_to_md.py
+Converts the output of the MEWC inference scripts to the MD output format.
+"""
+#%% Imports and constants
+import os
+import json
+import pandas as pd
+from copy import deepcopy
+from collections import defaultdict
+from megadetector.utils.ct_utils import sort_list_of_dicts_by_key, invert_dictionary # noqa
+from megadetector.utils.path_utils import recursive_file_list
+from megadetector.postprocessing.validate_batch_results import \
+    ValidateBatchResultsOptions, validate_batch_results
+# Default value for the [mount_prefix] argument of mewc_to_md(): the leading string removed
+# from image paths in the MD results files.
+default_mewc_mount_prefix = '/images/'
+# Default value for the [category_name_column] argument of mewc_to_md(): the column in the
+# MEWC results .csv that supplies category names.
+default_mewc_category_name_column = 'class_id'
+#%% Functions
+def mewc_to_md(mewc_input_folder,
+               output_file=None,
+               mount_prefix=default_mewc_mount_prefix,
+               category_name_column=default_mewc_category_name_column,
+               mewc_out_filename='mewc_out.csv',
+               md_out_filename='md_out.json'):
+    """
+    Combines the MEWC classification results (.csv) and the MD detection results (.json)
+    found under [mewc_input_folder] into a single MD-formatted results dict, attaching each
+    classification to the detection indexed by its snip ID.  Optionally writes (and then
+    validates) the merged results file.
+    Args:
+        mewc_input_folder (str): the folder we'll search for MEWC output files
+        output_file (str, optional): .json file to write with class information
+        mount_prefix (str, optional): string to remove from all filenames in the MD
+            .json file, typically the prefix used to mount the image folder.
+        category_name_column (str, optional): column in the MEWC results .csv to use for
+            category naming.
+        mewc_out_filename (str, optional): a file is treated as a MEWC results file if its
+            relative path ends with this string.
+        md_out_filename (str, optional): name of the MD results file that must be present in
+            the same folder as each MEWC results file.
+    Returns:
+        dict: an MD-formatted dict, the same as what's written to [output_file]
+    """
+    ##%% Read input files
+    assert os.path.isdir(mewc_input_folder), \
+        'Could not find folder {}'.format(mewc_input_folder)
+    ##%% Find MEWC output files
+    relative_path_to_mewc_info = {}
+    print('Listing files in folder {}'.format(mewc_input_folder))
+    all_files_relative = set(recursive_file_list(mewc_input_folder,return_relative_paths=True))
+    for fn_relative in all_files_relative:
+        if fn_relative.endswith(mewc_out_filename):
+            folder_relative = '/'.join(fn_relative.split('/')[:-1])
+            assert folder_relative not in relative_path_to_mewc_info
+            md_output_file_relative = os.path.join(folder_relative,md_out_filename).replace('\\','/')
+            assert md_output_file_relative in all_files_relative, \
+                'Could not find MD output file {} to match to {}'.format(
+                    md_output_file_relative,fn_relative)
+            relative_path_to_mewc_info[folder_relative] = \
+                {'mewc_predict_file':fn_relative,'md_file':md_output_file_relative}
+    # Note: [folder_relative] is only bound if at least one MEWC results file was found, so
+    # it is deliberately not deleted here (an unconditional "del" raises when nothing matched).
+    print('Found {} MEWC results files'.format(len(relative_path_to_mewc_info)))
+    ##%% Prepare to loop over results files
+    md_results_all = {}
+    md_results_all['images'] = []
+    md_results_all['detection_categories'] = {}
+    md_results_all['classification_categories'] = {}
+    md_results_all['info'] = None
+    classification_category_name_to_id = {}
+    ##%% Loop over results files
+    # relative_folder = next(iter(relative_path_to_mewc_info.keys()))
+    for relative_folder in relative_path_to_mewc_info:
+        ##%%
+        mewc_info = relative_path_to_mewc_info[relative_folder]
+        mewc_csv_fn_abs = os.path.join(mewc_input_folder,mewc_info['mewc_predict_file'])
+        mewc_md_fn_abs = os.path.join(mewc_input_folder,mewc_info['md_file'])
+        mewc_classification_info = pd.read_csv(mewc_csv_fn_abs)
+        mewc_classification_info = mewc_classification_info.to_dict('records')
+        assert os.path.isfile(mewc_md_fn_abs), \
+            'Could not find file {}'.format(mewc_md_fn_abs)
+        with open(mewc_md_fn_abs,'r') as f:
+            md_results = json.load(f)
+        ##%% Remove the mount prefix from MD files if necessary
+        if mount_prefix is not None and len(mount_prefix) > 0:
+            n_files_without_mount_prefix = 0
+            # im = md_results['images'][0]
+            for im in md_results['images']:
+                if not im['file'].startswith(mount_prefix):
+                    n_files_without_mount_prefix += 1
+                else:
+                    im['file'] = im['file'].replace(mount_prefix,'',1)
+            if n_files_without_mount_prefix > 0:
+                print('Warning {} of {} files in the MD results did not include the mount prefix {}'.format(
+                    n_files_without_mount_prefix,len(md_results['images']),mount_prefix))
+        ##%% Convert MEWC snip IDs to image files
+        # r = mewc_classification_info[0]
+        for r in mewc_classification_info:
+            # E.g. "IMG0-0.jpg"
+            snip_file = r['filename']
+            # E.g. "IMG0-0"
+            snip_file_no_ext = os.path.splitext(snip_file)[0]
+            ext = os.path.splitext(snip_file)[1] # noqa
+            tokens = snip_file_no_ext.split('-')
+            if len(tokens) == 1:
+                print('Warning: in folder {}, detection ID not found in snip filename {}, skipping'.format(
+                relative_folder,snip_file_no_ext))
+                r['image_filename_without_extension'] = snip_file_no_ext
+                r['snip_id'] = None
+                continue
+            # Everything before the last '-' is the image name, the last token is the snip ID
+            filename_without_snip_id = '-'.join(tokens[0:-1])
+            snip_id = int(tokens[-1])
+            image_filename_without_extension = filename_without_snip_id
+            r['image_filename_without_extension'] = image_filename_without_extension
+            r['snip_id'] = snip_id
+        # ...for each MEWC result record
+        ##%% Make sure MD results and MEWC results refer to the same files
+        images_in_md_results_no_extension = \
+            set([os.path.splitext(im['file'])[0] for im in md_results['images']])
+        images_in_mewc_results_no_extension = set(r['image_filename_without_extension'] \
+                                                  for r in mewc_classification_info)
+        # All files with classification results should also have detection results
+        for fn in images_in_mewc_results_no_extension:
+            assert fn in images_in_md_results_no_extension, \
+                'Error: file {} is present in mewc-predict results, but not in MD results'.format(fn)
+        # This is just a note to self: no classification results are present for empty images
+        if False:
+            for fn in images_in_md_results_no_extension:
+                if fn not in images_in_mewc_results_no_extension:
+                    print('Warning: file {}/{} is present in MD results, but not in mewc-predict results'.format(
+                        relative_folder,fn))
+        ##%% Validate images
+        for im in md_results['images']:
+            fn_relative = im['file']
+            fn_abs = os.path.join(mewc_input_folder,relative_folder,fn_relative)
+            if not os.path.isfile(fn_abs):
+                print('Warning: image file {} does not exist'.format(fn_abs))
+        ##%% Map filenames to MEWC results
+        image_id_to_mewc_records = defaultdict(list)
+        for r in mewc_classification_info:
+            image_id_to_mewc_records[r['image_filename_without_extension']].append(r)
+        ##%% Add classification info to MD results
+        # im = md_results['images'][0]
+        for im in md_results['images']:
+            if ('detections' not in im) or (im['detections'] is None) or (len(im['detections']) == 0):
+                continue
+            detections = im['detections']
+            # *Don't* sort by confidence, it looks like snip IDs use the original sort order
+            # detections = sort_list_of_dicts_by_key(detections,'conf',reverse=True)
+            # This is just a debug assist, so I can run this cell more than once
+            for det in detections:
+                det['classifications'] = []
+            image_id = os.path.splitext(im['file'])[0]
+            mewc_records_this_image = image_id_to_mewc_records[image_id]
+            # r = mewc_records_this_image[0]
+            for r in mewc_records_this_image:
+                if r['snip_id'] is None:
+                    continue
+                category_name = r[category_name_column]
+                # This is a *global* list of category mappings, across all mewc .csv files
+                if category_name not in classification_category_name_to_id:
+                    category_id = str(len(classification_category_name_to_id))
+                    classification_category_name_to_id[category_name] = category_id
+                else:
+                    category_id = classification_category_name_to_id[category_name]
+                snip_id = r['snip_id']
+                if snip_id >= len(detections):
+                    print('Warning: image {} has a classified snip ID of {}, but only {} detections are present'.format(
+                        image_id,snip_id,len(detections)))
+                    continue
+                det = detections[snip_id]
+                if 'classifications' not in det:
+                    det['classifications'] = []
+                det['classifications'].append([category_id,r['prob']])
+            # ...for each classification in this image
+        # ...for each image
+        ##%% Map MD results to the global level
+        if md_results_all['info'] is None:
+            md_results_all['info'] = md_results['info']
+        for category_id in md_results['detection_categories']:
+            if category_id not in md_results_all['detection_categories']:
+                md_results_all['detection_categories'][category_id] = \
+                    md_results['detection_categories'][category_id]
+            else:
+                assert md_results_all['detection_categories'][category_id] == \
+                    md_results['detection_categories'][category_id], \
+                    'MD results present with incompatible detection categories'
+        # im = md_results['images'][0]
+        for im in md_results['images']:
+            im_copy = deepcopy(im)
+            im_copy['file'] = os.path.join(relative_folder,im['file']).replace('\\','/')
+            md_results_all['images'].append(im_copy)
+    # ...for each folder that contains MEWC results
+    # Note: [md_results] is only bound if the loop above ran at least once, so it is
+    # deliberately not deleted here.
+    ##%% Write output
+    md_results_all['classification_categories'] = invert_dictionary(classification_category_name_to_id)
+    if output_file is not None:
+        output_dir = os.path.dirname(output_file)
+        # os.path.dirname() returns '' for a bare filename, and os.makedirs('') raises
+        if len(output_dir) > 0:
+            os.makedirs(output_dir,exist_ok=True)
+        with open(output_file,'w') as f:
+            json.dump(md_results_all,f,indent=1)
+        # Re-read the file we just wrote and make sure it's a valid MD results file
+        validation_options = ValidateBatchResultsOptions()
+        validation_options.check_image_existence = True
+        validation_options.relative_path_base = mewc_input_folder
+        validation_options.raise_errors = True
+        validation_results = validate_batch_results(output_file,validation_options) # noqa
+    return md_results_all
+# ...def mewc_to_md(...)
+#%% Interactive driver
+# The "if False" guard means nothing below ever runs on import or from the command line.
+# NOTE(review): presumably kept so these statements can be run by hand as an editor cell
+# (the "#%%" markers); the folder below is a developer-specific test path.
+if False:
+    pass
+    #%%
+    mewc_input_folder = r'G:\temp\mewc-test'
+    mount_prefix = '/images/'
+    output_file = os.path.join(mewc_input_folder,'results_with_classes.json')
+    _ = mewc_to_md(mewc_input_folder=mewc_input_folder,
+                   output_file=output_file,
+                   mount_prefix=mount_prefix,
+                   category_name_column='class_id')
+#%% Command-line driver
+import sys
+import argparse
+def main():
+    """
+    Command-line entry point: parses arguments and forwards them to mewc_to_md().
+    """
+    cli = argparse.ArgumentParser()
+    # Positional arguments, registered in command-line order
+    for arg_name,arg_help in (
+        ('input_folder','Folder containing images and MEWC .json/.csv files'),
+        ('output_file','.json file where output will be written')):
+        cli.add_argument(arg_name,type=str,help=arg_help)
+    # Optional arguments, defaulting to the module-level constants
+    for arg_name,arg_default,arg_help in (
+        ('--mount_prefix',default_mewc_mount_prefix,
+         'prefix to remove from each filename in MEWC results, typically the Docker mount point'),
+        ('--category_name_column',default_mewc_category_name_column,
+         'column in the MEWC .csv file to use for category names')):
+        cli.add_argument(arg_name,type=str,default=arg_default,help=arg_help)
+    # With no arguments at all, print usage and exit
+    if not sys.argv[1:]:
+        cli.print_help()
+        cli.exit()
+    options = cli.parse_args()
+    _ = mewc_to_md(mewc_input_folder=options.input_folder,
+                   output_file=options.output_file,
+                   mount_prefix=options.mount_prefix,
+                   category_name_column=options.category_name_column)
+if __name__ == '__main__':
+    main()

megadetector/data_management/wi_to_md.py ADDED Viewed

@@ -0,0 +1,41 @@
+"""
+wi_to_md.py
+Converts the WI predictions.json format to MD .json format.  This is just a
+command-line wrapper around utils.wi_utils.generate_md_results_from_predictions_json.
+"""
+#%% Imports and constants
+import sys
+import argparse
+from megadetector.utils.wi_utils import generate_md_results_from_predictions_json
+#%% Command-line driver
+def main():
+    """
+    Command-line entry point: parses arguments and forwards them to
+    generate_md_results_from_predictions_json().
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        'predictions_json_file',
+        action='store',
+        type=str,
+        help='.json file to convert from predictions.json format to MD format')
+    cli.add_argument(
+        'md_results_file',
+        action='store',
+        type=str,
+        help='output file to write in MD format')
+    base_folder_help = ('leading string to remove from each path in the predictions.json '
+                        'file (to convert from absolute to relative paths)')
+    cli.add_argument(
+        '--base_folder',
+        action='store',
+        type=str,
+        default=None,
+        help=base_folder_help)
+    # With no arguments at all, print usage and exit
+    if not sys.argv[1:]:
+        cli.print_help()
+        cli.exit()
+    options = cli.parse_args()
+    generate_md_results_from_predictions_json(options.predictions_json_file,
+                                              options.md_results_file,
+                                              options.base_folder)
+if __name__ == '__main__':
+    main()

megadetector 5.0.22__py3-none-any.whl → 5.0.24__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.22__py3-none-any.whl → 5.0.24__py3-none-any.whl