megadetector 5.0.24-py3-none-any.whl → 5.0.26-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/data_management/cct_json_utils.py +15 -2
- megadetector/data_management/coco_to_yolo.py +53 -31
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
- megadetector/data_management/databases/integrity_check_json_db.py +2 -2
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
- megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
- megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
- megadetector/data_management/remap_coco_categories.py +60 -11
- megadetector/data_management/{wi_to_md.py → speciesnet_to_md.py} +2 -2
- megadetector/data_management/yolo_to_coco.py +45 -15
- megadetector/detection/run_detector.py +1 -0
- megadetector/detection/run_detector_batch.py +5 -4
- megadetector/postprocessing/classification_postprocessing.py +788 -524
- megadetector/postprocessing/compare_batch_results.py +176 -9
- megadetector/postprocessing/create_crop_folder.py +420 -0
- megadetector/postprocessing/load_api_results.py +4 -1
- megadetector/postprocessing/md_to_coco.py +1 -1
- megadetector/postprocessing/postprocess_batch_results.py +158 -44
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
- megadetector/postprocessing/separate_detections_into_folders.py +20 -4
- megadetector/postprocessing/subset_json_detector_output.py +180 -15
- megadetector/postprocessing/validate_batch_results.py +13 -5
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
- megadetector/taxonomy_mapping/species_lookup.py +45 -2
- megadetector/utils/ct_utils.py +76 -3
- megadetector/utils/directory_listing.py +4 -4
- megadetector/utils/gpu_test.py +21 -3
- megadetector/utils/md_tests.py +142 -49
- megadetector/utils/path_utils.py +342 -19
- megadetector/utils/wi_utils.py +1286 -212
- megadetector/visualization/visualization_utils.py +16 -4
- megadetector/visualization/visualize_db.py +1 -1
- megadetector/visualization/visualize_detector_output.py +1 -4
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/METADATA +6 -3
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/RECORD +41 -40
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/WHEEL +1 -1
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info/licenses}/LICENSE +0 -0
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/top_level.txt +0 -0

--- a/megadetector/postprocessing/compare_batch_results.py
+++ b/megadetector/postprocessing/compare_batch_results.py
@@ -138,6 +138,9 @@ class BatchComparisonOptions:
         #: List of filenames to include in the comparison, or None to use all files
         self.filenames_to_include = None
 
+        #: List of category names to include in the comparison, or None to use all categories
+        self.category_names_to_include = None
+
         #: Compare only detections/non-detections, ignore categories (still renders categories)
         self.class_agnostic_comparison = False
 
@@ -986,7 +989,32 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
         if invalid_category_error:
 
             continue
-
+
+        # Should we be restricting the comparison to only certain categories?
+        if options.category_names_to_include is not None:
+
+            # Just in case the user provided a single category instead of a list
+            if isinstance(options.category_names_to_include,str):
+                options.category_names_to_include = [options.category_names_to_include]
+
+            category_name_to_id_a = invert_dictionary(detection_categories_a)
+            category_name_to_id_b = invert_dictionary(detection_categories_b)
+            category_ids_to_include_a = []
+            category_ids_to_include_b = []
+
+            for category_name in options.category_names_to_include:
+                if category_name in category_name_to_id_a:
+                    category_ids_to_include_a.append(category_name_to_id_a[category_name])
+                if category_name in category_name_to_id_b:
+                    category_ids_to_include_b.append(category_name_to_id_b[category_name])
+
+            # Restrict the categories we treat as above-threshold to the set we're supposed
+            # to be using
+            categories_above_threshold_a = [category_id for category_id in categories_above_threshold_a if \
+                category_id in category_ids_to_include_a]
+            categories_above_threshold_b = [category_id for category_id in categories_above_threshold_b if \
+                category_id in category_ids_to_include_b]
+
         detection_a = (len(categories_above_threshold_a) > 0)
         detection_b = (len(categories_above_threshold_b) > 0)
 
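
The two hunks above add per-category filtering to batch comparisons: category_names_to_include is a new field on BatchComparisonOptions, and _pairwise_compare_batch_results now ignores above-threshold detections outside that set. A minimal sketch of configuring the new option (only the class and fields shown in this diff are assumed; the options object is passed to the comparison entry points the same way as in earlier releases):

    from megadetector.postprocessing.compare_batch_results import BatchComparisonOptions

    options = BatchComparisonOptions()

    # New in 5.0.26: only 'animal' detections count when deciding whether an image
    # contains an above-threshold detection; a single string is also accepted
    options.category_names_to_include = ['animal']

    # Existing behavior, unchanged: optionally ignore category labels entirely
    options.class_agnostic_comparison = False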
@@ -1609,7 +1637,72 @@ def n_way_comparison(filenames,
 # ...def n_way_comparison(...)
 
 
-def find_equivalent_threshold(results_a,results_b,threshold_a=0.2):
+def find_image_level_detections_above_threshold(results,threshold=0.2,category_names=None):
+    """
+    Returns images in the set of MD results [results] with detections above
+    a threshold confidence level, optionally only counting certain categories.
+
+    Args:
+        results (str or dict): the set of results, either a .json filename or a results
+            dict
+        threshold (float, optional): the threshold used to determine the target number of
+            detections in [results]
+        category_names (list or str, optional): the list of category names to consider (defaults
+            to using all categories), or the name of a single category.
+
+    Returns:
+        list: the images with above-threshold detections
+    """
+    if isinstance(results,str):
+        with open(results,'r') as f:
+            results = json.load(f)
+
+    category_ids_to_consider = None
+
+    if category_names is not None:
+
+        if isinstance(category_names,str):
+            category_names = [category_names]
+
+        category_id_to_name = results['detection_categories']
+        category_name_to_id = invert_dictionary(category_id_to_name)
+
+        category_ids_to_consider = []
+
+        # category_name = category_names[0]
+        for category_name in category_names:
+            category_id = category_name_to_id[category_name]
+            category_ids_to_consider.append(category_id)
+
+        assert len(category_ids_to_consider) > 0, \
+            'Category name list did not map to any category IDs'
+
+    images_above_threshold = []
+
+    for im in results['images']:
+
+        if ('detections' in im) and (im['detections'] is not None) and (len(im['detections']) > 0):
+            confidence_values_this_image = [0]
+            for det in im['detections']:
+                if category_ids_to_consider is not None:
+                    if det['category'] not in category_ids_to_consider:
+                        continue
+                confidence_values_this_image.append(det['conf'])
+            if max(confidence_values_this_image) >= threshold:
+                images_above_threshold.append(im)
+
+    # ...for each image
+
+    return images_above_threshold
+
+# ...def find_image_level_detections_above_threshold(...)
+
+
+def find_equivalent_threshold(results_a,
+                              results_b,
+                              threshold_a=0.2,
+                              category_names=None,
+                              verbose=False):
     """
     Given two sets of detector results, finds the confidence threshold for results_b
     that produces the same fraction of *images* with detections as threshold_a does for
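
The hunk above introduces find_image_level_detections_above_threshold. A short usage sketch follows; the results filename and the 'animal' category are placeholders, not part of the diff:

    from megadetector.postprocessing.compare_batch_results import \
        find_image_level_detections_above_threshold

    # 'md_results.json' stands in for any MD-formatted results file
    images = find_image_level_detections_above_threshold('md_results.json',
                                                         threshold=0.2,
                                                         category_names='animal')
    print('{} images contain above-threshold animal detections'.format(len(images)))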
@@ -1622,6 +1715,9 @@ def find_equivalent_threshold(results_a,results_b,threshold_a=0.2):
             dict
         threshold_a (float, optional): the threshold used to determine the target number of
             detections in results_a
+        category_names (list or str, optional): the list of category names to consider (defaults
+            to using all categories), or the name of a single category.
+        verbose (bool, optional): enable additional debug output
 
     Returns:
         float: the threshold that - when applied to results_b - produces the same number
@@ -1629,35 +1725,106 @@ def find_equivalent_threshold(results_a,results_b,threshold_a=0.2):
     """
 
     if isinstance(results_a,str):
+        if verbose:
+            print('Loading results from {}'.format(results_a))
         with open(results_a,'r') as f:
             results_a = json.load(f)
 
     if isinstance(results_b,str):
+        if verbose:
+            print('Loading results from {}'.format(results_b))
         with open(results_b,'r') as f:
             results_b = json.load(f)
+
+    category_ids_to_consider_a = None
+    category_ids_to_consider_b = None
+
+    if category_names is not None:
+
+        if isinstance(category_names,str):
+            category_names = [category_names]
+
+        categories_a = results_a['detection_categories']
+        categories_b = results_b['detection_categories']
+        category_name_to_id_a = invert_dictionary(categories_a)
+        category_name_to_id_b = invert_dictionary(categories_b)
+
+        category_ids_to_consider_a = []
+        category_ids_to_consider_b = []
+
+        # category_name = category_names[0]
+        for category_name in category_names:
+            category_id_a = category_name_to_id_a[category_name]
+            category_id_b = category_name_to_id_b[category_name]
+            category_ids_to_consider_a.append(category_id_a)
+            category_ids_to_consider_b.append(category_id_b)
 
-
+        assert len(category_ids_to_consider_a) > 0 and len(category_ids_to_consider_b) > 0, \
+            'Category name list did not map to any category IDs in one or both detection sets'
+
+    def _get_confidence_values_for_results(images,category_ids_to_consider,threshold):
+        """
+        Return a list of the maximum confidence value for each image in [images].
+        Returns zero confidence for images with no detections (or no detections
+        in the specified categories). Does not return anything for invalid images.
+        """
+
         confidence_values = []
+        images_above_threshold = []
+
         for im in images:
             if 'detections' in im and im['detections'] is not None:
                 if len(im['detections']) == 0:
                     confidence_values.append(0)
                 else:
-                    confidence_values_this_image = [
-
-
+                    confidence_values_this_image = []
+                    for det in im['detections']:
+                        if category_ids_to_consider is not None:
+                            if det['category'] not in category_ids_to_consider:
+                                continue
+                        confidence_values_this_image.append(det['conf'])
+                    if len(confidence_values_this_image) == 0:
+                        confidence_values.append(0)
+                    else:
+                        max_conf_value = max(confidence_values_this_image)
+
+                        if threshold is not None and max_conf_value >= threshold:
+                            images_above_threshold.append(im)
+                        confidence_values.append(max_conf_value)
+        # ...for each image
+
+        return confidence_values, images_above_threshold
 
-    confidence_values_a =
+    confidence_values_a,images_above_threshold_a = \
+        _get_confidence_values_for_results(results_a['images'],
+                                           category_ids_to_consider_a,
+                                           threshold_a)
+
+    # ...def _get_confidence_values_for_results(...)
+
+    if verbose:
+        print('For result set A, considering {} of {} images'.format(
+            len(confidence_values_a),len(results_a['images'])))
     confidence_values_a_above_threshold = [c for c in confidence_values_a if c >= threshold_a]
 
-    confidence_values_b =
-
+    confidence_values_b,_ = _get_confidence_values_for_results(results_b['images'],
+                                                               category_ids_to_consider_b,
+                                                               threshold=None)
+    if verbose:
+        print('For result set B, considering {} of {} images'.format(
+            len(confidence_values_b),len(results_b['images'])))
+    confidence_values_b = sorted(confidence_values_b)
 
     target_detection_fraction = len(confidence_values_a_above_threshold) / len(confidence_values_a)
 
     detection_cutoff_index = round((1.0-target_detection_fraction) * len(confidence_values_b))
     threshold_b = confidence_values_b[detection_cutoff_index]
 
+    if verbose:
+        print('{} confidence values above threshold (A)'.format(len(confidence_values_a_above_threshold)))
+        confidence_values_b_above_threshold = [c for c in confidence_values_b if c >= threshold_b]
+        print('{} confidence values above threshold (B)'.format(len(confidence_values_b_above_threshold)))
+
     return threshold_b
 
 # ...def find_equivalent_threshold(...)
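
With the changes above, find_equivalent_threshold can calibrate thresholds while only considering selected categories, and can print diagnostics. A hedged sketch of comparing two result sets (filenames are placeholders; the signature is taken from the diff):

    from megadetector.postprocessing.compare_batch_results import find_equivalent_threshold

    # Both files are MD-formatted results over the same image set (placeholder names)
    threshold_b = find_equivalent_threshold('results_model_a.json',
                                            'results_model_b.json',
                                            threshold_a=0.2,
                                            category_names=['animal'],
                                            verbose=True)
    print('Threshold {:.3f} on model B flags the same fraction of images'.format(threshold_b))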

--- /dev/null
+++ b/megadetector/postprocessing/create_crop_folder.py
@@ -0,0 +1,420 @@
+"""
+
+create_crop_folder.py
+
+Given a MegaDetector .json file and a folder of images, creates a new folder
+of images representing all above-threshold crops from the original folder.
+
+"""
+
+#%% Constants and imports
+
+import os
+import json
+from tqdm import tqdm
+
+from multiprocessing.pool import Pool, ThreadPool
+from collections import defaultdict
+from functools import partial
+
+from megadetector.utils.path_utils import insert_before_extension
+from megadetector.utils.ct_utils import invert_dictionary
+from megadetector.visualization.visualization_utils import crop_image
+from megadetector.visualization.visualization_utils import exif_preserving_save
+
+
+#%% Support classes
+
+class CreateCropFolderOptions:
+    """
+    Options used to parameterize create_crop_folder().
+    """
+
+    def __init__(self):
+
+        #: Confidence threshold determining which detections get written
+        self.confidence_threshold = 0.1
+
+        #: Number of pixels to expand each crop
+        self.expansion = 0
+
+        #: JPEG quality to use for saving crops (None for default)
+        self.quality = 95
+
+        #: Whether to overwrite existing images
+        self.overwrite = True
+
+        #: Number of concurrent workers
+        self.n_workers = 8
+
+        #: Whether to use processes ('process') or threads ('thread') for parallelization
+        self.pool_type = 'thread'
+
+        #: Include only these categories, or None to include all
+        #:
+        #: options.category_names_to_include = ['animal']
+        self.category_names_to_include = None
+
+
+#%% Support functions
+
+def _get_crop_filename(image_fn,crop_id):
+    """
+    Generate crop filenames in a consistent way.
+    """
+    if isinstance(crop_id,int):
+        crop_id = str(crop_id).zfill(3)
+    assert isinstance(crop_id,str)
+    return insert_before_extension(image_fn,'crop_' + crop_id)
+
+
+def _generate_crops_for_single_image(crops_this_image,
+                                     input_folder,
+                                     output_folder,
+                                     options):
+    """
+    Generate all the crops required for a single image.
+    """
+    if len(crops_this_image) == 0:
+        return
+
+    image_fn_relative = crops_this_image[0]['image_fn_relative']
+    input_fn_abs = os.path.join(input_folder,image_fn_relative)
+    assert os.path.isfile(input_fn_abs)
+
+    detections_to_crop = [c['detection'] for c in crops_this_image]
+
+    cropped_images = crop_image(detections_to_crop,
+                                input_fn_abs,
+                                confidence_threshold=0,
+                                expansion=options.expansion)
+
+    assert len(cropped_images) == len(crops_this_image)
+
+    # i_crop = 0; crop_info = crops_this_image[0]
+    for i_crop,crop_info in enumerate(crops_this_image):
+
+        assert crop_info['image_fn_relative'] == image_fn_relative
+        crop_filename_relative = _get_crop_filename(image_fn_relative, crop_info['crop_id'])
+        crop_filename_abs = os.path.join(output_folder,crop_filename_relative).replace('\\','/')
+
+        if os.path.isfile(crop_filename_abs) and not options.overwrite:
+            continue
+
+        cropped_image = cropped_images[i_crop]
+        os.makedirs(os.path.dirname(crop_filename_abs),exist_ok=True)
+        exif_preserving_save(cropped_image,crop_filename_abs,quality=options.quality)
+
+    # ...for each crop
+
+
+#%% Main function
+
+def crop_results_to_image_results(image_results_file_with_crop_ids,
+                                  crop_results_file,
+                                  output_file,
+                                  delete_crop_information=True):
+    """
+    This function is intended to be run after you have:
+
+    1. Run MegaDetector on a folder
+    2. Generated a crop folder using create_crop_folder
+    3. Run a species classifier on those crops
+
+    This function will take the crop-level results and transform them back
+    to the original images. Classification categories, if available, are taken
+    from [crop_results_file].
+
+    Args:
+        image_results_file_with_crop_ids (str): results file for the original images,
+            containing crop IDs, likely generated via create_crop_folder. All
+            non-standard fields in this file will be passed along to [output_file].
+        crop_results_file (str): results file for the crop folder
+        output_file (str): ouptut .json file, containing crop-level classifications
+            mapped back to the image level.
+        delete_crop_information (bool, optional): whether to delete the "crop_id" and
+            "crop_filename_relative" fields from each detection, if present.
+    """
+
+    ##%% Validate inputs
+
+    assert os.path.isfile(image_results_file_with_crop_ids), \
+        'Could not find image-level input file {}'.format(image_results_file_with_crop_ids)
+    assert os.path.isfile(crop_results_file), \
+        'Could not find crop results file {}'.format(crop_results_file)
+    os.makedirs(os.path.dirname(output_file),exist_ok=True)
+
+
+    ##%% Read input files
+
+    print('Reading input...')
+
+    with open(image_results_file_with_crop_ids,'r') as f:
+        image_results_with_crop_ids = json.load(f)
+    with open(crop_results_file,'r') as f:
+        crop_results = json.load(f)
+
+    # Find all the detection categories that need to be consistent
+    used_category_ids = set()
+    for im in tqdm(image_results_with_crop_ids['images']):
+        if 'detections' not in im or im['detections'] is None:
+            continue
+        for det in im['detections']:
+            if 'crop_id' in det:
+                used_category_ids.add(det['category'])
+
+    # Make sure the categories that matter are consistent across the two files
+    for category_id in used_category_ids:
+        category_name = image_results_with_crop_ids['detection_categories'][category_id]
+        assert category_id in crop_results['detection_categories'] and \
+            category_name == crop_results['detection_categories'][category_id], \
+            'Crop results and detection results use incompatible categories'
+
+    crop_filename_to_results = {}
+
+    # im = crop_results['images'][0]
+    for im in crop_results['images']:
+        crop_filename_to_results[im['file']] = im
+
+    if 'classification_categories' in crop_results:
+        image_results_with_crop_ids['classification_categories'] = \
+            crop_results['classification_categories']
+
+    if 'classification_category_descriptions' in crop_results:
+        image_results_with_crop_ids['classification_category_descriptions'] = \
+            crop_results['classification_category_descriptions']
+
+
+    ##%% Read classifications from crop results, merge into image-level results
+
+    # im = image_results_with_crop_ids['images'][0]
+    for im in tqdm(image_results_with_crop_ids['images']):
+
+        if 'detections' not in im or im['detections'] is None:
+            continue
+
+        for det in im['detections']:
+
+            if 'classifications' in det:
+                del det['classifications']
+
+            if 'crop_id' in det:
+                crop_filename_relative = det['crop_filename_relative']
+                assert crop_filename_relative in crop_filename_to_results, \
+                    'Crop lookup error'
+                crop_results_this_detection = crop_filename_to_results[crop_filename_relative]
+                assert crop_results_this_detection['file'] == crop_filename_relative
+                assert len(crop_results_this_detection['detections']) == 1
+                # Allow a slight confidence difference for the case where output precision was truncated
+                assert abs(crop_results_this_detection['detections'][0]['conf'] - det['conf']) < 0.01
+                assert crop_results_this_detection['detections'][0]['category'] == det['category']
+                assert crop_results_this_detection['detections'][0]['bbox'] == [0,0,1,1]
+                det['classifications'] = crop_results_this_detection['detections'][0]['classifications']
+
+            if delete_crop_information:
+                if 'crop_id' in det:
+                    del det['crop_id']
+                if 'crop_filename_relative' in det:
+                    del det['crop_filename_relative']
+
+        # ...for each detection
+
+    # ...for each image
+
+
+    ##%% Write output file
+
+    print('Writing output file...')
+
+    with open(output_file,'w') as f:
+        json.dump(image_results_with_crop_ids,f,indent=1)
+
+# ...def crop_results_to_image_results(...)
+
+
+def create_crop_folder(input_file,
+                       input_folder,
+                       output_folder,
+                       output_file=None,
+                       crops_output_file=None,
+                       options=None):
+    """
+    Given a MegaDetector .json file and a folder of images, creates a new folder
+    of images representing all above-threshold crops from the original folder.
+
+    Optionally writes a new .json file that attaches unique IDs to each detection.
+
+    Args:
+        input_file (str): MD-formatted .json file to process
+        input_folder (str): Input image folder
+        output_folder (str): Output (cropped) image folder
+        output_file (str, optional): new .json file that attaches unique IDs to each detection.
+        crops_output_file (str, optional): new .json file that includes whole-image detections
+            for each of the crops, using confidence values from the original results
+        options (CreateCropFolderOptions, optional): crop parameters
+    """
+
+    ## Validate options, prepare output folders
+
+    if options is None:
+        options = CreateCropFolderOptions()
+
+    assert os.path.isfile(input_file), 'Input file {} not found'.format(input_file)
+    assert os.path.isdir(input_folder), 'Input folder {} not found'.format(input_folder)
+    os.makedirs(output_folder,exist_ok=True)
+    if output_file is not None:
+        os.makedirs(os.path.dirname(output_file),exist_ok=True)
+
+
+    ##%% Read input
+
+    print('Reading MD results file...')
+    with open(input_file,'r') as f:
+        detection_results = json.load(f)
+
+    category_ids_to_include = None
+
+    if options.category_names_to_include is not None:
+        category_id_to_name = detection_results['detection_categories']
+        category_name_to_id = invert_dictionary(category_id_to_name)
+        category_ids_to_include = set()
+        for category_name in options.category_names_to_include:
+            assert category_name in category_name_to_id, \
+                'Unrecognized category name {}'.format(category_name)
+            category_ids_to_include.add(category_name_to_id[category_name])
+
+    ##%% Make a list of crops that we need to create
+
+    # Maps input images to list of dicts, with keys 'crop_id','detection'
+    image_fn_relative_to_crops = defaultdict(list)
+    n_crops = 0
+
+    n_detections_excluded_by_category = 0
+
+    # im = detection_results['images'][0]
+    for i_image,im in enumerate(detection_results['images']):
+
+        if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
+            continue
+
+        detections_this_image = im['detections']
+
+        image_fn_relative = im['file']
+
+        for i_detection,det in enumerate(detections_this_image):
+
+            if det['conf'] < options.confidence_threshold:
+                continue
+
+            if (category_ids_to_include is not None) and \
+               (det['category'] not in category_ids_to_include):
+                n_detections_excluded_by_category += 1
+                continue
+
+            det['crop_id'] = i_detection
+
+            crop_info = {'image_fn_relative':image_fn_relative,
+                         'crop_id':i_detection,
+                         'detection':det}
+
+            crop_filename_relative = _get_crop_filename(image_fn_relative,
+                                                        crop_info['crop_id'])
+            det['crop_filename_relative'] = crop_filename_relative
+
+            image_fn_relative_to_crops[image_fn_relative].append(crop_info)
+            n_crops += 1
+
+    # ...for each input image
+
+    print('Prepared a list of {} crops from {} of {} input images'.format(
+        n_crops,len(image_fn_relative_to_crops),len(detection_results['images'])))
+
+    if n_detections_excluded_by_category > 0:
+        print('Excluded {} detections by category'.format(n_detections_excluded_by_category))
+
+    ##%% Generate crops
+
+    if options.n_workers <= 1:
+
+        # image_fn_relative = next(iter(image_fn_relative_to_crops))
+        for image_fn_relative in tqdm(image_fn_relative_to_crops.keys()):
+            crops_this_image = image_fn_relative_to_crops[image_fn_relative]
+            _generate_crops_for_single_image(crops_this_image=crops_this_image,
+                                             input_folder=input_folder,
+                                             output_folder=output_folder,
+                                             options=options)
+
+    else:
+
+        print('Creating a {} pool with {} workers'.format(options.pool_type,options.n_workers))
+
+        if options.pool_type == 'thread':
+            pool = ThreadPool(options.n_workers)
+        else:
+            assert options.pool_type == 'process'
+            pool = Pool(options.n_workers)
+
+        # Each element in this list is the list of crops for a single image
+        crop_lists = list(image_fn_relative_to_crops.values())
+
+        with tqdm(total=len(image_fn_relative_to_crops)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(partial(
+                    _generate_crops_for_single_image,
+                    input_folder=input_folder,
+                    output_folder=output_folder,
+                    options=options),
+                    crop_lists)):
+                pbar.update()
+
+    # ...if we're using parallel processing
+
+
+    ##%% Write output file
+
+    if output_file is not None:
+        with open(output_file,'w') as f:
+            json.dump(detection_results,f,indent=1)
+
+    if crops_output_file is not None:
+
+        original_images = detection_results['images']
+
+        detection_results_cropped = detection_results
+        detection_results_cropped['images'] = []
+
+        # im = original_images[0]
+        for im in original_images:
+
+            if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
+                continue
+
+            detections_this_image = im['detections']
+            image_fn_relative = im['file']
+
+            for i_detection,det in enumerate(detections_this_image):
+
+                if 'crop_id' in det:
+                    im_out = {}
+                    im_out['file'] = det['crop_filename_relative']
+                    det_out = {}
+                    det_out['category'] = det['category']
+                    det_out['conf'] = det['conf']
+                    det_out['bbox'] = [0, 0, 1, 1]
+                    im_out['detections'] = [det_out]
+                    detection_results_cropped['images'].append(im_out)
+
+                # ...if we need to include this crop in the new .json file
+
+            # ...for each crop
+
+        # ...for each original image
+
+        with open(crops_output_file,'w') as f:
+            json.dump(detection_results_cropped,f,indent=1)
+
+# ...def create_crop_folder()
+
+
+#%% Command-line driver
+
+# TODO
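
The docstrings in this new module describe the intended workflow: run MegaDetector, generate a crop folder, run a species classifier on the crops, then fold the crop-level classifications back onto the original images. A sketch of that workflow under stated assumptions (all paths are placeholders, and the classifier in step 2 is external to this module):

    from megadetector.postprocessing.create_crop_folder import \
        CreateCropFolderOptions, create_crop_folder, crop_results_to_image_results

    options = CreateCropFolderOptions()
    options.confidence_threshold = 0.1
    options.category_names_to_include = ['animal']

    # Step 1: write one crop per above-threshold detection, plus bookkeeping .json files
    create_crop_folder(input_file='md_results.json',
                       input_folder='/data/images',
                       output_folder='/data/crops',
                       output_file='md_results_with_crop_ids.json',
                       crops_output_file='crop_results.json',
                       options=options)

    # Step 2 (outside this module): run a species classifier on /data/crops,
    # producing, e.g., 'crop_results_with_classifications.json'

    # Step 3: map the crop-level classifications back onto the original images
    crop_results_to_image_results(
        image_results_file_with_crop_ids='md_results_with_crop_ids.json',
        crop_results_file='crop_results_with_classifications.json',
        output_file='md_results_with_classifications.json')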

--- a/megadetector/postprocessing/load_api_results.py
+++ b/megadetector/postprocessing/load_api_results.py
@@ -107,6 +107,9 @@ def write_api_results(detection_results_table, other_fields, out_path):
     images = detection_results_table.to_json(orient='records',
                                              double_precision=3)
     images = json.loads(images)
+    for im in images:
+        if 'failure' in im and im['failure'] is None:
+            del im['failure']
     fields['images'] = images
 
     # Convert the 'version' field back to a string as per format convention
@@ -129,7 +132,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
     except Exception:
         print('Warning: error removing max_detection_conf from output')
         pass
-
+
     with open(out_path, 'w') as f:
         json.dump(fields, f, indent=1)
 