PyPI - megadetector - Versions diffs - 5.0.25__py3-none-any.whl → 5.0.26__py3-none-any.whl - Mend

megadetector 5.0.25__py3-none-any.whl → 5.0.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (34) hide show

megadetector/postprocessing/create_crop_folder.py CHANGED Viewed

@@ -18,6 +18,7 @@ from collections import defaultdict
 from functools import partial
 from megadetector.utils.path_utils import insert_before_extension
+from megadetector.utils.ct_utils import invert_dictionary
 from megadetector.visualization.visualization_utils import crop_image
 from megadetector.visualization.visualization_utils import exif_preserving_save
@@ -48,6 +49,11 @@ class CreateCropFolderOptions:
         #: Whether to use processes ('process') or threads ('thread') for parallelization
         self.pool_type = 'thread'
+        #: Include only these categories, or None to include all
+        #:
+        #: options.category_names_to_include = ['animal']
+        self.category_names_to_include = None
 #%% Support functions
@@ -106,7 +112,8 @@ def _generate_crops_for_single_image(crops_this_image,
 def crop_results_to_image_results(image_results_file_with_crop_ids,
                                   crop_results_file,
-                                  output_file):
+                                  output_file,
+                                  delete_crop_information=True):
     """
     This function is intended to be run after you have:
@@ -115,14 +122,18 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
         3. Run a species classifier on those crops
     This function will take the crop-level results and transform them back
-    to the original images.
+    to the original images.  Classification categories, if available, are taken
+    from [crop_results_file].
     Args:
         image_results_file_with_crop_ids (str): results file for the original images,
-            containing crop IDs, likely generated via create_crop_folder.
+            containing crop IDs, likely generated via create_crop_folder.  All
+            non-standard fields in this file will be passed along to [output_file].
         crop_results_file (str): results file for the crop folder
         output_file (str): output .json file, containing crop-level classifications
-            mapped back to the image level
+            mapped back to the image level.
+        delete_crop_information (bool, optional): whether to delete the "crop_id" and
+            "crop_filename_relative" fields from each detection, if present.
     """
     ##%% Validate inputs
@@ -136,26 +147,45 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
     ##%% Read input files
+    print('Reading input...')
     with open(image_results_file_with_crop_ids,'r') as f:
         image_results_with_crop_ids = json.load(f)
     with open(crop_results_file,'r') as f:
         crop_results = json.load(f)
-    assert crop_results['detection_categories'] == \
-        image_results_with_crop_ids['detection_categories'], \
-            'Crop results and image-level results use different detection categories'
+    # Find all the detection categories that need to be consistent
+    used_category_ids = set()
+    for im in tqdm(image_results_with_crop_ids['images']):
+        if 'detections' not in im or im['detections'] is None:
+            continue
+        for det in im['detections']:
+            if 'crop_id' in det:
+                used_category_ids.add(det['category'])
+    # Make sure the categories that matter are consistent across the two files
+    for category_id in used_category_ids:
+        category_name = image_results_with_crop_ids['detection_categories'][category_id]
+        assert category_id in crop_results['detection_categories'] and \
+            category_name == crop_results['detection_categories'][category_id], \
+                'Crop results and detection results use incompatible categories'
     crop_filename_to_results = {}
     # im = crop_results['images'][0]
     for im in crop_results['images']:
         crop_filename_to_results[im['file']] = im
-    image_results_with_crop_ids['classification_categories'] = \
-        crop_results['classification_categories']
+    if 'classification_categories' in crop_results:
+        image_results_with_crop_ids['classification_categories'] = \
+            crop_results['classification_categories']
+    if 'classification_category_descriptions' in crop_results:
+        image_results_with_crop_ids['classification_category_descriptions'] = \
+            crop_results['classification_category_descriptions']
-    ##%% Read classifications from crop results
+    ##%% Read classifications from crop results, merge into image-level results
     # im = image_results_with_crop_ids['images'][0]
     for im in tqdm(image_results_with_crop_ids['images']):
@@ -175,11 +205,18 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
                 crop_results_this_detection = crop_filename_to_results[crop_filename_relative]
                 assert crop_results_this_detection['file'] == crop_filename_relative
                 assert len(crop_results_this_detection['detections']) == 1
-                assert crop_results_this_detection['detections'][0]['conf'] == det['conf']
+                # Allow a slight confidence difference for the case where output precision was truncated
+                assert abs(crop_results_this_detection['detections'][0]['conf'] - det['conf']) < 0.01
                 assert crop_results_this_detection['detections'][0]['category'] == det['category']
                 assert crop_results_this_detection['detections'][0]['bbox'] == [0,0,1,1]
                 det['classifications'] = crop_results_this_detection['detections'][0]['classifications']
+            if delete_crop_information:
+                if 'crop_id' in det:
+                    del det['crop_id']
+                if 'crop_filename_relative' in det:
+                    del det['crop_filename_relative']
         # ...for each detection
     # ...for each image
@@ -187,6 +224,8 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
     ##%% Write output file
+    print('Writing output file...')
     with open(output_file,'w') as f:
         json.dump(image_results_with_crop_ids,f,indent=1)
@@ -223,21 +262,35 @@ def create_crop_folder(input_file,
     assert os.path.isfile(input_file), 'Input file {} not found'.format(input_file)
     assert os.path.isdir(input_folder), 'Input folder {} not found'.format(input_folder)
     os.makedirs(output_folder,exist_ok=True)
-    os.makedirs(os.path.dirname(output_file),exist_ok=True)
+    if output_file is not None:
+        os.makedirs(os.path.dirname(output_file),exist_ok=True)
     ##%% Read input
+    print('Reading MD results file...')
     with open(input_file,'r') as f:
         detection_results = json.load(f)
-    ##%% Make a list crops that we need to create
+    category_ids_to_include = None
+    if options.category_names_to_include is not None:
+        category_id_to_name = detection_results['detection_categories']
+        category_name_to_id = invert_dictionary(category_id_to_name)
+        category_ids_to_include = set()
+        for category_name in options.category_names_to_include:
+            assert category_name in category_name_to_id, \
+                'Unrecognized category name {}'.format(category_name)
+            category_ids_to_include.add(category_name_to_id[category_name])
+    ##%% Make a list of crops that we need to create
     # Maps input images to list of dicts, with keys 'crop_id','detection'
     image_fn_relative_to_crops = defaultdict(list)
     n_crops = 0
+    n_detections_excluded_by_category = 0
     # im = detection_results['images'][0]
     for i_image,im in enumerate(detection_results['images']):
@@ -249,27 +302,35 @@ def create_crop_folder(input_file,
         image_fn_relative = im['file']
         for i_detection,det in enumerate(detections_this_image):
+            if det['conf'] < options.confidence_threshold:
+                continue
+            if (category_ids_to_include is not None) and \
+                (det['category'] not in category_ids_to_include):
+                n_detections_excluded_by_category += 1
+                continue
+            det['crop_id'] = i_detection
-            if det['conf'] > options.confidence_threshold:
-                det['crop_id'] = i_detection
-                crop_info = {'image_fn_relative':image_fn_relative,
-                             'crop_id':i_detection,
-                             'detection':det}
-                crop_filename_relative = _get_crop_filename(image_fn_relative,
-                                                            crop_info['crop_id'])
-                det['crop_filename_relative'] = crop_filename_relative
+            crop_info = {'image_fn_relative':image_fn_relative,
+                            'crop_id':i_detection,
+                            'detection':det}
+            crop_filename_relative = _get_crop_filename(image_fn_relative,
+                                                        crop_info['crop_id'])
+            det['crop_filename_relative'] = crop_filename_relative
-                image_fn_relative_to_crops[image_fn_relative].append(crop_info)
-                n_crops += 1
+            image_fn_relative_to_crops[image_fn_relative].append(crop_info)
+            n_crops += 1
     # ...for each input image
     print('Prepared a list of {} crops from {} of {} input images'.format(
         n_crops,len(image_fn_relative_to_crops),len(detection_results['images'])))
+    if n_detections_excluded_by_category > 0:
+        print('Excluded {} detections by category'.format(n_detections_excluded_by_category))
     ##%% Generate crops
@@ -307,6 +368,7 @@ def create_crop_folder(input_file,
     # ...if we're using parallel processing
     ##%% Write output file
     if output_file is not None:

megadetector/postprocessing/load_api_results.py CHANGED Viewed

@@ -107,6 +107,9 @@ def write_api_results(detection_results_table, other_fields, out_path):
     images = detection_results_table.to_json(orient='records',
                                              double_precision=3)
     images = json.loads(images)
+    for im in images:
+        if 'failure' in im and im['failure'] is None:
+            del im['failure']
     fields['images'] = images
     # Convert the 'version' field back to a string as per format convention
@@ -129,7 +132,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
     except Exception:
         print('Warning: error removing max_detection_conf from output')
         pass
     with open(out_path, 'w') as f:
         json.dump(fields, f, indent=1)

megadetector/postprocessing/md_to_coco.py CHANGED Viewed

@@ -181,7 +181,7 @@ def md_to_coco(md_results_file,
             w = im['width']
             h = im['height']
         coco_im['width'] = w
         coco_im['height'] = h

megadetector/postprocessing/postprocess_batch_results.py CHANGED Viewed

@@ -30,6 +30,7 @@ import time
 import uuid
 import warnings
 import random
+import json
 from enum import IntEnum
 from multiprocessing.pool import ThreadPool
@@ -48,8 +49,11 @@ from megadetector.visualization import visualization_utils as vis_utils
 from megadetector.visualization import plot_utils
 from megadetector.utils.write_html_image_list import write_html_image_list
 from megadetector.utils import path_utils
-from megadetector.utils.ct_utils import args_to_object, sets_overlap
-from megadetector.data_management.cct_json_utils import (CameraTrapJsonUtils, IndexedJsonDb)
+from megadetector.utils.ct_utils import args_to_object
+from megadetector.utils.ct_utils import sets_overlap
+from megadetector.utils.ct_utils import sort_dictionary_by_value
+from megadetector.data_management.cct_json_utils import CameraTrapJsonUtils
+from megadetector.data_management.cct_json_utils import IndexedJsonDb
 from megadetector.postprocessing.load_api_results import load_api_results
 from megadetector.detection.run_detector import get_typical_confidence_threshold_from_results
@@ -214,6 +218,15 @@ class PostProcessingOptions:
         #: Character encoding to use when writing the index HTML file
         self.output_html_encoding = None
+        #: Additional image fields to display in image headers.  If this is a list,
+        #: we'll include those fields; if this is a dict, we'll use that dict to choose
+        #: alternative display names for each field.
+        self.additional_image_fields_to_display = None
+        #: If classification results are present, should we include a summary of
+        #: classification categories?
+        self.include_classification_category_report = True
     # ...__init__()
 # ...PostProcessingOptions
@@ -434,15 +447,6 @@ def _render_bounding_boxes(
     if options is None:
         options = PostProcessingOptions()
-    # Leaving code in place for reading from blob storage, may support this
-    # in the future.
-    """
-    stream = io.BytesIO()
-    _ = blob_service.get_blob_to_stream(container_name, image_id, stream)
-    # resize is to display them in this notebook or in the HTML more quickly
-    image = Image.open(stream).resize(viz_size)
-    """
     image_full_path = None
     if res in options.rendering_bypass_sets:
@@ -472,10 +476,12 @@ def _render_bounding_boxes(
         if image is not None:
             original_size = image.size
+            # Resize the image if necessary
             if options.viz_target_width is not None:
                 image = vis_utils.resize_image(image, options.viz_target_width)
+            # Render ground truth boxes if necessary
             if ground_truth_boxes is not None and len(ground_truth_boxes) > 0:
                 # Create class labels like "gt_1" or "gt_27"
@@ -487,8 +493,7 @@ def _render_bounding_boxes(
                                                    original_size=original_size,label_map=label_map,
                                                    thickness=4,expansion=4)
-            # render_detection_bounding_boxes expects either a float or a dict mapping
-            # category IDs to names.
+            # Prepare per-category confidence thresholds
             if isinstance(options.confidence_threshold,float):
                 rendering_confidence_threshold = options.confidence_threshold
             else:
@@ -499,12 +504,14 @@ def _render_bounding_boxes(
                 for category_id in category_ids:
                     rendering_confidence_threshold[category_id] = \
                         _get_threshold_for_category_id(category_id, options, detection_categories)
+            # Render detection boxes
             vis_utils.render_detection_bounding_boxes(
                 detections, image,
                 label_map=detection_categories,
                 classification_label_map=classification_categories,
                 confidence_threshold=rendering_confidence_threshold,
+                classification_confidence_threshold=options.classification_confidence_threshold,
                 thickness=options.line_thickness,
                 expansion=options.box_expansion)
@@ -686,9 +693,11 @@ def _has_positive_detection(detections,options,detection_categories):
     return found_positive_detection
-def _render_image_no_gt(file_info,detection_categories_to_results_name,
-                       detection_categories,classification_categories,
-                       options):
+def _render_image_no_gt(file_info,
+                        detection_categories_to_results_name,
+                        detection_categories,
+                        classification_categories,
+                        options):
     """
     Renders an image (with no ground truth information)
@@ -713,9 +722,15 @@ def _render_image_no_gt(file_info,detection_categories_to_results_name,
     Returns None if there are any errors.
     """
-    image_relative_path = file_info[0]
-    max_conf = file_info[1]
-    detections = file_info[2]
+    image_relative_path = file_info['file']
+    # Useful debug snippet
+    #
+    # if 'filename' in image_relative_path:
+    #    import pdb; pdb.set_trace()
+    max_conf = file_info['max_detection_conf']
+    detections = file_info['detections']
     # Determine whether any positive detections are present (using a threshold that
     # may vary by category)
@@ -749,9 +764,31 @@ def _render_image_no_gt(file_info,detection_categories_to_results_name,
         assert detection_status == DetectionStatus.DS_ALMOST
         res = 'almost_detections'
-    display_name = '<b>Result type</b>: {}, <b>Image</b>: {}, <b>Max conf</b>: {:0.3f}'.format(
+    display_name = '<b>Result type</b>: {}, <b>image</b>: {}, <b>max conf</b>: {:0.3f}'.format(
         res, image_relative_path, max_conf)
+    # Are there any bonus fields we need to include in each image header?
+    if options.additional_image_fields_to_display is not None:
+        for field_name in options.additional_image_fields_to_display:
+            if field_name in file_info:
+                field_value = file_info[field_name]
+                if (field_value is None) or \
+                    (isinstance(field_value,float) and np.isnan(field_value)):
+                        continue
+                # Optionally use a display name that's different from the field name
+                if isinstance(options.additional_image_fields_to_display,dict):
+                    field_display_name = \
+                        options.additional_image_fields_to_display[field_name]
+                else:
+                    field_display_name = field_name
+                field_string = '<b>{}</b>: {}'.format(field_display_name,field_value)
+                display_name += ', {}'.format(field_string)
     rendering_options = copy.copy(options)
     if detection_status == DetectionStatus.DS_ALMOST:
         rendering_options.confidence_threshold = \
@@ -781,17 +818,24 @@ def _render_image_no_gt(file_info,detection_categories_to_results_name,
             if det['conf'] > max_conf:
                 max_conf = det['conf']
+            # We make the decision here that only "detections" (not "almost-detections")
+            # will appear on the classification category pages
+            detection_threshold = \
+                _get_threshold_for_category_id(det['category'], options, detection_categories)
+            if det['conf'] < detection_threshold:
+                continue
             if ('classifications' in det) and (len(det['classifications']) > 0) and \
                 (res != 'non_detections'):
-                # This is a list of [class,confidence] pairs, sorted by confidence
+                # This is a list of [class,confidence] pairs, sorted by classification confidence
                 classifications = det['classifications']
                 top1_class_id = classifications[0][0]
                 top1_class_name = classification_categories[top1_class_id]
                 top1_class_score = classifications[0][1]
-                # If we either don't have a confidence threshold, or we've met our
-                # confidence threshold
+                # If we either don't have a classification confidence threshold, or
+                # we've met our classification confidence threshold
                 if (options.classification_confidence_threshold < 0) or \
                     (top1_class_score >= options.classification_confidence_threshold):
                     class_string = 'class_{}'.format(top1_class_name)
@@ -823,9 +867,9 @@ def _render_image_with_gt(file_info,ground_truth_indexed_db,
     data format.
     """
-    image_relative_path = file_info[0]
-    max_conf = file_info[1]
-    detections = file_info[2]
+    image_relative_path = file_info['file']
+    max_conf = file_info['max_detection_conf']
+    detections = file_info['detections']
     # This should already have been normalized to either '/' or '\'
@@ -971,6 +1015,7 @@ def process_batch_results(options):
             print('\n*** Warning: {} images with ambiguous positive/negative status found in ground truth ***\n'.format(
                 n_ambiguous))
     ##%% Load detection (and possibly classification) results
     # If the caller hasn't supplied results, load them
@@ -1028,6 +1073,8 @@ def process_batch_results(options):
     n_positives = 0
     n_almosts = 0
+    print('Assigning images to rendering categories')
     for i_row,row in tqdm(detections_df.iterrows(),total=len(detections_df)):
         detections = row['detections']
@@ -1372,7 +1419,7 @@ def process_batch_results(options):
         for _, row in images_to_visualize.iterrows():
             # Filenames should already have been normalized to either '/' or '\'
-            files_to_render.append([row['file'], row['max_detection_conf'], row['detections']])
+            files_to_render.append(row.to_dict())
         start_time = time.time()
         if options.parallelize_rendering:
@@ -1523,8 +1570,13 @@ def process_batch_results(options):
                     len(images_html['class_{}'.format(cname)]))
             index_page += '</div>'
-        # Close body and html tags
-        index_page += '{}</body></html>'.format(options.footer_text)
+        # Write custom footer if it was provided
+        if (options.footer_text is not None) and (len(options.footer_text) > 0):
+            index_page += '{}\n'.format(options.footer_text)
+        # Close open html tags
+        index_page += '\n</body></html>\n'
         output_html_file = os.path.join(output_dir, 'index.html')
         with open(output_html_file, 'w',
                   encoding=options.output_html_encoding) as f:
@@ -1532,7 +1584,7 @@ def process_batch_results(options):
         print('Finished writing html to {}'.format(output_html_file))
-    # ...for each image
+    # ...if we have ground truth
     ##%% Otherwise, if we don't have ground truth...
@@ -1618,9 +1670,7 @@ def process_batch_results(options):
             assert isinstance(row['detections'],list)
             # Filenames should already have been normalized to either '/' or '\'
-            files_to_render.append([row['file'],
-                                    row['max_detection_conf'],
-                                    row['detections']])
+            files_to_render.append(row.to_dict())
         start_time = time.time()
         if options.parallelize_rendering:
@@ -1691,8 +1741,7 @@ def process_batch_results(options):
         # Write index.html
         # We can't just sum these, because image_counts includes images in both their
-        # detection and classification classes
-        # total_images = sum(image_counts.values())
+        # detection and classification classes
         total_images = 0
         for k in image_counts.keys():
             v = image_counts[k]
@@ -1779,9 +1828,15 @@ def process_batch_results(options):
             else:
                 index_page += '<a href="{}">{}</a> ({}, {:.1%})<br/>\n'.format(
                     filename,label,image_count,image_fraction)
+        # ...for each result set
         index_page += '</div>\n'
+        # If classification information is present and we're supposed to create
+        # a summary of classifications, we'll put it here
+        category_count_footer = None
         if has_classification_info:
             index_page += '<h3>Species classification results</h3>'
@@ -1810,15 +1865,74 @@ def process_batch_results(options):
                         cname, cname.lower(), ccount)
             index_page += '</div>\n'
-        index_page += '{}</body></html>'.format(options.footer_text)
+            if options.include_classification_category_report:
+                # TODO: it's only for silly historical reasons that we re-read
+                # the input file in this case; we're not currently carrying the json
+                # representation around, only the Pandas representation.
+                print('Generating classification category report')
+                with open(options.md_results_file,'r') as f:
+                    d = json.load(f)
+                classification_category_to_count = {}
+                # im = d['images'][0]
+                for im in d['images']:
+                    if 'detections' in im and im['detections'] is not None:
+                        for det in im['detections']:
+                            if 'classifications' in det:
+                                class_id = det['classifications'][0][0]
+                                if class_id not in classification_category_to_count:
+                                    classification_category_to_count[class_id] = 0
+                                else:
+                                    classification_category_to_count[class_id] = \
+                                        classification_category_to_count[class_id] + 1
+                category_name_to_count = {}
+                for class_id in classification_category_to_count:
+                    category_name = d['classification_categories'][class_id]
+                    category_name_to_count[category_name] = \
+                        classification_category_to_count[class_id]
+                category_name_to_count = sort_dictionary_by_value(
+                    category_name_to_count,reverse=True)
+                category_count_footer = ''
+                category_count_footer += '<br/>\n'
+                category_count_footer += \
+                    '<h3>Category counts (for the whole dataset, not just the sample used for this page)</h3>\n'
+                category_count_footer += '<div class="contentdiv">\n'
+                for category_name in category_name_to_count.keys():
+                    count = category_name_to_count[category_name]
+                    category_count_html = '{}: {}<br>\n'.format(category_name,count)
+                    category_count_footer += category_count_html
+                category_count_footer += '</div>\n'
+            # ...if we're generating a classification category report
+        # ...if classification info is present
+        if category_count_footer is not None:
+            index_page += category_count_footer + '\n'
+        # Write custom footer if it was provided
+        if (options.footer_text is not None) and (len(options.footer_text) > 0):
+            index_page += options.footer_text + '\n'
+        # Close open html tags
+        index_page += '\n</body></html>\n'
         output_html_file = os.path.join(output_dir, 'index.html')
         with open(output_html_file, 'w',
                   encoding=options.output_html_encoding) as f:
             f.write(index_page)
-        print('Finished writing html to {}'.format(output_html_file))
-        # os.startfile(output_html_file)
+        print('Finished writing html to {}'.format(output_html_file))
     # ...if we do/don't have ground truth

megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py CHANGED Viewed

@@ -41,7 +41,6 @@ if False:
     baseDir = ''
     options = repeat_detections_core.RepeatDetectionOptions()
-    options.bRenderHtml = True
     options.imageBase = baseDir
     options.outputBase = os.path.join(baseDir, 'repeat_detections')
     options.filenameReplacements = {}  # E.g., {'20190430cameratraps\\':''}
@@ -85,11 +84,10 @@ def main():
                              'do manual review of the repeat detection images (which you should)')
     parser.add_argument('--imageBase', action='store', type=str, default='',
-                        help='Image base dir, relevant if renderHtml is True or if ' + \
-                             '"omitFilteringFolder" is not set')
+                        help='Image base dir')
     parser.add_argument('--outputBase', action='store', type=str, default='',
-                        help='HTML or filtering folder output dir')
+                        help='filtering folder output dir')
     parser.add_argument('--confidenceMin', action='store', type=float,
                         default=defaultOptions.confidenceMin,
@@ -146,7 +144,7 @@ def main():
     parser.add_argument('--omitFilteringFolder', action='store_false',
                         dest='bWriteFilteringFolder',
-                        help='Should we create a folder of rendered detections for post-filtering?')
+                        help='Should we skip creating the folder of rendered detections filtering?')
     parser.add_argument('--debugMaxDir', action='store', type=int, default=-1,
                         help='For debugging only, limit the number of directories we process')
@@ -191,9 +189,6 @@ def main():
                         default=defaultOptions.detectionTilesPrimaryImageWidth,
                         help='The width of the main image when rendering images with detection tiles')
-    parser.add_argument('--renderHtml', action='store_true',
-                        dest='bRenderHtml', help='Should we render HTML output?')
     if len(sys.argv[1:]) == 0:
         parser.print_help()
         parser.exit()

megadetector 5.0.25__py3-none-any.whl → 5.0.26__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.25__py3-none-any.whl → 5.0.26__py3-none-any.whl