PyPI - megadetector - Versions diffs - 5.0.20__py3-none-any.whl → 5.0.21__py3-none-any.whl - Mend

megadetector 5.0.20__py3-none-any.whl → 5.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (19)

megadetector/data_management/importers/osu-small-animals-to-json.py CHANGED Viewed

@@ -12,17 +12,17 @@ Prepare the OSU Small Animals dataset for LILA release:
 import os
-input_folder = r'G:\temp\osu-small-animals'
+input_folder = os.path.expanduser('~/osu-small-animals')
 assert os.path.isdir(input_folder)
-output_folder = r'G:\temp\osu-small-animals-lila'
+output_folder = os.path.expanduser('~/osu-small-animals-lila')
 os.makedirs(output_folder,exist_ok=True)
 output_file = os.path.join(output_folder,'osu-small-animals.json')
-preview_folder = r'G:\temp\osu-small-animals-preview'
+preview_folder = os.path.expanduser('~/osu-small-animals-preview')
 os.makedirs(preview_folder,exist_ok=True)
-common_to_latin_file = r'c:\git\agentmorrisprivate\camera-traps\osu-small-animals-common-to-latin.txt'
+common_to_latin_file = r'osu-small-animals-common-to-latin.txt'
 assert os.path.isfile(common_to_latin_file)

megadetector/data_management/yolo_output_to_md_output.py CHANGED Viewed

@@ -59,19 +59,21 @@ def read_classes_from_yolo_dataset_file(fn):
     integer category IDs to string category names.
     Args:
-        fn (str): YOLOv5/YOLOv8 dataset file with a .yml or .yaml extension, or a .json file
-            mapping integer category IDs to category names.
+        fn (str): YOLOv5/YOLOv8 dataset file with a .yml or .yaml extension, a .json file
+            mapping integer category IDs to category names, or a .txt file with a flat
+            list of classes.
     Returns:
         dict: a mapping from integer category IDs to category names
     """
+    category_id_to_name = {}
     if fn.endswith('.yml') or fn.endswith('.yaml'):
         with open(fn,'r') as f:
             lines = f.readlines()
-        category_id_to_name = {}
         pat = '\d+:.+'
         for s in lines:
             if re.search(pat,s) is not None:
@@ -83,10 +85,21 @@ def read_classes_from_yolo_dataset_file(fn):
         with open(fn,'r') as f:
             d_in = json.load(f)
-            category_id_to_name = {}
             for k in d_in.keys():
                 category_id_to_name[int(k)] = d_in[k]
+    elif fn.endswith('.txt'):
+        with open(fn,'r') as f:
+            lines = f.readlines()
+        next_category_id = 0
+        for line in lines:
+            s = line.strip()
+            if len(s) == 0:
+                continue
+            category_id_to_name[next_category_id] = s
+            next_category_id += 1
     else:
         raise ValueError('Unrecognized category file type: {}'.format(fn))

megadetector/detection/video_utils.py CHANGED Viewed

@@ -678,12 +678,18 @@ def _video_to_frames_for_folder(relative_fn,input_folder,output_folder_base,
     return frame_filenames,fs
-def video_folder_to_frames(input_folder, output_folder_base,
-                           recursive=True, overwrite=True,
-                           n_threads=1, every_n_frames=None,
-                           verbose=False, parallelization_uses_threads=True,
-                           quality=None, max_width=None,
-                           frames_to_extract=None, allow_empty_videos=False):
+def video_folder_to_frames(input_folder,
+                           output_folder_base,
+                           recursive=True,
+                           overwrite=True,
+                           n_threads=1,
+                           every_n_frames=None,
+                           verbose=False,
+                           parallelization_uses_threads=True,
+                           quality=None,
+                           max_width=None,
+                           frames_to_extract=None,
+                           allow_empty_videos=False):
     """
     For every video file in input_folder, creates a folder within output_folder_base, and
     renders frame of that video to images in that folder.
@@ -709,6 +715,8 @@ def video_folder_to_frames(input_folder, output_folder_base,
             each video; mutually exclusive with every_n_frames.  If all values are beyond
             the length of a video, no frames are extracted. Can also be a single int,
             specifying a single frame number.
+        allow_empty_videos (bool, optional): Just print a warning if a video appears to have no
+            frames (by default, this is an error).
     Returns:
         tuple: a length-3 tuple containing:
@@ -719,8 +727,11 @@ def video_folder_to_frames(input_folder, output_folder_base,
     """
     # Recursively enumerate video files
+    if verbose:
+        print('Enumerating videos in {}'.format(input_folder))
     input_files_full_paths = find_videos(input_folder,recursive=recursive)
-    print('Found {} videos in folder {}'.format(len(input_files_full_paths),input_folder))
+    if verbose:
+        print('Found {} videos in folder {}'.format(len(input_files_full_paths),input_folder))
     if len(input_files_full_paths) == 0:
         return [],[],[]
@@ -974,6 +985,7 @@ if False:
     results_file = r'results.json'
     confidence_threshold = 0.75
     #%% Load detector output
     with open(results_file,'r') as f:

megadetector/postprocessing/combine_api_outputs.py CHANGED Viewed

@@ -192,7 +192,7 @@ def combine_api_shard_files(input_files, output_file=None):
     Args:
         input_files (list of str): files to merge
-        output_file (str, optiona): file to which we should write merged results
+        output_file (str, optional): file to which we should write merged results
     Returns:
         dict: merged results

megadetector/postprocessing/detector_calibration.py ADDED Viewed

@@ -0,0 +1,367 @@
+"""
+detector_calibration.py
+Tools for comparing/calibrating confidence values from detectors, particularly different
+versions of MegaDetector.
+"""
+#%% Constants and imports
+import random
+from tqdm import tqdm
+from enum import IntEnum
+from collections import defaultdict
+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+from megadetector.postprocessing.validate_batch_results import \
+    validate_batch_results, ValidateBatchResultsOptions
+from megadetector.utils.ct_utils import get_iou
+#%% Classes
+class CalibrationOptions:
+    """
+    Options controlling comparison/calibration behavior.
+    """
+    def __init__(self):
+        #: IoU threshold used for determining whether two detections are the same
+        #:
+        #: When multiple detections match, we will only use the highest-matching IoU.
+        self.iou_threshold = 0.75
+        #: Minimum confidence threshold to consider for calibration (should be lower than
+        #: the lowest value you would use in realistic situations)
+        self.confidence_threshold = 0.025
+        #: Should we populate the data_a and data_b fields in the return value?
+        self.return_data = False
+        #: Model name to use in printouts and plots for result set A
+        self.model_name_a = 'model_a'
+        #: Model name to use in printouts and plots for result set B
+        self.model_name_b = 'model_b'
+        #: Maximum number of samples to use for plotting or calibration per category,
+        #: or None to use all paired values.
+        self.max_samples_per_category = None
+        #: List of category IDs to use for plotting comparisons, or None to plot
+        #: all categories.
+        self.categories_to_plot = None
+        #: Optionally map category ID to name in plot labels
+        self.category_id_to_name = None
+# ...class CalibrationOptions
+class ConfidenceMatchColumns(IntEnum):
+    COLUMN_CONF_A = 0
+    COLUMN_CONF_B = 1
+    COLUMN_CONF_IOU = 2
+    COLUMN_CONF_I_IMAGE = 3
+    COLUMN_CONF_CATEGORY_ID = 4
+class CalibrationResults:
+    """
+    Results of a model-to-model comparison.
+    """
+    def __init__(self):
+        #: List of tuples: [conf_a, conf_b, iou, i_image, category_id]
+        self.confidence_matches = []
+        #: Populated with the data loaded from json_filename_a if options.return_data is True
+        self.data_a = None
+        #: Populated with the data loaded from json_filename_b if options.return_data is True
+        self.data_b = None
+# ...class CalibrationResults
+#%% Calibration functions
+def compare_model_confidence_values(json_filename_a,json_filename_b,options=None):
+    """
+    Compare confidence values across two .json results files.  Compares only detections that
+    can be matched by IoU, i.e., does not do anything with detections that only appear in one file.
+    Args:
+        json_filename_a (str or dict): filename containing results from the first model to be compared;
+            should refer to the same images as [json_filename_b].  Can also be a loaded results dict.
+        json_filename_b (str or dict): filename containing results from the second model to be compared;
+            should refer to the same images as [json_filename_a].  Can also be a loaded results dict.
+        options (CalibrationOptions, optional): all the parameters used to control this process, see
+            CalibrationOptions for details
+    Returns:
+        CalibrationResults: description of the comparison results
+    """
+    ## Option handling
+    if options is None:
+        options = CalibrationOptions()
+    validation_options = ValidateBatchResultsOptions()
+    validation_options.return_data = True
+    if isinstance(json_filename_a,str):
+        results_a = validate_batch_results(json_filename_a,options=validation_options)
+        assert len(results_a['validation_results']['errors']) == 0
+    else:
+        assert isinstance(json_filename_a,dict)
+        results_a = json_filename_a
+    if isinstance(json_filename_b,str):
+        results_b = validate_batch_results(json_filename_b,options=validation_options)
+        assert len(results_b['validation_results']['errors']) == 0
+    else:
+        assert isinstance(json_filename_b,dict)
+        results_b = json_filename_b
+    ## Make sure these results sets are comparable
+    image_filenames_a = [im['file'] for im in results_a['images']]
+    image_filenames_b = [im['file'] for im in results_b['images']]
+    assert set(image_filenames_a) == set(image_filenames_b), \
+        'Cannot calibrate non-matching image sets'
+    categories_a = results_a['detection_categories']
+    categories_b = results_b['detection_categories']
+    assert set(categories_a.keys()) == set(categories_b.keys())
+    for k in categories_a.keys():
+        assert categories_a[k] == categories_b[k], 'Category mismatch'
+    ## Compare detections
+    image_filename_b_to_im = {}
+    for im in results_b['images']:
+        image_filename_b_to_im[im['file']] = im
+    n_detections_a = 0
+    n_detections_a_queried = 0
+    n_detections_a_matched = 0
+    confidence_matches = []
+    # For each image
+    # im_a = results_a['images'][0]
+    for i_image,im_a in tqdm(enumerate(results_a['images']),total=len(results_a['images'])):
+        fn = im_a['file']
+        im_b = image_filename_b_to_im[fn]
+        if 'detections' not in im_a or im_a['detections'] is None:
+            continue
+        if 'detections' not in im_b or im_b['detections'] is None:
+            continue
+        # For each detection in result set A...
+        #
+        # det_a = im_a['detections'][0]
+        for det_a in im_a['detections']:
+            n_detections_a += 1
+            conf_a = det_a['conf']
+            category_id = det_a['category']
+            # Is this above threshold?
+            if conf_a < options.confidence_threshold:
+                continue
+            n_detections_a_queried += 1
+            bbox_a = det_a['bbox']
+            best_iou = None
+            best_iou_conf = None
+            # For each detection in result set B...
+            #
+            # det_b = im_b['detections'][0]
+            for det_b in im_b['detections']:
+                # Is this the same category?
+                if det_b['category'] != category_id:
+                    continue
+                conf_b = det_b['conf']
+                # Is this above threshold?
+                if conf_b < options.confidence_threshold:
+                    continue
+                bbox_b = det_b['bbox']
+                iou = get_iou(bbox_a,bbox_b)
+                # Is this an adequate IoU to consider?
+                if iou < options.iou_threshold:
+                    continue
+                # Is this the best match so far?
+                if best_iou is None or iou > best_iou:
+                    best_iou = iou
+                    best_iou_conf = conf_b
+            # ...for each detection in im_b
+            if best_iou is not None:
+                n_detections_a_matched += 1
+                conf_result = [conf_a,best_iou_conf,best_iou,i_image,category_id]
+                confidence_matches.append(conf_result)
+        # ...for each detection in im_a
+    # ...for each image in result set A
+    print('\nOf {} detections in result set A, queried {}, matched {}'.format(
+        n_detections_a,n_detections_a_queried,n_detections_a_matched))
+    assert len(confidence_matches) == n_detections_a_matched
+    calibration_results = CalibrationResults()
+    calibration_results.confidence_matches = confidence_matches
+    if options.return_data:
+        calibration_results.data_a = results_a
+        calibration_results.data_b = results_b
+    return calibration_results
+# ...def compare_model_confidence_values(...)
+#%% Plotting functions
+def plot_matched_confidence_values(calibration_results,output_filename,options=None):
+    """
+    Given a set of paired confidence values for matching detections (from
+    compare_model_confidence_values), plot histograms of those pairs for each
+    detection category.
+    Args:
+        calibration_results (CalibrationResults): output from a call to
+            compare_model_confidence_values, containing paired confidence
+            values for two sets of detection results.
+        output_filename (str): filename to write the plot (.png or .jpg)
+        options (CalibrationOptions, optional): plotting options, see
+            CalibrationOptions for details.
+    """
+    fig_w = 12
+    fig_h = 8
+    n_hist_bins = 80
+    if options is None:
+        options = CalibrationOptions()
+    # Find matched confidence pairs for each category ID
+    category_to_matches = defaultdict(list)
+    confidence_matches = calibration_results.confidence_matches
+    for m in confidence_matches:
+        category_id = m[ConfidenceMatchColumns.COLUMN_CONF_CATEGORY_ID]
+        category_to_matches[category_id].append(m)
+    # Optionally sample matches
+    category_to_samples = defaultdict(list)
+    for i_category,category_id in enumerate(category_to_matches.keys()):
+        matches_this_category = category_to_matches[category_id]
+        if (options.max_samples_per_category is None) or \
+            (len(matches_this_category) <= options.max_samples_per_category):
+            category_to_samples[category_id] = matches_this_category
+        else:
+            assert len(matches_this_category) > options.max_samples_per_category
+            category_to_samples[category_id] = random.sample(matches_this_category,options.max_samples_per_category)
+    del category_to_matches
+    del confidence_matches
+    categories_to_plot = list(category_to_samples.keys())
+    if options.categories_to_plot is not None:
+        categories_to_plot = [category_id for category_id in categories_to_plot if\
+                              category_id in options.categories_to_plot]
+    n_subplots = len(categories_to_plot)
+    plt.ioff()
+    fig = matplotlib.figure.Figure(figsize=(fig_w, fig_h), tight_layout=True)
+    # fig,axes = plt.subplots(nrows=n_subplots,ncols=1)
+    axes = fig.subplots(n_subplots, 1)
+    # i_category = 0; category_id = categories_to_plot[i_category]
+    for i_category,category_id in enumerate(categories_to_plot):
+        ax = axes[i_category]
+        category_string = category_id
+        if options.category_id_to_name is not None and \
+            category_id in options.category_id_to_name:
+            category_string = options.category_id_to_name[category_id]
+        samples_this_category = category_to_samples[category_id]
+        x = [m[0] for m in samples_this_category]
+        y = [m[1] for m in samples_this_category]
+        weights_a = np.ones_like(x)/float(len(x))
+        weights_b = np.ones_like(y)/float(len(y))
+        ax.hist(x,histtype='step',bins=n_hist_bins,density=False,color='red',weights=weights_a)
+        ax.hist(y,histtype='step',bins=n_hist_bins,density=False,color='blue',weights=weights_b)
+        ax.legend([options.model_name_a,options.model_name_b])
+        ax.set_ylabel(category_string)
+        # plt.tight_layout()
+        # I experimented with heat maps, but they weren't very informative.
+        # Leaving this code here in case I revisit.  Note to self: scatter plots
+        # were a disaster.
+        if False:
+            heatmap, xedges, yedges = np.histogram2d(x, y, bins=30)
+            extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
+            plt.imshow(heatmap.T, extent=extent, origin='lower', norm='log')
+    # ...for each category for which we need to generate a histogram
+    plt.close(fig)
+    fig.savefig(output_filename,dpi=100)
+# ...def plot_matched_confidence_values(...)
+#%% Interactive driver(s)
+if False:
+    #%%
+    options = ValidateBatchResultsOptions()
+    # json_filename = r'g:\temp\format.json'
+    # json_filename = r'g:\temp\test-videos\video_results.json'
+    json_filename = r'g:\temp\test-videos\image_results.json'
+    options.check_image_existence = True
+    options.relative_path_base = r'g:\temp\test-videos'
+    validate_batch_results(json_filename,options)

megadetector/postprocessing/md_to_coco.py CHANGED Viewed

@@ -41,7 +41,8 @@ def md_to_coco(md_results_file,
     The default confidence threshold is not 0; the assumption is that by default, you are
     going to treat the resulting COCO file as a set of labels.  If you are using the resulting COCO
     file to evaluate a detector, you likely want a default confidence threshold of 0.  Confidence
-    values will be written to the semi-standard "score" field for each image
+    values will be written to the semi-standard "score" field for each image if
+    preserve_nonstandard_metadata is True.
     A folder of images is required if width and height information are not available
     in the MD results file.

megadetector/postprocessing/postprocess_batch_results.py CHANGED Viewed

@@ -92,16 +92,18 @@ class PostProcessingOptions:
         #: Optional .json file containing ground truth information
         self.ground_truth_json_file = ''
-        #: Classes we'll treat as negative
+        #: List of classes we'll treat as negative (defaults to "empty", typically includes
+        #: classes like "blank", "misfire", etc.).
         #:
         #: Include the token "#NO_LABELS#" to indicate that an image with no annotations
         #: should be considered empty.
         self.negative_classes = DEFAULT_NEGATIVE_CLASSES
-        #: Classes we'll treat as neither positive nor negative
+        #: List of classes we'll treat as neither positive nor negative (defaults to
+        #: "unknown", typically includes classes like "unidentifiable").
         self.unlabeled_classes = DEFAULT_UNKNOWN_CLASSES
-        #: A list of output sets that we should count, but not render images for.
+        #: List of output sets that we should count, but not render images for.
         #:
         #: Typically used to preview sets with lots of empties, where you don't want to
         #: subset but also don't want to render 100,000 empty images.
@@ -198,11 +200,16 @@ class PostProcessingOptions:
         #: When classification results are present, should be sort alphabetically by class name (False)
         #: or in descending order by frequency (True)?
-        self.sort_classification_results_by_count = False
+        self.sort_classification_results_by_count = False
         #: Should we split individual pages up into smaller pages if there are more than
         #: N images?
         self.max_figures_per_html_file = None
+        #: Footer text for the index page
+        # self.footer_text = '<br/><p style="font-size:80%;">Preview page created with the <a href="{}">MegaDetector Python package</a>.</p>'.\
+        #    format('https://megadetector.readthedocs.io')
+        self.footer_text = ''
     # ...__init__()
@@ -590,6 +597,7 @@ def _prepare_html_subpages(images_html, output_dir, options=None):
         html_image_list_options = {}
         html_image_list_options['maxFiguresPerHtmlFile'] = options.max_figures_per_html_file
         html_image_list_options['headerHtml'] = '<h1>{}</h1>'.format(res.upper())
+        html_image_list_options['pageTitle'] = '{}'.format(res.lower())
         # Don't write empty pages
         if len(array) == 0:
@@ -762,7 +770,7 @@ def _render_image_no_gt(file_info,detection_categories_to_results_name,
     if len(rendered_image_html_info) > 0:
         image_result = [[res, rendered_image_html_info]]
+        classes_rendered_this_image = set()
         max_conf = 0
         for det in detections:
@@ -782,11 +790,14 @@ def _render_image_no_gt(file_info,detection_categories_to_results_name,
                 # confidence threshold
                 if (options.classification_confidence_threshold < 0) or \
                     (top1_class_score >= options.classification_confidence_threshold):
-                    image_result.append(['class_{}'.format(top1_class_name),
-                                         rendered_image_html_info])
+                    class_string = 'class_{}'.format(top1_class_name)
                 else:
-                    image_result.append(['class_unreliable',
+                    class_string = 'class_unreliable'
+                if class_string not in classes_rendered_this_image:
+                    image_result.append([class_string,
                                          rendered_image_html_info])
+                    classes_rendered_this_image.add(class_string)
             # ...if this detection has classification info
@@ -1083,7 +1094,8 @@ def process_batch_results(options):
     output_html_file = ''
-    style_header = """<head>
+    style_header = """<head>
+        <title>Detection results preview</title>
         <style type="text/css">
         a { text-decoration: none; }
         body { font-family: segoe ui, calibri, "trebuchet ms", verdana, arial, sans-serif; }
@@ -1424,7 +1436,7 @@ def process_batch_results(options):
         else:
             confidence_threshold_string = str(options.confidence_threshold)
-        index_page = """<html>
+        index_page = """<html>
         {}
         <body>
         <h2>Evaluation</h2>
@@ -1509,7 +1521,7 @@ def process_batch_results(options):
             index_page += '</div>'
         # Close body and html tags
-        index_page += '</body></html>'
+        index_page += '{}</body></html>'.format(options.footer_text)
         output_html_file = os.path.join(output_dir, 'index.html')
         with open(output_html_file, 'w') as f:
             f.write(index_page)
@@ -1529,7 +1541,6 @@ def process_batch_results(options):
         # for each category
         images_html = collections.defaultdict(list)
         # Add default entries by accessing them for the first time
         # Maps sorted tuples of detection category IDs (string ints) - e.g. ("1"), ("1", "4", "7") - to
@@ -1637,14 +1648,15 @@ def process_batch_results(options):
                         files_to_render), total=len(files_to_render)))
         else:
             for file_info in tqdm(files_to_render):
-                rendering_results.append(_render_image_no_gt(file_info,
-                                                            detection_categories_to_results_name,
-                                                            detection_categories,
-                                                            classification_categories,
-                                                            options=options))
+                rendering_result = _render_image_no_gt(file_info,
+                                                       detection_categories_to_results_name,
+                                                       detection_categories,
+                                                       classification_categories,
+                                                       options=options)
+                rendering_results.append(rendering_result)
-        elapsed = time.time() - start_time
+        elapsed = time.time() - start_time
         # Do we have classification results in addition to detection results?
         has_classification_info = False
@@ -1793,7 +1805,7 @@ def process_batch_results(options):
                         cname, cname.lower(), ccount)
             index_page += '</div>\n'
-        index_page += '</body></html>'
+        index_page += '{}</body></html>'.format(options.footer_text)
         output_html_file = os.path.join(output_dir, 'index.html')
         with open(output_html_file, 'w') as f:
             f.write(index_page)

megadetector 5.0.20__py3-none-any.whl → 5.0.21__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.20__py3-none-any.whl → 5.0.21__py3-none-any.whl