PyPI - megadetector - Versions diffs - 10.0.8__py3-none-any.whl → 10.0.10__py3-none-any.whl - Mend

megadetector 10.0.8__py3-none-any.whl → 10.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (22) hide show

megadetector/postprocessing/compare_batch_results.py CHANGED Viewed

@@ -136,7 +136,7 @@ class BatchComparisonOptions:
         #: Colormap to use for detections in file B (maps detection categories to colors)
         self.colormap_b = ['RoyalBlue']
-        #: Process-based parallelization isn't supported yet; this must be "True"
+        #: Whether to render images with threads (True) or processes (False)
         self.parallelize_rendering_with_threads = True
         #: List of filenames to include in the comparison, or None to use all files
@@ -152,7 +152,7 @@ class BatchComparisonOptions:
         self.target_width = 800
         #: Number of workers to use for rendering, or <=1 to disable parallelization
-        self.n_rendering_workers = 20
+        self.n_rendering_workers = 10
         #: Random seed for image sampling (not used if max_images_per_category is None)
         self.random_seed = 0
@@ -183,7 +183,7 @@ class BatchComparisonOptions:
         #: Should we show category names (instead of numbers) on detected boxes?
         self.show_category_names_on_detected_boxes = True
-        #: List of PairwiseBatchComparisonOptions that defines the comparisons we'll render.
+        #: List of PairwiseBatchComparisonOptions that defines the comparisons we'll render
         self.pairwise_options = []
         #: Only process images whose file names contain this token
@@ -197,7 +197,7 @@ class BatchComparisonOptions:
         self.verbose = False
         #: Separate out the "clean TP" and "clean TN" categories, only relevant when GT is
-        #: available.
+        #: available
         self.include_clean_categories = True
         #: When rendering to the output table, optionally write alternative strings
@@ -211,6 +211,10 @@ class BatchComparisonOptions:
         #: Should we include a TOC?  TOC is always omitted if <=2 comparisons are performed.
         self.include_toc = True
+        #: Should we return the mapping from categories (e.g. "common detections") to image
+        #: pairs?  Makes the return dict much larger, but allows post-hoc exploration.
+        self.return_images_by_category = False
 # ...class BatchComparisonOptions
@@ -224,7 +228,7 @@ class PairwiseBatchComparisonResults:
         #: String of HTML content suitable for rendering to an HTML file
         self.html_content = None
-        #: Possibly-modified version of the PairwiseBatchComparisonOptions supplied as input.
+        #: Possibly-modified version of the PairwiseBatchComparisonOptions supplied as input
         self.pairwise_options = None
         #: A dictionary with keys representing category names; in the no-ground-truth case, for example,
@@ -295,7 +299,8 @@ def _render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
     """
     input_image_path = os.path.join(options.image_folder,fn)
-    assert os.path.isfile(input_image_path), 'Image {} does not exist'.format(input_image_path)
+    assert os.path.isfile(input_image_path), \
+        'Image {} does not exist'.format(input_image_path)
     im = visualization_utils.open_image(input_image_path)
     image_pair = image_pairs[fn]
@@ -628,11 +633,21 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     os.makedirs(options.output_folder,exist_ok=True)
+    # Just in case the user provided a single category instead of a list
+    # for category_names_to_include
+    if options.category_names_to_include is not None:
+        if isinstance(options.category_names_to_include,str):
+            options.category_names_to_include = [options.category_names_to_include]
     ##%% Load both result sets
+    if options.verbose:
+        print('Loading {}'.format(pairwise_options.results_filename_a))
     with open(pairwise_options.results_filename_a,'r') as f:
         results_a = json.load(f)
+    if options.verbose:
+        print('Loading {}'.format(pairwise_options.results_filename_b))
     with open(pairwise_options.results_filename_b,'r') as f:
         results_b = json.load(f)
@@ -654,6 +669,17 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     detection_category_name_to_id = invert_dictionary(detection_categories_a)
     options.detection_category_id_to_name = detection_category_id_to_name
+    category_name_to_id_a = invert_dictionary(detection_categories_a)
+    category_name_to_id_b = invert_dictionary(detection_categories_b)
+    category_ids_to_include_a = []
+    category_ids_to_include_b = []
+    for category_name in options.category_names_to_include:
+        if category_name in category_name_to_id_a:
+            category_ids_to_include_a.append(category_name_to_id_a[category_name])
+        if category_name in category_name_to_id_b:
+            category_ids_to_include_b.append(category_name_to_id_b[category_name])
     if pairwise_options.results_description_a is None:
         if 'detector' not in results_a['info']:
             print('No model metadata supplied for results-A, assuming MDv4')
@@ -679,7 +705,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     filename_to_image_b = {im['file']:im for im in images_b}
-    ##%% Make sure they represent the same set of images
+    ##%% Make sure the two result sets represent the same set of images
     filenames_a = [im['file'] for im in images_a]
     filenames_b_set = set([im['file'] for im in images_b])
@@ -914,7 +940,8 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
                 pairwise_options.detection_thresholds_b['default']
     # fn = filenames_to_compare[0]
-    for i_file,fn in tqdm(enumerate(filenames_to_compare),total=len(filenames_to_compare)):
+    for i_file,fn in tqdm(enumerate(filenames_to_compare),
+                          total=len(filenames_to_compare)):
         if fn not in filename_to_image_b:
@@ -1000,27 +1027,11 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
                     categories_above_threshold_b.add(category_id)
             if invalid_category_error:
                 continue
             # Should we be restricting the comparison to only certain categories?
             if options.category_names_to_include is not None:
-                # Just in case the user provided a single category instead of a list
-                if isinstance(options.category_names_to_include,str):
-                    options.category_names_to_include = [options.category_names_to_include]
-                category_name_to_id_a = invert_dictionary(detection_categories_a)
-                category_name_to_id_b = invert_dictionary(detection_categories_b)
-                category_ids_to_include_a = []
-                category_ids_to_include_b = []
-                for category_name in options.category_names_to_include:
-                    if category_name in category_name_to_id_a:
-                        category_ids_to_include_a.append(category_name_to_id_a[category_name])
-                    if category_name in category_name_to_id_b:
-                        category_ids_to_include_b.append(category_name_to_id_b[category_name])
                 # Restrict the categories we treat as above-threshold to the set we're supposed
                 # to be using
                 categories_above_threshold_a = [category_id for category_id in categories_above_threshold_a if \
@@ -1287,7 +1298,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
             max_conf_b = _maxempty([det['conf'] for det in im_b['detections']])
             sort_conf = max(max_conf_a,max_conf_b)
-    # ...what kind of ground truth (if any) do we have?
+        # ...what kind of ground truth (if any) do we have?
         assert comparison_category is not None
         categories_to_image_pairs[comparison_category][fn] = im_pair
@@ -1313,7 +1324,11 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     local_output_folder = os.path.join(options.output_folder,'cmp_' + \
                                        str(output_index).zfill(3))
-    def render_detection_comparisons(category,image_pairs,image_filenames):
+    def _render_detection_comparisons(category,image_pairs,image_filenames):
+        """
+        Render all the detection results pairs for the sampled images in a
+        particular category (e.g. all the "common detections").
+        """
         print('Rendering detections for category {}'.format(category))
@@ -1336,7 +1351,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
         return output_image_paths
-    # ...def render_detection_comparisons()
+    # ...def _render_detection_comparisons()
     if len(options.colormap_a) > 1:
         color_string_a = str(options.colormap_a)
@@ -1371,7 +1386,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
         input_image_absolute_paths = [os.path.join(options.image_folder,fn) for fn in image_filenames]
-        category_image_output_paths = render_detection_comparisons(category,
+        category_image_output_paths = _render_detection_comparisons(category,
                                                             image_pairs,image_filenames)
         category_html_filename = os.path.join(local_output_folder,
@@ -1469,6 +1484,8 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
             print("Pool closed and joined for comparison rendering")
         except Exception:
             pass
     ##%% Write the top-level HTML file content
     html_output_string  = ''
@@ -1591,8 +1608,11 @@ def compare_batch_results(options):
     for i_comparison,pairwise_options in enumerate(pairwise_options_list):
         print('Running comparison {} of {}'.format(i_comparison,n_comparisons))
+        pairwise_options.verbose = options.verbose
         pairwise_results = \
             _pairwise_compare_batch_results(options,i_comparison,pairwise_options)
+        if not options.return_images_by_category:
+            pairwise_results.categories_to_image_pairs = None
         html_content += pairwise_results.html_content
         all_pairwise_results.append(pairwise_results)

megadetector/postprocessing/postprocess_batch_results.py CHANGED Viewed

@@ -1145,7 +1145,7 @@ def process_batch_results(options):
     images_to_visualize = detections_df
-    if options.num_images_to_sample is not None and options.num_images_to_sample > 0:
+    if (options.num_images_to_sample is not None) and (options.num_images_to_sample > 0):
         images_to_visualize = images_to_visualize.sample(
             n=min(options.num_images_to_sample, len(images_to_visualize)),
             random_state=options.sample_seed)

megadetector/postprocessing/subset_json_detector_output.py CHANGED Viewed

@@ -156,6 +156,12 @@ class SubsetJsonDetectorOutputOptions:
         #: to be contiguous.  Set to 1 to remove empty categories only.
         self.remove_classification_categories_below_count = None
+        #: Remove detections above a threshold size (as a fraction of the image size)
+        self.maximum_detection_size = None
+        #: Remove detections below a threshold size (as a fraction of the image size)
+        self.minimum_detection_size = None
 # ...class SubsetJsonDetectorOutputOptions
@@ -274,6 +280,71 @@ def remove_classification_categories_below_count(data, options):
 # ...def remove_classification_categories_below_count(...)
def subset_json_detector_output_by_size(data, options):
    """
    Remove detections above or below threshold sizes (as a fraction
    of the image size).

    A detection's size is the product of its box width and height
    (bbox[2] * bbox[3]).  Both thresholds are inclusive.  Images are never
    removed by this function, only individual detections.

    Args:
        data (dict): data loaded from a MD results file
        options (SubsetJsonDetectorOutputOptions): parameters for subsetting; only
            minimum_detection_size and maximum_detection_size are read here, and
            [options] itself is left unmodified

    Returns:
        dict: Possibly-modified version of [data] (also modifies in place)
    """

    min_size = options.minimum_detection_size
    max_size = options.maximum_detection_size

    # Nothing to do if neither bound was requested
    if (min_size is None) and (max_size is None):
        return data

    print('Subsetting by size ({} <--> {})'.format(min_size, max_size))

    # Treat a missing bound as open-ended.  These are local values on purpose:
    # writing sentinels back to [options] would leak into later uses of the
    # same options object.
    if min_size is None:
        min_size = float('-inf')
    if max_size is None:
        max_size = float('inf')

    images = data['images']

    n_detections_in = 0
    n_detections_out = 0

    # im = images[0]
    for im in tqdm(images, total=len(images)):

        # Always keep failed images; if the caller wants to remove these, they
        # will use remove_failed_images
        if ('detections' not in im) or (im['detections'] is None):
            continue

        # bbox is [x_min, y_min, width_of_box, height_of_box]
        detections_to_keep = [
            det for det in im['detections']
            if min_size <= (det['bbox'][2] * det['bbox'][3]) <= max_size
        ]

        n_detections_in += len(im['detections'])
        n_detections_out += len(detections_to_keep)

        im['detections'] = detections_to_keep

    # ...for each image

    # Report detections rather than images: the image count never changes here
    print('done, kept {} of {} detections'.format(
        n_detections_out, n_detections_in))

    return data
+# ...def subset_json_detector_output_by_size(...)
 def subset_json_detector_output_by_confidence(data, options):
     """
     Removes all detections below options.confidence_threshold.
@@ -674,6 +745,11 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
         data = subset_json_detector_output_by_list(data, options)
+    if (options.maximum_detection_size is not None) or \
+        (options.minimum_detection_size is not None):
+        data = subset_json_detector_output_by_size(data, options)
     if not options.split_folders:
         _write_detection_results(data, output_filename, options)
@@ -837,6 +913,10 @@ def main(): # noqa
                         help='Replace [query] with this')
     parser.add_argument('--confidence_threshold', type=float, default=None,
                         help='Remove detections below this confidence level')
+    parser.add_argument('--maximum_detection_size', type=float, default=None,
+                        help='Remove detections above this size (as a fraction of the image size)')
+    parser.add_argument('--minimum_detection_size', type=float, default=None,
+                        help='Remove detections below this size (as a fraction of the image size)')
     parser.add_argument('--keep_files_in_list', type=str, default=None,
                         help='Keep only files in this list, which can be a .json results file or a folder.' + \
                              ' Assumes that the input .json file contains relative paths when comparing to a folder.')

megadetector/utils/directory_listing.py CHANGED Viewed

@@ -129,6 +129,9 @@ def create_html_index(dir,
         recursive (bool, optional): recurse into subfolders
     """
+    if template_fun is None:
+        template_fun = _create_plain_index
     print('Traversing {}'.format(dir))
     # Make sure we remove the trailing /

megadetector/utils/path_utils.py CHANGED Viewed

@@ -1046,6 +1046,73 @@ def parallel_copy_files(input_file_to_output_file,
 # ...def parallel_copy_files(...)
+#%% File deletion functions
def delete_file(input_file, verbose=False):
    """
    Deletes a single file.

    This is a best-effort operation: any error raised while checking for or
    removing the file is reported (when [verbose] is set) and turned into a
    False return value rather than propagated.

    Args:
        input_file (str): file to delete
        verbose (bool, optional): enable additional debug console output

    Returns:
        bool: True if file was deleted successfully, False otherwise
    """

    deleted = False

    try:

        if verbose:
            print('Deleting file {}'.format(input_file))

        # Only regular files that currently exist are removed; anything else
        # (missing paths, folders) is left alone.
        if os.path.isfile(input_file):
            os.remove(input_file)
            deleted = True
        elif verbose:
            print('File {} does not exist'.format(input_file))

    except Exception as err:

        # Deliberately broad: callers rely on the boolean, not on exceptions
        if verbose:
            print('Error deleting file {}: {}'.format(input_file, str(err)))

    return deleted
+# ...def delete_file(...)
def parallel_delete_files(input_files,
                          max_workers=16,
                          use_threads=True,
                          verbose=False):
    """
    Deletes one or more files in parallel.

    Each file is passed to delete_file(), so per-file failures are reported
    (when [verbose] is set) rather than raised.

    Args:
        input_files (list): list of files to delete
        max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
        use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
            max_workers <= 1
        verbose (bool, optional): enable additional debug console output
    """

    if len(input_files) == 0:
        return

    # Never start more workers than there are files
    n_workers = min(max_workers, len(input_files))

    # Honor the documented "<= 1 disables parallelism" contract.  This also avoids
    # handing a non-positive worker count to the pool constructor, which raises
    # ValueError.
    if n_workers <= 1:
        for input_file in tqdm(input_files):
            delete_file(input_file, verbose=verbose)
        return

    if use_threads:
        pool = ThreadPool(n_workers)
    else:
        pool = Pool(n_workers)

    try:
        with tqdm(total=len(input_files)) as pbar:
            for _ in pool.imap_unordered(partial(delete_file, verbose=verbose),
                                         input_files):
                pbar.update()
    finally:
        # Release the workers even if iteration is interrupted
        pool.close()
        pool.join()
+# ...def parallel_delete_files(...)
 #%% File size functions
 def get_file_sizes(base_dir, convert_slashes=True):

megadetector/utils/string_utils.py CHANGED Viewed

@@ -34,6 +34,27 @@ def is_float(s):
     return True
def is_int(s):
    """
    Checks whether [s] is an object (typically a string) that can be cast to an int.

    This mirrors the behavior of int(): strings like "12" or " 7 " qualify, strings
    like "1.5" do not, and finite floats qualify because int() truncates them.

    Args:
        s (object): object to evaluate

    Returns:
        bool: True if s successfully casts to an int, otherwise False
    """

    if s is None:
        return False

    try:
        _ = int(s)
    except (ValueError, TypeError, OverflowError):
        # ValueError: unparseable string or NaN
        # TypeError: unsupported type (e.g. a list)
        # OverflowError: infinite float
        return False

    return True
 def human_readable_to_bytes(size):
     """
     Given a human-readable byte string (e.g. 2G, 10GB, 30MB, 20KB),

megadetector 10.0.8__py3-none-any.whl → 10.0.10__py3-none-any.whl

Potentially problematic release.

megadetector 10.0.8__py3-none-any.whl → 10.0.10__py3-none-any.whl