megadetector-5.0.6-py3-none-any.whl → megadetector-5.0.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (75)
  1. api/batch_processing/data_preparation/manage_local_batch.py +297 -202
  2. api/batch_processing/data_preparation/manage_video_batch.py +7 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
  5. api/batch_processing/postprocessing/compare_batch_results.py +111 -61
  6. api/batch_processing/postprocessing/convert_output_format.py +24 -6
  7. api/batch_processing/postprocessing/load_api_results.py +56 -72
  8. api/batch_processing/postprocessing/md_to_labelme.py +119 -51
  9. api/batch_processing/postprocessing/merge_detections.py +30 -5
  10. api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
  11. api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
  12. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
  13. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  14. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  15. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
  16. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  17. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  18. classification/prepare_classification_script.py +191 -191
  19. data_management/cct_json_utils.py +7 -2
  20. data_management/coco_to_labelme.py +263 -0
  21. data_management/coco_to_yolo.py +72 -48
  22. data_management/databases/integrity_check_json_db.py +75 -64
  23. data_management/databases/subset_json_db.py +1 -1
  24. data_management/generate_crops_from_cct.py +1 -1
  25. data_management/get_image_sizes.py +44 -26
  26. data_management/importers/animl_results_to_md_results.py +3 -5
  27. data_management/importers/noaa_seals_2019.py +2 -2
  28. data_management/importers/zamba_results_to_md_results.py +2 -2
  29. data_management/labelme_to_coco.py +264 -127
  30. data_management/labelme_to_yolo.py +96 -53
  31. data_management/lila/create_lila_blank_set.py +557 -0
  32. data_management/lila/create_lila_test_set.py +2 -1
  33. data_management/lila/create_links_to_md_results_files.py +1 -1
  34. data_management/lila/download_lila_subset.py +138 -45
  35. data_management/lila/generate_lila_per_image_labels.py +23 -14
  36. data_management/lila/get_lila_annotation_counts.py +16 -10
  37. data_management/lila/lila_common.py +15 -42
  38. data_management/lila/test_lila_metadata_urls.py +116 -0
  39. data_management/read_exif.py +65 -16
  40. data_management/remap_coco_categories.py +84 -0
  41. data_management/resize_coco_dataset.py +14 -31
  42. data_management/wi_download_csv_to_coco.py +239 -0
  43. data_management/yolo_output_to_md_output.py +40 -13
  44. data_management/yolo_to_coco.py +313 -100
  45. detection/process_video.py +36 -14
  46. detection/pytorch_detector.py +1 -1
  47. detection/run_detector.py +73 -18
  48. detection/run_detector_batch.py +116 -27
  49. detection/run_inference_with_yolov5_val.py +135 -27
  50. detection/run_tiled_inference.py +153 -43
  51. detection/tf_detector.py +2 -1
  52. detection/video_utils.py +4 -2
  53. md_utils/ct_utils.py +101 -6
  54. md_utils/md_tests.py +264 -17
  55. md_utils/path_utils.py +326 -47
  56. md_utils/process_utils.py +26 -7
  57. md_utils/split_locations_into_train_val.py +215 -0
  58. md_utils/string_utils.py +10 -0
  59. md_utils/url_utils.py +66 -3
  60. md_utils/write_html_image_list.py +12 -2
  61. md_visualization/visualization_utils.py +380 -74
  62. md_visualization/visualize_db.py +41 -10
  63. md_visualization/visualize_detector_output.py +185 -104
  64. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
  65. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
  66. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
  67. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  68. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  69. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  70. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  71. taxonomy_mapping/species_lookup.py +33 -13
  72. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  73. md_visualization/visualize_megadb.py +0 -183
  74. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
  75. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
api/batch_processing/postprocessing/postprocess_batch_results.py

@@ -23,12 +23,10 @@ import collections
  import copy
  import errno
  import io
- import itertools
  import os
  import sys
  import time
  import uuid
- import urllib
  import warnings
  import random
 
@@ -114,10 +112,18 @@ class PostProcessingOptions:
      # detections_animal, detections_person, detections_vehicle
      rendering_bypass_sets = []
 
-     # By default, choose a confidence threshold based on the detector version
+     # If this is None, choose a confidence threshold based on the detector version.
+     #
+     # This can either be a float or a dictionary mapping category names (not IDs) to
+     # thresholds. The category "default" can be used to specify thresholds for
+     # other categories. Currently the use of a dict here is not supported when
+     # ground truth is supplied.
      confidence_threshold = None
 
      # Confidence threshold to apply to classification (not detection) results
+     #
+     # Only a float is supported here (unlike the "confidence_threshold" parameter, which
+     # can be a dict).
      classification_confidence_threshold = 0.5
 
      # Used for summary statistics only
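The new confidence_threshold option accepts either a single float or a dict keyed by category name. A minimal configuration sketch (the category names, threshold values, and import path below are illustrative, inferred from the wheel layout rather than taken from the package):

    from api.batch_processing.postprocessing.postprocess_batch_results import PostProcessingOptions

    options = PostProcessingOptions()

    # Per-category thresholds are keyed by category *name*, not ID; the "default"
    # entry covers any category not listed explicitly
    options.confidence_threshold = {
        'animal': 0.2,
        'person': 0.35,
        'default': 0.5
    }

    # These options still take a single float
    options.classification_confidence_threshold = 0.5
    options.almost_detection_confidence_threshold = 0.1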
@@ -163,6 +169,9 @@ class PostProcessingOptions:
      #
      # Currently only supported when ground truth is unavailable
      include_almost_detections = False
+ 
+     # Only a float is supported here (unlike the "confidence_threshold" parameter, which
+     # can be a dict).
      almost_detection_confidence_threshold = None
 
      # Control rendering parallelization
@@ -427,12 +436,25 @@ def render_bounding_boxes(
      vis_utils.render_db_bounding_boxes(ground_truth_boxes, gt_classes, image,
          original_size=original_size,label_map=label_map,
          thickness=4,expansion=4)
+ 
+     # render_detection_bounding_boxes expects either a float or a dict mapping
+     # category IDs to names.
+     if isinstance(options.confidence_threshold,float):
+         rendering_confidence_threshold = options.confidence_threshold
+     else:
+         category_ids = set()
+         for d in detections:
+             category_ids.add(d['category'])
+         rendering_confidence_threshold = {}
+         for category_id in category_ids:
+             rendering_confidence_threshold[category_id] = \
+                 get_threshold_for_category_id(category_id, options, detection_categories)
 
      vis_utils.render_detection_bounding_boxes(
          detections, image,
          label_map=detection_categories,
          classification_label_map=classification_categories,
-         confidence_threshold=options.confidence_threshold,
+         confidence_threshold=rendering_confidence_threshold,
          thickness=options.line_thickness,
          expansion=options.box_expansion)
 
@@ -460,7 +482,14 @@ def render_bounding_boxes(
 
      # Optionally add links back to the original images
      if options.link_images_to_originals and (image_full_path is not None):
-         info['linkTarget'] = urllib.parse.quote(image_full_path)
+ 
+         # Handling special characters in links has been pushed down into
+         # write_html_image_list
+         #
+         # link_target = image_full_path.replace('\\','/')
+         # link_target = urllib.parse.quote(link_target)
+         link_target = image_full_path
+         info['linkTarget'] = link_target
 
      return info
 
@@ -535,15 +564,68 @@ def prepare_html_subpages(images_html, output_dir, options=None):
 
  # ...prepare_html_subpages()
 
- # Get unique categories above the threshold for this image
- def get_positive_categories(detections,options):
+ 
+ # Determine the confidence threshold we should use for a specific category name
+ def get_threshold_for_category_name(category_name,options):
+ 
+     if isinstance(options.confidence_threshold,float):
+         return options.confidence_threshold
+     else:
+         assert isinstance(options.confidence_threshold,dict), \
+             'confidence_threshold must either be a float or a dict'
+ 
+         if category_name in options.confidence_threshold:
+ 
+             return options.confidence_threshold[category_name]
+ 
+         else:
+             assert 'default' in options.confidence_threshold, \
+                 'category {} not in confidence_threshold dict, and no default supplied'.format(
+                     category_name)
+             return options.confidence_threshold['default']
+ 
+ 
+ # Determine the confidence threshold we should use for a specific category ID
+ #
+ # detection_categories is a dict mapping category IDs to names.
+ def get_threshold_for_category_id(category_id,options,detection_categories):
+ 
+     if isinstance(options.confidence_threshold,float):
+         return options.confidence_threshold
+ 
+     assert category_id in detection_categories, \
+         'Invalid category ID {}'.format(category_id)
+ 
+     category_name = detection_categories[category_id]
+ 
+     return get_threshold_for_category_name(category_name,options)
+ 
+ 
+ # Get a sorted list of unique categories (as string IDs) above the threshold for this image
+ #
+ # "detection_categories" is a dict mapping category IDs to names.
+ def get_positive_categories(detections,options,detection_categories):
      positive_categories = set()
      for d in detections:
-         if d['conf'] >= options.confidence_threshold:
+         threshold = get_threshold_for_category_id(d['category'], options, detection_categories)
+         if d['conf'] >= threshold:
              positive_categories.add(d['category'])
      return sorted(positive_categories)
 
 
+ # Determine whether any positive detections are present in the detection list
+ # [detections].
+ def has_positive_detection(detections,options,detection_categories):
+ 
+     found_positive_detection = False
+     for d in detections:
+         threshold = get_threshold_for_category_id(d['category'], options, detection_categories)
+         if d['conf'] >= threshold:
+             found_positive_detection = True
+             break
+     return found_positive_detection
+ 
+ 
  # Render an image (with no ground truth information)
  #
  # Returns a list of rendering structs, where the first item is a category (e.g. "detections_animal"),
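A rough sketch of how these helpers resolve thresholds, assuming they are module-level functions as the hunk suggests (the category map, numbers, and import path are invented for illustration):

    from api.batch_processing.postprocessing.postprocess_batch_results import (
        PostProcessingOptions, get_threshold_for_category_id)

    # Hypothetical detection_categories mapping (category IDs to names)
    detection_categories = {'1': 'animal', '2': 'person', '3': 'vehicle'}

    options = PostProcessingOptions()
    options.confidence_threshold = {'animal': 0.2, 'default': 0.5}

    # "1" maps to "animal", which is listed explicitly, so its threshold is 0.2;
    # "2" maps to "person", which falls back to the "default" entry (0.5)
    assert get_threshold_for_category_id('1', options, detection_categories) == 0.2
    assert get_threshold_for_category_id('2', options, detection_categories) == 0.5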
@@ -573,8 +655,12 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,
      max_conf = file_info[1]
      detections = file_info[2]
 
+     # Determine whether any positive detections are present (using a threshold that
+     # may vary by category)
+     found_positive_detection = has_positive_detection(detections,options,detection_categories)
+ 
      detection_status = DetectionStatus.DS_UNASSIGNED
-     if max_conf >= options.confidence_threshold:
+     if found_positive_detection:
          detection_status = DetectionStatus.DS_POSITIVE
      else:
          if options.include_almost_detections:
@@ -587,7 +673,7 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,
 
      if detection_status == DetectionStatus.DS_POSITIVE:
          if options.separate_detections_by_category:
-             positive_categories = tuple(get_positive_categories(detections,options))
+             positive_categories = tuple(get_positive_categories(detections,options,detection_categories))
              if positive_categories not in detection_categories_to_results_name:
                  raise ValueError('Error: {} not in category mapping (file {})'.format(
                      str(positive_categories),image_relative_path))
@@ -703,7 +789,7 @@ def render_image_with_gt(file_info,ground_truth_indexed_db,
              f'ground truth status (status: {gt_status}, classes: {gt_class_summary})')
          return None
 
-     detected = max_conf > options.confidence_threshold
+     detected = has_positive_detection(detections, options, detection_categories)
 
      if gt_presence and detected:
          if '_classification_accuracy' not in image.keys():
@@ -766,6 +852,10 @@ def process_batch_results(options: PostProcessingOptions
 
      ground_truth_indexed_db = None
 
+     if (options.ground_truth_json_file is not None):
+         assert (options.confidence_threshold is None) or (isinstance(options.confidence_threshold,float)), \
+             'Variable confidence thresholds are not supported when supplying ground truth'
+ 
      if (options.ground_truth_json_file is not None) and (len(options.ground_truth_json_file) > 0):
 
          if options.separate_detections_by_category:
@@ -791,7 +881,7 @@ def process_batch_results(options: PostProcessingOptions
      # If the caller hasn't supplied results, load them
      if options.api_detection_results is None:
          detections_df, other_fields = load_api_results(
-             options.api_output_file, normalize_paths=True,
+             options.api_output_file, force_forward_slashes=True,
              filename_replacements=options.api_output_filename_replacements)
          ppresults.api_detection_results = detections_df
          ppresults.api_other_fields = other_fields
@@ -821,7 +911,7 @@ def process_batch_results(options: PostProcessingOptions
          n_failures = detections_df['failure'].count()
          print('Ignoring {} failed images'.format(n_failures))
          # Explicitly forcing a copy() operation here to suppress "trying to be set
-         # on a copy" # warnings (and associated risks) below.
+         # on a copy" warnings (and associated risks) below.
          detections_df = detections_df[detections_df['failure'].isna()].copy()
 
      assert other_fields is not None
@@ -836,31 +926,24 @@ def process_batch_results(options: PostProcessingOptions
          for k, v in classification_categories.items()
      }
 
-     # Add column 'pred_detection_label' to indicate predicted detection status.
-     #
-     # This column doesn't capture category information, it's just about detections,
-     # non-detections, and almost-detections.
-     det_status = 'pred_detection_label'
-     if options.include_almost_detections:
-         detections_df[det_status] = DetectionStatus.DS_ALMOST
-         confidences = detections_df['max_detection_conf']
- 
-         pos_mask = (confidences >= options.confidence_threshold)
-         detections_df.loc[pos_mask, det_status] = DetectionStatus.DS_POSITIVE
- 
-         neg_mask = (confidences < options.almost_detection_confidence_threshold)
-         detections_df.loc[neg_mask, det_status] = DetectionStatus.DS_NEGATIVE
-     else:
-         detections_df[det_status] = np.where(
-             detections_df['max_detection_conf'] >= options.confidence_threshold,
-             DetectionStatus.DS_POSITIVE, DetectionStatus.DS_NEGATIVE)
- 
-     n_positives = sum(detections_df[det_status] == DetectionStatus.DS_POSITIVE)
+     # Count detections and almost-detections for reporting purposes
+     n_positives = 0
+     n_almosts = 0
+ 
+     for i_row,row in tqdm(detections_df.iterrows(),total=len(detections_df)):
+ 
+         detections = row['detections']
+         max_conf = row['max_detection_conf']
+         if has_positive_detection(detections, options, detection_categories):
+             n_positives += 1
+         elif (options.almost_detection_confidence_threshold is not None) and \
+             (max_conf >= options.almost_detection_confidence_threshold):
+             n_almosts += 1
+ 
      print(f'Finished loading and preprocessing {len(detections_df)} rows '
            f'from detector output, predicted {n_positives} positives.')
 
      if options.include_almost_detections:
-         n_almosts = sum(detections_df[det_status] == DetectionStatus.DS_ALMOST)
          print('...and {} almost-positives'.format(n_almosts))
 
 
@@ -1211,7 +1294,8 @@ def process_batch_results(options: PostProcessingOptions
          for file_info in tqdm(files_to_render):
              rendering_results.append(render_image_with_gt(
                  file_info,ground_truth_indexed_db,
-                 detection_categories,classification_categories))
+                 detection_categories,classification_categories,
+                 options=options))
          elapsed = time.time() - start_time
 
      # Map all the rendering results in the list rendering_results into the
@@ -1241,6 +1325,12 @@ def process_batch_results(options: PostProcessingOptions
          image_counts['tp']
      )
 
+     confidence_threshold_string = ''
+     if isinstance(options.confidence_threshold,float):
+         confidence_threshold_string = '{:.2%}'.format(options.confidence_threshold)
+     else:
+         confidence_threshold_string = str(options.confidence_threshold)
+ 
      index_page = """<html>
      {}
      <body>
@@ -1255,7 +1345,7 @@ def process_batch_results(options: PostProcessingOptions
 
      <h3>Sample images</h3>
      <div class="contentdiv">
-     <p>A sample of {} images, annotated with detections above {:.1%} confidence.</p>
+     <p>A sample of {} images, annotated with detections above confidence {}.</p>
      <a href="tp.html">True positives (TP)</a> ({}) ({:0.1%})<br/>
      CLASSIFICATION_PLACEHOLDER_1
      <a href="tn.html">True negatives (TN)</a> ({}) ({:0.1%})<br/>
@@ -1265,7 +1355,7 @@ def process_batch_results(options: PostProcessingOptions
      </div>
      """.format(
          style_header,job_name_string,model_version_string,
-         image_count, options.confidence_threshold,
+         image_count, confidence_threshold_string,
          all_tp_count, all_tp_count/total_count,
          image_counts['tn'], image_counts['tn']/total_count,
          image_counts['fp'], image_counts['fp']/total_count,
@@ -1275,11 +1365,11 @@ def process_batch_results(options: PostProcessingOptions
      index_page += """
      <h3>Detection results</h3>
      <div class="contentdiv">
-     <p>At a confidence threshold of {:0.1%}, precision={:0.1%}, recall={:0.1%}</p>
+     <p>At a confidence threshold of {}, precision={:0.1%}, recall={:0.1%}</p>
      <p><strong>Precision/recall summary for all {} images</strong></p><img src="{}"><br/>
      </div>
      """.format(
-         options.confidence_threshold, precision_at_confidence_threshold, recall_at_confidence_threshold,
+         confidence_threshold_string, precision_at_confidence_threshold, recall_at_confidence_threshold,
          len(detections_df), pr_figure_relative_filename
      )
 
@@ -1345,46 +1435,60 @@ def process_batch_results(options: PostProcessingOptions
      # Accumulate html image structs (in the format expected by write_html_image_list)
      # for each category
      images_html = collections.defaultdict(list)
-     images_html['non_detections']
+ 
 
      # Add default entries by accessing them for the first time
 
-     # Maps detection categories - e.g. "human" - to result set names, e.g.
-     # "detections_human"
+     # Maps sorted tuples of detection category IDs (string ints) - e.g. ("1"), ("1", "4", "7") - to
+     # result set names, e.g. "detections_human", "detections_cat_truck".
      detection_categories_to_results_name = {}
 
      # Keep track of which categories are single-class (e.g. "animal") and which are
      # combinations (e.g. "animal_vehicle")
      detection_categories_to_category_count = {}
-     detection_categories_to_category_count['detections'] = 0
+ 
+     # For the creation of a "non-detections" category
+     images_html['non_detections']
      detection_categories_to_category_count['non_detections'] = 0
-     detection_categories_to_category_count['almost_detections'] = 0
+ 
 
      if not options.separate_detections_by_category:
          # For the creation of a "detections" category
          images_html['detections']
+         detection_categories_to_category_count['detections'] = 0
      else:
          # Add a set of results for each category and combination of categories, e.g.
          # "detections_animal_vehicle". When we're using this script for non-MegaDetector
         # results, this can generate lots of categories, e.g. detections_bear_bird_cat_dog_pig.
         # We'll keep that huge set of combinations in this map, but we'll only write
         # out links for the ones that are non-empty.
-         keys = detection_categories.keys()
-         subsets = []
-         for L in range(1, len(keys)+1):
-             for subset in itertools.combinations(keys, L):
-                 subsets.append(subset)
-         for subset in subsets:
-             sorted_subset = tuple(sorted(subset))
+         used_combinations = set()
+ 
+         # row = images_to_visualize.iloc[0]
+         for i_row, row in images_to_visualize.iterrows():
+             detections_this_row = row['detections']
+             above_threshold_category_ids_this_row = set()
+             for detection in detections_this_row:
+                 threshold = get_threshold_for_category_id(detection['category'], options, detection_categories)
+                 if detection['conf'] >= threshold:
+                     above_threshold_category_ids_this_row.add(detection['category'])
+             if len(above_threshold_category_ids_this_row) == 0:
+                 continue
+             sorted_categories_this_row = tuple(sorted(above_threshold_category_ids_this_row))
+             used_combinations.add(sorted_categories_this_row)
+ 
+         for sorted_subset in used_combinations:
+             assert len(sorted_subset) > 0
              results_name = 'detections'
              for category_id in sorted_subset:
                  results_name = results_name + '_' + detection_categories[category_id]
              images_html[results_name]
             detection_categories_to_results_name[sorted_subset] = results_name
-             detection_categories_to_category_count[results_name] = len(sorted_subset)
+             detection_categories_to_category_count[results_name] = len(sorted_subset)
 
      if options.include_almost_detections:
          images_html['almost_detections']
+         detection_categories_to_category_count['almost_detections'] = 0
 
      # Create output directories
      for res in images_html.keys():
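The result-set naming scheme used above can be illustrated with a toy example that mirrors the loop in this hunk (the category map is hypothetical; real maps come from the results file):

    # Hypothetical category map (category IDs to names)
    detection_categories = {'1': 'animal', '2': 'person', '3': 'vehicle'}

    # An image whose above-threshold detections fall in categories "1" and "3"
    sorted_subset = ('1', '3')

    results_name = 'detections'
    for category_id in sorted_subset:
        results_name = results_name + '_' + detection_categories[category_id]

    # results_name is now "detections_animal_vehicle"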
@@ -1495,9 +1599,15 @@ def process_batch_results(options: PostProcessingOptions
          almost_detection_string = ' (&ldquo;almost detection&rdquo; threshold at {:.1%})'.format(
              options.almost_detection_confidence_threshold)
 
+     confidence_threshold_string = ''
+     if isinstance(options.confidence_threshold,float):
+         confidence_threshold_string = '{:.2%}'.format(options.confidence_threshold)
+     else:
+         confidence_threshold_string = str(options.confidence_threshold)
+ 
      index_page = """<html>\n{}\n<body>\n
      <h2>Visualization of results for {}</h2>\n
-     <p>A sample of {} images (of {} total)FAILURE_PLACEHOLDER, annotated with detections above {:.1%} confidence{}.</p>\n
+     <p>A sample of {} images (of {} total)FAILURE_PLACEHOLDER, annotated with detections above confidence {}{}.</p>\n
 
      <div class="contentdiv">
      <p>Model version: {}</p>
@@ -1505,7 +1615,7 @@ def process_batch_results(options: PostProcessingOptions
 
      <h3>Sample images</h3>\n
      <div class="contentdiv">\n""".format(
-         style_header, job_name_string, image_count, len(detections_df), options.confidence_threshold,
+         style_header, job_name_string, image_count, len(detections_df), confidence_threshold_string,
          almost_detection_string, model_version_string)
 
      failure_string = ''
@@ -1521,7 +1631,17 @@ def process_batch_results(options: PostProcessingOptions
          friendly_name = friendly_name.capitalize()
          return friendly_name
 
-     for result_set_name in images_html.keys():
+     sorted_result_set_names = sorted(list(images_html.keys()))
+ 
+     result_set_name_to_count = {}
+     for result_set_name in sorted_result_set_names:
+         image_count = image_counts[result_set_name]
+         result_set_name_to_count[result_set_name] = image_count
+     sorted_result_set_names = sorted(sorted_result_set_names,
+         key=lambda x: result_set_name_to_count[x],
+         reverse=True)
+ 
+     for result_set_name in sorted_result_set_names:
 
          # Don't print classification classes here; we'll do that later with a slightly
          # different structure
api/batch_processing/postprocessing/remap_detection_categories.py (new file)

@@ -0,0 +1,163 @@
+ ########
+ #
+ # remap_detection_categories.py
+ #
+ # Given a MegaDetector results file, remap the category IDs according to a specified
+ # dictionary, writing the results to a new file.
+ #
+ # Currently only supports remapping detection categories, not classification categories.
+ #
+ ########
+ 
+ #%% Constants and imports
+ 
+ import json
+ import os
+ 
+ from tqdm import tqdm
+ 
+ from md_utils.ct_utils import invert_dictionary
+ 
+ 
+ #%% Main function
+ 
+ def remap_detection_categories(input_file,
+                                output_file,
+                                target_category_map,
+                                extra_category_handling='error',
+                                overwrite=False):
+     """
+     Given a MD results file [input_file], remap the category IDs according to the dictionary
+     [target_category_map], writing the results to [output_file]. The remapped dictionary needs to have
+     the same category names as the input file's detection_categories dictionary.
+ 
+     Currently only supports remapping detection categories, not classification categories.
+ 
+     target_category_map can also be a MD results file, in which case we'll use that file's
+     detection_categories dictionary.
+ 
+     [extra_category_handling] specifies what we should do if categories are present in the source file
+     that are not present in the target mapping.
+ 
+     'error' == Error in this case.
+     'drop_if_unused' == Don't include these in the output file's category mappings if they are unused,
+         error if they are.
+     'remap' == Remap to unused category IDs. This is reserved for future use, not currently implemented.
+ 
+     """
+ 
+     if os.path.exists(output_file) and (not overwrite):
+         print('File {} exists, bypassing remapping'.format(output_file))
+         return
+ 
+     assert os.path.isfile(input_file), \
+         'File {} does not exist'.format(input_file)
+ 
+     # If "target_category_map" is passed as a filename, load the "detection_categories"
+     # dict.
+     if isinstance(target_category_map,str):
+         target_categories_file = target_category_map
+         with open(target_categories_file,'r') as f:
+             d = json.load(f)
+         target_category_map = d['detection_categories']
+         assert isinstance(target_category_map,dict)
+ 
+     with open(input_file,'r') as f:
+         input_data = json.load(f)
+ 
+     input_images = input_data['images']
+     input_categories = input_data['detection_categories']
+ 
+     # Figure out which categories are actually used
+     used_category_ids = set()
+     for im in input_images:
+ 
+         if 'detections' not in im or im['detections'] is None:
+             continue
+ 
+         for det in im['detections']:
+             used_category_ids.add(det['category'])
+     used_category_names = [input_categories[cid] for cid in used_category_ids]
+ 
+     input_names_set = set(input_categories.values())
+     output_names_set = set(target_category_map.values())
+ 
+     # category_name = list(input_names_set)[0]
+     for category_name in input_names_set:
+         if category_name in output_names_set:
+             continue
+         if extra_category_handling == 'error':
+             raise ValueError('Category {} present in source but not in target'.format(category_name))
+         elif extra_category_handling == 'drop_if_unused':
+             if category_name in used_category_names:
+                 raise ValueError('Category {} present (and used) in source but not in target'.format(
+                     category_name))
+             else:
+                 print('Category {} is unused and not present in the target mapping, ignoring'.format(
+                     category_name))
+                 continue
+         elif extra_category_handling == 'remap':
+             raise NotImplementedError('Remapping of extra category IDs not yet implemented')
+         else:
+             raise ValueError('Unrecognized extra category handling scheme {}'.format(
+                 extra_category_handling))
+ 
+     output_category_name_to_output_category_id = invert_dictionary(target_category_map)
+ 
+     input_category_id_to_output_category_id = {}
+     for input_category_id in input_categories.keys():
+         category_name = input_categories[input_category_id]
+         if category_name not in output_category_name_to_output_category_id:
+             assert category_name not in used_category_names
+         else:
+             output_category_id = output_category_name_to_output_category_id[category_name]
+             input_category_id_to_output_category_id[input_category_id] = output_category_id
+ 
+     # im = input_images[0]
+     for im in tqdm(input_images):
+ 
+         if 'detections' not in im or im['detections'] is None:
+             continue
+ 
+         # det = im['detections'][0]
+         for det in im['detections']:
+             det['category'] = input_category_id_to_output_category_id[det['category']]
+ 
+     input_data['detection_categories'] = target_category_map
+ 
+     with open(output_file,'w') as f:
+         json.dump(input_data,f,indent=1)
+ 
+ 
+     print('Saved remapped results to {}'.format(output_file))
+ 
+ 
+ #%% Interactive driver
+ 
+ if False:
+ 
+     pass
+ 
+     #%%
+ 
+     target_categories_file = '/home/dmorris/tmp/usgs-tegus/model-comparison/all-classes_usgs-only_yolov5x6.json'
+     target_category_map = target_categories_file
+     input_file = '/home/dmorris/tmp/usgs-tegus/model-comparison/all-classes_usgs-goannas-lilablanks_yolov5x6-20240223.json'
+ 
+     output_file = input_file.replace('.json','_remapped.json')
+     assert output_file != input_file
+     overwrite = True
+ 
+     extra_category_handling = 'drop_if_unused'
+ 
+     remap_detection_categories(input_file=input_file,
+                                output_file=output_file,
+                                target_category_map=target_category_map,
+                                extra_category_handling=extra_category_handling,
+                                overwrite=overwrite)
+ 
+ 
+ #%% Command-line driver
+ 
+ # TODO
+ 
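As a usage sketch for the new module: the docstring indicates that target_category_map can be either a results file path or an explicit dict. The file paths, category names, and import path below are placeholders inferred from the wheel layout, not taken from the package:

    from api.batch_processing.postprocessing.remap_detection_categories import \
        remap_detection_categories

    # Hypothetical target mapping (category IDs to names)
    target_category_map = {'1': 'animal', '2': 'person', '3': 'vehicle'}

    remap_detection_categories(input_file='md_results.json',
                               output_file='md_results_remapped.json',
                               target_category_map=target_category_map,
                               extra_category_handling='drop_if_unused',
                               overwrite=True)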