PyPI - megadetector - Versions diffs - 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl - Mend

megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (75) hide show

api/batch_processing/data_preparation/manage_local_batch.py +297 -202
api/batch_processing/data_preparation/manage_video_batch.py +7 -2
api/batch_processing/postprocessing/add_max_conf.py +1 -0
api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
api/batch_processing/postprocessing/compare_batch_results.py +111 -61
api/batch_processing/postprocessing/convert_output_format.py +24 -6
api/batch_processing/postprocessing/load_api_results.py +56 -72
api/batch_processing/postprocessing/md_to_labelme.py +119 -51
api/batch_processing/postprocessing/merge_detections.py +30 -5
api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
classification/prepare_classification_script.py +191 -191
data_management/cct_json_utils.py +7 -2
data_management/coco_to_labelme.py +263 -0
data_management/coco_to_yolo.py +72 -48
data_management/databases/integrity_check_json_db.py +75 -64
data_management/databases/subset_json_db.py +1 -1
data_management/generate_crops_from_cct.py +1 -1
data_management/get_image_sizes.py +44 -26
data_management/importers/animl_results_to_md_results.py +3 -5
data_management/importers/noaa_seals_2019.py +2 -2
data_management/importers/zamba_results_to_md_results.py +2 -2
data_management/labelme_to_coco.py +264 -127
data_management/labelme_to_yolo.py +96 -53
data_management/lila/create_lila_blank_set.py +557 -0
data_management/lila/create_lila_test_set.py +2 -1
data_management/lila/create_links_to_md_results_files.py +1 -1
data_management/lila/download_lila_subset.py +138 -45
data_management/lila/generate_lila_per_image_labels.py +23 -14
data_management/lila/get_lila_annotation_counts.py +16 -10
data_management/lila/lila_common.py +15 -42
data_management/lila/test_lila_metadata_urls.py +116 -0
data_management/read_exif.py +65 -16
data_management/remap_coco_categories.py +84 -0
data_management/resize_coco_dataset.py +14 -31
data_management/wi_download_csv_to_coco.py +239 -0
data_management/yolo_output_to_md_output.py +40 -13
data_management/yolo_to_coco.py +313 -100
detection/process_video.py +36 -14
detection/pytorch_detector.py +1 -1
detection/run_detector.py +73 -18
detection/run_detector_batch.py +116 -27
detection/run_inference_with_yolov5_val.py +135 -27
detection/run_tiled_inference.py +153 -43
detection/tf_detector.py +2 -1
detection/video_utils.py +4 -2
md_utils/ct_utils.py +101 -6
md_utils/md_tests.py +264 -17
md_utils/path_utils.py +326 -47
md_utils/process_utils.py +26 -7
md_utils/split_locations_into_train_val.py +215 -0
md_utils/string_utils.py +10 -0
md_utils/url_utils.py +66 -3
md_utils/write_html_image_list.py +12 -2
md_visualization/visualization_utils.py +380 -74
md_visualization/visualize_db.py +41 -10
md_visualization/visualize_detector_output.py +185 -104
{megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
{megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
{megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
taxonomy_mapping/map_new_lila_datasets.py +43 -39
taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
taxonomy_mapping/preview_lila_taxonomy.py +27 -27
taxonomy_mapping/species_lookup.py +33 -13
taxonomy_mapping/taxonomy_csv_checker.py +7 -5
md_visualization/visualize_megadb.py +0 -183
{megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
{megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0

data_management/coco_to_labelme.py ADDED Viewed

@@ -0,0 +1,263 @@
+########
+#
+# coco_to_labelme.py
+#
+# Converts a COCO dataset to labelme format (one .json per image file).
+#
+# If you want to convert YOLO data to labelme, use yolo_to_coco, then coco_to_labelme.
+#
+########
+#%% Imports and constants
+import os
+import json
+from tqdm import tqdm
+from collections import defaultdict
+from md_visualization.visualization_utils import open_image
+#%% Functions
+def get_labelme_dict_for_image_from_coco_record(im,annotations,categories,info=None):
+    """
+    Convert one COCO image record, plus the annotations that refer to it, into a
+    labelme-format dict (returned, not written to disk).
+    [im] must contain 'file_name', 'height', and 'width'; an optional 'flags' field is
+    passed through as-is.  [annotations] should contain only annotations for this image;
+    annotations without a 'bbox' field are ignored.  [categories] is the standard COCO
+    category list, used to map category IDs to names; it is also stored verbatim in the
+    output.  If [info] is not None, it is stored in the 'custom_info' field.
+    """
+    labelme_dict = {}
+    if info is not None:
+        labelme_dict['custom_info'] = info
+    # Only the bare filename is stored, not the relative path
+    image_filename = os.path.basename(im['file_name'])
+    labelme_dict.update({
+        'version':'5.3.0a0',
+        'flags':im['flags'] if 'flags' in im else {},
+        'shapes':[],
+        'imagePath':image_filename,
+        'imageHeight':im['height'],
+        'imageWidth':im['width'],
+        'imageData':None,
+        # Keep the COCO categories around in case we want to reconstruct the original IDs later
+        'coco_categories':categories
+    })
+    name_by_category_id = {}
+    for category in categories:
+        name_by_category_id[category['id']] = category['name']
+    for annotation in annotations:
+        # Only box annotations are converted
+        if 'bbox' in annotation:
+            # COCO boxes are [x_min, y_min, width_of_box, height_of_box] (absolute)
+            #
+            # labelme boxes are [[x0,y0],[x1,y1]] (absolute)
+            box = annotation['bbox']
+            labelme_dict['shapes'].append({
+                'label':name_by_category_id[annotation['category_id']],
+                'shape_type':'rectangle',
+                'description':'',
+                'group_id':None,
+                'points':[[box[0],box[1]],[box[0] + box[2],box[1] + box[3]]]
+            })
+    # ...for each annotation
+    return labelme_dict
+# ...def get_labelme_dict_for_image_from_coco_record()
+def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check=False,verbose=False):
+    """
+    Write one labelme-format .json file per image in [coco_data], which is either an
+    already-loaded COCO dict or the path to a COCO .json file.  Each output file is written
+    next to the corresponding image, i.e. at the image's relative path within [image_base],
+    with the image extension replaced by .json.
+    Unless [bypass_image_size_check] is True, every image file is required to exist, and
+    images that lack 'width' or 'height' are opened to fill those fields in (this modifies
+    the records in [coco_data]).  Images that can't be opened get a 'failure' field.
+    Images with a non-None 'failure' or 'error' field are skipped, as are images whose
+    .json file already exists when [overwrite] is False.  Prints a summary when finished.
+    """
+    # Accept either a filename or an in-memory dict
+    if isinstance(coco_data,str):
+        with open(coco_data,'r') as f:
+            coco_data = json.load(f)
+    assert isinstance(coco_data,dict)
+    ## Read image sizes if necessary
+    if bypass_image_size_check:
+        print('Bypassing size check')
+    else:
+        # TODO: parallelize this loop
+        print('Reading/validating image sizes...')
+        for image_record in tqdm(coco_data['images']):
+            # Every image referred to by the COCO file has to be present on disk
+            image_path = os.path.join(image_base,image_record['file_name'])
+            assert os.path.isfile(image_path), 'Image file {} does not exist'.format(image_path)
+            # Nothing to do if the size is already known
+            if ('height' in image_record) and ('width' in image_record):
+                continue
+            try:
+                pil_image = open_image(image_path)
+                image_record['width'] = pil_image.width
+                image_record['height'] = pil_image.height
+            except Exception:
+                print('Warning: cannot open image {}'.format(image_path))
+                # Flag the image so it gets skipped below, without replacing an
+                # existing failure reason
+                image_record.setdefault('failure','Failure image access')
+        # ...for each image
+    # ...if we need to load image sizes
+    ## Generate labelme files
+    print('Generating .json files...')
+    # Group annotations by the image they belong to
+    annotations_by_image_id = defaultdict(list)
+    for annotation in coco_data['annotations']:
+        annotations_by_image_id[annotation['image_id']].append(annotation)
+    n_written = 0
+    n_skipped_error = 0
+    n_skipped_existing = 0
+    for image_record in tqdm(coco_data['images']):
+        # Skip images that failed to load in whatever system generated this COCO file;
+        # errors are represented differently depending on the source.
+        image_failed = any(
+            image_record.get(error_field) is not None for error_field in ('failure','error'))
+        if image_failed:
+            if verbose:
+                print('Warning: skipping labelme file generation for failed image {}'.format(
+                    image_record['file_name']))
+            n_skipped_error += 1
+            continue
+        image_path = os.path.join(image_base,image_record['file_name'])
+        labelme_path = os.path.splitext(image_path)[0] + '.json'
+        if (not overwrite) and (os.path.isfile(labelme_path)):
+            if verbose:
+                print('Skipping existing file {}'.format(labelme_path))
+            n_skipped_existing += 1
+            continue
+        labelme_dict = get_labelme_dict_for_image_from_coco_record(
+            image_record,
+            annotations_by_image_id[image_record['id']],
+            coco_data['categories'],
+            info=None)
+        n_written += 1
+        with open(labelme_path,'w') as f:
+            json.dump(labelme_dict,f,indent=1)
+    # ...for each image
+    print('\nWrote {} .json files (skipped {} for errors, {} because they exist)'.format(
+        n_written,n_skipped_error,n_skipped_existing))
+# ...def coco_to_labelme()
+#%% Interactive driver
+# Scratch cells for interactive (cell-by-cell) use; the "if False" guard means none of
+# this runs when the module is imported or executed as a script.
+if False:
+    pass
+    #%% Configure options
+    # NOTE(review): this is a raw string, so each "\\" below stays as two backslash
+    # characters in the resulting path; presumably single separators were intended.
+    coco_file = \
+        r'C:\\temp\\snapshot-exploration\\images\\training-images-good\\training-images-good_from_yolo.json'
+    # Images are expected to live in the same folder as the COCO file
+    image_folder = os.path.dirname(coco_file)
+    overwrite = True
+    #%% Programmatic execution
+    coco_to_labelme(coco_data=coco_file,image_base=image_folder,overwrite=overwrite)
+    #%% Command-line execution
+    # Build the equivalent command line, print it, and copy it to the clipboard
+    s = 'python coco_to_labelme.py "{}" "{}"'.format(coco_file,image_folder)
+    if overwrite:
+        s += ' --overwrite'
+    print(s)
+    import clipboard; clipboard.copy(s)
+    #%% Opening labelme
+    # NOTE(review): the folder is not quoted here (unlike the command above), and
+    # "python labelme" may need to be plain "labelme" depending on how labelme is
+    # installed - confirm before relying on this command.
+    s = 'python labelme {}'.format(image_folder)
+    print(s)
+    import clipboard; clipboard.copy(s)
+#%% Command-line driver
+import sys,argparse
+def main():
+    """
+    Command-line entry point: parse arguments and run coco_to_labelme().
+    Prints usage and exits if no arguments are supplied.
+    """
+    arg_parser = argparse.ArgumentParser(
+        description='Convert a COCO database to labelme annotation format')
+    arg_parser.add_argument('coco_file',type=str,
+                            help='Path to COCO data file (.json)')
+    arg_parser.add_argument('image_base',type=str,
+                            help='Path to images (also the output folder)')
+    arg_parser.add_argument('--overwrite',action='store_true',
+                            help='Overwrite existing labelme .json files')
+    # With no arguments at all, show the help text rather than an argparse error
+    if len(sys.argv) <= 1:
+        arg_parser.print_help()
+        arg_parser.exit()
+    parsed_args = arg_parser.parse_args()
+    coco_to_labelme(coco_data=parsed_args.coco_file,
+                    image_base=parsed_args.image_base,
+                    overwrite=parsed_args.overwrite)
+if __name__ == '__main__':
+    main()

data_management/coco_to_yolo.py CHANGED Viewed

@@ -56,6 +56,10 @@ def write_yolo_dataset_file(yolo_dataset_file,
         class_lines = [s.strip() for s in class_lines]
         class_list = [s for s in class_lines if len(s) > 0]
+    if not (yolo_dataset_file.endswith('.yml') or yolo_dataset_file.endswith('.yaml')):
+        print('Warning: writing dataset file to a non-yml/yaml extension:\n{}'.format(
+            yolo_dataset_file))
     # Write dataset.yaml
     with open(yolo_dataset_file,'w') as f:
@@ -89,10 +93,12 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
                  images_to_exclude=None,
                  path_replacement_char='#',
                  category_names_to_exclude=None,
-                 write_output=True):
+                 category_names_to_include=None,
+                 write_output=True,
+                 flatten_paths=True):
     """
-    Convert a COCO-formatted dataset to a YOLO-formatted dataset, flattening the dataset
-    (to a single folder) in the process.
+    Convert a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
+    dataset to a single folder in the process.
     If the input and output folders are the same, writes .txt files to the input folder,
     and neither moves nor modifies images.
@@ -130,6 +136,9 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
     ## Validate input
+    if category_names_to_include is not None and category_names_to_exclude is not None:
+        raise ValueError('category_names_to_include and category_names_to_exclude are mutually exclusive')
     if output_folder is None:
         output_folder = input_image_folder
@@ -138,12 +147,16 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
     if category_names_to_exclude is None:
         category_names_to_exclude = {}
     assert os.path.isdir(input_image_folder)
     assert os.path.isfile(input_file)
     os.makedirs(output_folder,exist_ok=True)
+    if (output_folder == input_image_folder) and (overwrite_images) and \
+        (not create_image_and_label_folders) and (not flatten_paths):
+            print('Warning: output folder and input folder are the same, disabling overwrite_images')
+            overwrite_images = False
     ## Read input data
     with open(input_file,'r') as f:
@@ -180,11 +193,14 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
     coco_id_to_name = {}
     yolo_id_to_name = {}
     coco_category_ids_to_exclude = set()
-    category_exclusion_warnings_printed = set()
     for category in data['categories']:
         coco_id_to_name[category['id']] = category['name']
-        if (category['name'] in category_names_to_exclude):
+        if (category_names_to_include is not None) and \
+            (category['name'] not in category_names_to_include):
+            coco_category_ids_to_exclude.add(category['id'])
+            continue
+        elif (category['name'] in category_names_to_exclude):
             coco_category_ids_to_exclude.add(category['id'])
             continue
         assert category['id'] not in coco_id_to_yolo_id
@@ -228,9 +244,13 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
         tokens = os.path.splitext(im['file_name'])
         if tokens[1].lower() not in typical_image_extensions:
             print('Warning: unusual image file name {}'.format(im['file_name']))
-        image_name = tokens[0].replace('\\','/').replace('/',path_replacement_char) + \
-            '_' + str(i_image).zfill(6)
+        if flatten_paths:
+            image_name = tokens[0].replace('\\','/').replace('/',path_replacement_char) + \
+                '_' + str(i_image).zfill(6)
+        else:
+            image_name = tokens[0]
         assert image_name not in image_names, 'Image name collision for {}'.format(image_name)
         image_names.add(image_name)
@@ -293,12 +313,6 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
                 # This category isn't in our category list.  This typically corresponds to whole sets
                 # of images that were excluded from the YOLO set.
                 if ann['category_id'] in coco_category_ids_to_exclude:
-                    category_name = coco_id_to_name[ann['category_id']]
-                    if category_name not in category_exclusion_warnings_printed:
-                        category_exclusion_warnings_printed.add(category_name)
-                        print('Warning: ignoring category {} in image {}'.format(
-                            category_name,image_id),end='')
-                        print('...are you sure you didn\'t mean to exclude this image?')
                     continue
                 yolo_category_id = coco_id_to_yolo_id[ann['category_id']]
@@ -407,30 +421,38 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
         with open(image_id_to_output_image_json_file,'w') as f:
             json.dump(image_id_to_output_image_name,f,indent=1)
-    if (write_output):
+    if (output_folder == input_image_folder) and (not create_image_and_label_folders):
+        print('Creating annotation files (not copying images, input and output folder are the same)')
+    else:
         print('Copying images and creating annotation files')
+    if create_image_and_label_folders:
+        dest_image_folder = os.path.join(output_folder,'images')
+        dest_txt_folder = os.path.join(output_folder,'labels')
+    else:
+        dest_image_folder = output_folder
+        dest_txt_folder = output_folder
+    source_image_to_dest_image = {}
-        if create_image_and_label_folders:
-            dest_image_folder = os.path.join(output_folder,'images')
-            dest_txt_folder = os.path.join(output_folder,'labels')
-        else:
-            dest_image_folder = output_folder
-            dest_txt_folder = output_folder
-        # TODO: parallelize this loop
-        #
-        # output_info = images_to_copy[0]
-        for output_info in tqdm(images_to_copy):
-            source_image = output_info['source_image']
-            dest_image_relative = output_info['dest_image_relative']
-            dest_txt_relative = output_info['dest_txt_relative']
-            dest_image = os.path.join(dest_image_folder,dest_image_relative)
-            os.makedirs(os.path.dirname(dest_image),exist_ok=True)
+    # TODO: parallelize this loop
+    #
+    # output_info = images_to_copy[0]
+    for output_info in tqdm(images_to_copy):
+        source_image = output_info['source_image']
+        dest_image_relative = output_info['dest_image_relative']
+        dest_txt_relative = output_info['dest_txt_relative']
+        dest_image = os.path.join(dest_image_folder,dest_image_relative)
+        dest_txt = os.path.join(dest_txt_folder,dest_txt_relative)
+        source_image_to_dest_image[source_image] = dest_image
+        if write_output:
-            dest_txt = os.path.join(dest_txt_folder,dest_txt_relative)
+            os.makedirs(os.path.dirname(dest_image),exist_ok=True)
             os.makedirs(os.path.dirname(dest_txt),exist_ok=True)
             if not create_image_and_label_folders:
@@ -438,7 +460,7 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
             if (not os.path.isfile(dest_image)) or (overwrite_images):
                 shutil.copyfile(source_image,dest_image)
             bboxes = output_info['bboxes']
             # Only write an annotation file if there are bounding boxes.  Images with
@@ -446,9 +468,9 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
             #
             # https://github.com/ultralytics/yolov5/issues/3218
             #
-            # I think this is also true for images with empty annotation files, but
-            # I'm using the convention suggested on that issue, i.e. hard negatives
-            # are expressed as images without .txt files.
+            # I think this is also true for images with empty .txt files, but
+            # I'm using the convention suggested on that issue, i.e. hard
+            # negatives are expressed as images without .txt files.
             if len(bboxes) > 0:
                 with open(dest_txt,'w') as f:
@@ -458,15 +480,17 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
                         assert len(bbox) == 5
                         s = '{} {} {} {} {}'.format(bbox[0],bbox[1],bbox[2],bbox[3],bbox[4])
                         f.write(s + '\n')
-        # ...for each image
+        # ...if we're actually writing output
-    # ...if we're actually writing output
-    return_info = {}
-    return_info['class_list_filename'] = class_list_filename
+    # ...for each image
+    coco_to_yolo_info = {}
+    coco_to_yolo_info['class_list_filename'] = class_list_filename
+    coco_to_yolo_info['source_image_to_dest_image'] = source_image_to_dest_image
+    coco_to_yolo_info['coco_id_to_yolo_id'] = coco_id_to_yolo_id
-    return return_info
+    return coco_to_yolo_info
 # ...def coco_to_yolo(...)

megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl