PyPI - megadetector - Versions diffs - 5.0.7__py3-none-any.whl → 5.0.8__py3-none-any.whl - Mend

megadetector 5.0.7__py3-none-any.whl → 5.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (48) hide show

api/batch_processing/data_preparation/manage_local_batch.py +28 -14
api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
api/batch_processing/postprocessing/compare_batch_results.py +1 -1
api/batch_processing/postprocessing/convert_output_format.py +24 -6
api/batch_processing/postprocessing/load_api_results.py +1 -3
api/batch_processing/postprocessing/md_to_labelme.py +118 -51
api/batch_processing/postprocessing/merge_detections.py +30 -5
api/batch_processing/postprocessing/postprocess_batch_results.py +24 -12
api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
api/batch_processing/postprocessing/render_detection_confusion_matrix.py +15 -12
api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
data_management/cct_json_utils.py +7 -2
data_management/coco_to_labelme.py +263 -0
data_management/coco_to_yolo.py +7 -4
data_management/databases/integrity_check_json_db.py +68 -59
data_management/databases/subset_json_db.py +1 -1
data_management/get_image_sizes.py +44 -26
data_management/importers/animl_results_to_md_results.py +1 -3
data_management/importers/noaa_seals_2019.py +1 -1
data_management/labelme_to_coco.py +252 -143
data_management/labelme_to_yolo.py +95 -52
data_management/lila/create_lila_blank_set.py +106 -23
data_management/lila/download_lila_subset.py +133 -65
data_management/lila/generate_lila_per_image_labels.py +1 -1
data_management/lila/lila_common.py +8 -38
data_management/read_exif.py +65 -16
data_management/remap_coco_categories.py +84 -0
data_management/resize_coco_dataset.py +3 -22
data_management/wi_download_csv_to_coco.py +239 -0
data_management/yolo_to_coco.py +283 -83
detection/run_detector_batch.py +12 -3
detection/run_inference_with_yolov5_val.py +10 -3
detection/run_tiled_inference.py +2 -2
detection/tf_detector.py +2 -1
detection/video_utils.py +1 -1
md_utils/ct_utils.py +22 -3
md_utils/md_tests.py +11 -2
md_utils/path_utils.py +206 -32
md_utils/url_utils.py +66 -1
md_utils/write_html_image_list.py +12 -3
md_visualization/visualization_utils.py +363 -72
md_visualization/visualize_db.py +33 -10
{megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/METADATA +10 -12
{megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/RECORD +47 -44
{megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
md_visualization/visualize_megadb.py +0 -183
{megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
{megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0

data_management/wi_download_csv_to_coco.py ADDED Viewed

@@ -0,0 +1,239 @@
+########
+#
+# wi_download_csv_to_coco.py
+#
+# Convert a .csv file from a Wildlife Insights project export to a COCO camera traps .json file.
+#
+# Currently assumes that common names are unique identifiers, which is convenient but unreliable.
+#
+########
+#%% Imports and constants
+import os
+import json
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+from collections import defaultdict
+from md_visualization import visualization_utils as vis_utils
+# Columns from the WI export that are copied (when not NaN) into each annotation's
+# 'wi_extra_info' dict.
+wi_extra_annotation_columns = \
+    ('is_blank','identified_by','wi_taxon_id','class','order','family','genus','species','uncertainty',
+             'number_of_objects','age','sex','animal_recognizable','individual_id','individual_animal_notes',
+             'behavior','highlighted','markings')
+# Columns from the WI export that are copied (as strings) into each image's
+# 'wi_image_info' dict.
+wi_extra_image_columns = ('project_id','deployment_id')
+def make_location_id(project_id,deployment_id):
+    """
+    Build the location identifier string for a (project, deployment) pair.
+    Both IDs are converted with str(), so a numeric deployment ID is handled the
+    same way as a string one rather than raising a TypeError on concatenation.
+    """
+    return 'project_' + str(project_id) + '_deployment_' + str(deployment_id)
+def isnan(v):
+    """
+    NaN test that tolerates arbitrary input: returns whatever np.isnan(v) returns,
+    or False if np.isnan raises for this value (e.g. for strings or None).
+    """
+    try:
+        result = np.isnan(v)
+    except Exception:
+        result = False
+    return result
+# Default mapping from WI common names to the category names used in the output;
+# common names that are not keys in this dict are used unchanged.
+default_category_remappings = {
+    'Homo Species':'Human',
+    'Human-Camera Trapper':'Human',
+    'No CV Result':'Unknown'
+}
+#%%
+def wi_download_csv_to_coco(csv_file_in,
+                            coco_file_out=None,
+                            image_folder=None,
+                            validate_images=False,
+                            gs_prefix=None,
+                            verbose=True,
+                            category_remappings=default_category_remappings):
+    """
+    Convert a .csv file from a Wildlife Insights project export to a COCO
+    camera traps .json file.
+    If [coco_file_out] is None, uses [csv_file_in].json
+    gs_prefix is a string to remove from GS URLs to convert to path names... for example, if
+    your gs:// URLs look like:
+    gs://11234134_xyz/deployment/55554/dfadfasdfs.jpg
+    ...and you specify gs_prefix='11234134_xyz/deployment/', the filenames in
+    the .json file will look like:
+    55554/dfadfasdfs.jpg
+    If [validate_images] is True, [image_folder] must be an existing folder; every image
+    must exist in it, is marked with a boolean 'corrupt' field, and (when it loads) is
+    given 'width' and 'height' fields.
+    [category_remappings] maps WI common names to output category names; pass None to
+    disable remapping.  Rows whose (remapped) common name is 'Blank' are assigned to the
+    'empty' category, which always has ID 0.
+    The output file is always written, then checked with integrity_check_json_db using
+    [image_folder] as the base folder.  Nothing is returned.
+    """
+    #%% Create COCO dictionaries
+    category_name_to_id = {}
+    category_name_to_id['empty'] = 0
+    df = pd.read_csv(csv_file_in)
+    print('Read {} rows from {}'.format(len(df),csv_file_in))
+    image_id_to_image = {}
+    image_id_to_annotations = defaultdict(list)
+    # Each row is one annotation; several rows may refer to the same image
+    # i_row = 0; row = df.iloc[i_row]
+    for i_row,row in df.iterrows():
+        image_id = row['image_id']
+        if image_id not in image_id_to_image:
+            im = {}
+            image_id_to_image[image_id] = im
+            im['id'] = image_id
+            gs_url = row['location']
+            assert gs_url.startswith('gs://')
+            file_name = gs_url.replace('gs://','')
+            if gs_prefix is not None:
+                file_name = file_name.replace(gs_prefix,'')
+            location_id = make_location_id(row['project_id'],row['deployment_id'])
+            im['file_name'] = file_name
+            im['location'] = location_id
+            im['datetime'] = row['timestamp']
+            im['wi_image_info'] = {}
+            for s in wi_extra_image_columns:
+                im['wi_image_info'][s] = str(row[s])
+        else:
+            # We've seen this image before; image-level fields have to agree across rows
+            im = image_id_to_image[image_id]
+            assert im['datetime'] == row['timestamp']
+            location_id = make_location_id(row['project_id'],row['deployment_id'])
+            assert im['location'] == location_id
+        category_name = row['common_name']
+        if category_remappings is not None and category_name in category_remappings:
+            category_name = category_remappings[category_name]
+        if category_name == 'Blank':
+            category_name = 'empty'
+            assert row['is_blank'] == 1
+        else:
+            assert row['is_blank'] == 0
+        assert isinstance(category_name,str)
+        # Category IDs are assigned in order of first appearance
+        if category_name in category_name_to_id:
+            category_id = category_name_to_id[category_name]
+        else:
+            category_id = len(category_name_to_id)
+            category_name_to_id[category_name] = category_id
+        ann = {}
+        ann['image_id'] = image_id
+        annotations_this_image = image_id_to_annotations[image_id]
+        annotation_number = len(annotations_this_image)
+        ann['id'] = image_id + '_' + str(annotation_number).zfill(2)
+        ann['category_id'] = category_id
+        annotations_this_image.append(ann)
+        extra_info = {}
+        for s in wi_extra_annotation_columns:
+            v = row[s]
+            if not isnan(v):
+                extra_info[s] = v
+        ann['wi_extra_info'] = extra_info
+    # ...for each row
+    images = list(image_id_to_image.values())
+    categories = []
+    for category_name in category_name_to_id:
+        category_id = category_name_to_id[category_name]
+        categories.append({'id':category_id,'name':category_name})
+    annotations = []
+    for image_id in image_id_to_annotations:
+        annotations_this_image = image_id_to_annotations[image_id]
+        for ann in annotations_this_image:
+            annotations.append(ann)
+    info = {'version':'1.00','description':'converted from WI export'}
+    info['source_file'] = csv_file_in
+    coco_data = {}
+    coco_data['info'] = info
+    coco_data['images'] = images
+    coco_data['annotations'] = annotations
+    coco_data['categories'] = categories
+    ##%% Validate images, add sizes
+    if validate_images:
+        print('Validating images')
+        # TODO: trivially parallelizable
+        # Check for None explicitly, so a missing folder produces this message rather
+        # than a TypeError from os.path.isdir(None)
+        assert image_folder is not None and os.path.isdir(image_folder), \
+            'Must specify a valid image folder if you specify validate_images=True'
+        # im = images[0]
+        for im in tqdm(images):
+            file_name_relative = im['file_name']
+            file_name_abs = os.path.join(image_folder,file_name_relative)
+            assert os.path.isfile(file_name_abs)
+            im['corrupt'] = False
+            try:
+                pil_im = vis_utils.load_image(file_name_abs)
+            except Exception:
+                im['corrupt'] = True
+            if not im['corrupt']:
+                im['width'] = pil_im.width
+                im['height'] = pil_im.height
+    ##%% Write output json
+    if coco_file_out is None:
+        coco_file_out = csv_file_in + '.json'
+    # Write unconditionally: the integrity check below reads this file, so it has to
+    # exist whether or not the caller supplied an explicit output path.
+    with open(coco_file_out,'w') as f:
+        json.dump(coco_data,f,indent=1)
+    ##%% Validate output
+    from data_management.databases.integrity_check_json_db import \
+        IntegrityCheckOptions,integrity_check_json_db
+    options = IntegrityCheckOptions()
+    # NOTE(review): image existence is checked against image_folder even when it is
+    # None; confirm that integrity_check_json_db tolerates baseDir=None.
+    options.baseDir = image_folder
+    options.bCheckImageExistence = True
+    options.verbose = verbose
+    _ = integrity_check_json_db(coco_file_out,options)
+#%% Interactive driver
+# The 'if False' guard means this cell never runs when the module is imported or
+# executed; it only assigns example values to variables with the same names as the
+# parameters of wi_download_csv_to_coco.
+if False:
+    #%%
+    base_folder = r'a/b/c'
+    csv_file_in = os.path.join(base_folder,'images.csv')
+    coco_file_out = None
+    gs_prefix = 'a_b_c_main/'
+    image_folder = os.path.join(base_folder,'images')
+    validate_images = False
+    verbose = True
+    category_remappings = default_category_remappings
+#%% Command-line driver
+# TODO

data_management/yolo_to_coco.py CHANGED Viewed

@@ -2,10 +2,7 @@
 #
 # yolo_to_coco.py
 #
-# Converts a YOLO-formatted dataset to a COCO-formatted dataset.
-#
-# Currently supports only a single folder (i.e., no recursion).  Treats images without
-# corresponding .txt files as empty.
+# Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
 #
 ########
@@ -14,37 +11,181 @@
 import json
 import os
-from PIL import Image
+from multiprocessing.pool import ThreadPool
+from multiprocessing.pool import Pool
+from functools import partial
 from tqdm import tqdm
 from md_utils.path_utils import find_images
+from md_utils.ct_utils import invert_dictionary
+from md_visualization.visualization_utils import open_image
 from data_management.yolo_output_to_md_output import read_classes_from_yolo_dataset_file
+#%% Support functions
+def filename_to_image_id(fn):
+    """
+    Convert a filename to an image ID by turning every space into an underscore.
+    """
+    pieces = fn.split(' ')
+    return '_'.join(pieces)
+def _process_image(fn_abs,input_folder,category_id_to_name):
+    """
+    Internal support function for processing one image's labels.
+    Reads the image at [fn_abs] to get its size, then converts the YOLO label file with
+    the same base name (.txt or .TXT), if there is one, to COCO-style annotations.
+    [input_folder] is the folder against which the image's relative path (and therefore
+    its ID) is computed; [category_id_to_name] is used only to validate category IDs.
+    Returns a tuple (im,annotations_this_image).  [im] always has 'file_name', 'id',
+    'width', 'height', and 'error'; if the image can't be read, width and height are -1,
+    'error' is the exception string, and the annotation list is empty.
+    """
+    # Create the image object for this image
+    fn_relative = os.path.relpath(fn_abs,input_folder)
+    image_id = filename_to_image_id(fn_relative)
+    # This is done in a separate loop now
+    #
+    # assert image_id not in image_ids, \
+    #    'Oops, you have hit a very esoteric case where you have the same filename ' + \
+    #    'with both spaces and underscores, this is not currently handled.'
+    # image_ids.add(image_id)
+    im = {}
+    im['file_name'] = fn_relative
+    im['id'] = image_id
+    annotations_this_image = []
+    try:
+        pil_im = open_image(fn_abs)
+        im_width, im_height = pil_im.size
+        im['width'] = im_width
+        im['height'] = im_height
+        im['error'] = None
+    except Exception as e:
+        # Without the image size we can't convert normalized boxes, so return now
+        print('Warning: error reading {}:\n{}'.format(fn_relative,str(e)))
+        im['width'] = -1
+        im['height'] = -1
+        im['error'] = str(e)
+        return (im,annotations_this_image)
+    # Is there an annotation file for this image?
+    annotation_file = os.path.splitext(fn_abs)[0] + '.txt'
+    if not os.path.isfile(annotation_file):
+        annotation_file = os.path.splitext(fn_abs)[0] + '.TXT'
+    if os.path.isfile(annotation_file):
+        with open(annotation_file,'r') as f:
+            lines = f.readlines()
+        # Strip once up front; blank lines are skipped below
+        lines = [s.strip() for s in lines]
+        # s = lines[0]
+        annotation_number = 0
+        for s in lines:
+            if len(s) == 0:
+                continue
+            tokens = s.split()
+            # Name the file and the offending line, so a malformed label file can be
+            # found without re-running under a debugger
+            assert len(tokens) == 5, \
+                'Expected 5 tokens, found {} in annotation file {}: {}'.format(
+                    len(tokens),annotation_file,s)
+            category_id = int(tokens[0])
+            assert category_id in category_id_to_name, \
+                'Unrecognized category ID {} in annotation file {}'.format(
+                    category_id,annotation_file)
+            ann = {}
+            ann['id'] = im['id'] + '_' + str(annotation_number)
+            ann['image_id'] = im['id']
+            ann['category_id'] = category_id
+            ann['sequence_level_annotation'] = False
+            # COCO: [x_min, y_min, width, height] in absolute coordinates
+            # YOLO: [class, x_center, y_center, width, height] in normalized coordinates
+            yolo_bbox = [float(x) for x in tokens[1:]]
+            normalized_x_center = yolo_bbox[0]
+            normalized_y_center = yolo_bbox[1]
+            normalized_width = yolo_bbox[2]
+            normalized_height = yolo_bbox[3]
+            absolute_x_center = normalized_x_center * im_width
+            absolute_y_center = normalized_y_center * im_height
+            absolute_width = normalized_width * im_width
+            absolute_height = normalized_height * im_height
+            absolute_x_min = absolute_x_center - absolute_width / 2
+            absolute_y_min = absolute_y_center - absolute_height / 2
+            coco_bbox = [absolute_x_min, absolute_y_min, absolute_width, absolute_height]
+            ann['bbox'] = coco_bbox
+            annotation_number += 1
+            annotations_this_image.append(ann)
+        # ...for each annotation
+    # ...if this image has annotations
+    return (im,annotations_this_image)
+# ...def _process_image(...)
 #%% Main conversion function
-def yolo_to_coco(input_folder,class_name_file,output_file=None):
+def yolo_to_coco(input_folder,
+                 class_name_file,
+                 output_file=None,
+                 empty_image_handling='no_annotations',
+                 empty_image_category_name='empty',
+                 error_image_handling='no_annotations',
+                 allow_images_without_label_files=True,
+                 n_workers=1,
+                 pool_type='thread',
+                 recursive=True,
+                 exclude_string=None,
+                 include_string=None):
     """
     Convert the YOLO-formatted data in [input_folder] to a COCO-formatted dictionary,
     reading class names from [class_name_file], which can be a flat list with a .txt
     extension or a YOLO dataset.yml file.  Optionally writes the output dataset to [output_file].
+    empty_image_handling can be:
+    * 'no_annotations': include the image in the image list, with no annotations
+    * 'empty_annotations': include the image in the image list, and add an annotation without
+      any bounding boxes, using a category called [empty_image_category_name].
+    * 'skip': don't include the image in the image list
+    * 'error': there shouldn't be any empty images
+    error_image_handling can be:
+    * 'skip': don't include the image at all
+    * 'no_annotations': include with no annotations
+    All images will be assigned an "error" value, usually None.
     Returns a COCO-formatted dictionary.
     """
-    # Validate input
+    ## Validate input
     assert os.path.isdir(input_folder)
     assert os.path.isfile(class_name_file)
-    # Read class names
+    assert empty_image_handling in \
+        ('no_annotations','empty_annotations','skip','error'), \
+            'Unrecognized empty image handling spec: {}'.format(empty_image_handling)
+    ## Read class names
     ext = os.path.splitext(class_name_file)[1][1:]
-    assert ext in ('yml','txt','yaml'), 'Unrecognized class name file type {}'.format(
+    assert ext in ('yml','txt','yaml','data'), 'Unrecognized class name file type {}'.format(
         class_name_file)
-    if ext == 'txt':
+    if ext in ('txt','data'):
         with open(class_name_file,'r') as f:
             lines = f.readlines()
@@ -70,14 +211,41 @@ def yolo_to_coco(input_folder,class_name_file,output_file=None):
         assert ext in ('yml','yaml')
         category_id_to_name = read_classes_from_yolo_dataset_file(class_name_file)
+    # Find or create the empty image category, if necessary
+    empty_category_id = None
+    if (empty_image_handling == 'empty_annotations'):
+        category_name_to_id = invert_dictionary(category_id_to_name)
+        if empty_image_category_name in category_name_to_id:
+            empty_category_id = category_name_to_id[empty_image_category_name]
+            print('Using existing empty image category with name {}, ID {}'.format(
+                empty_image_category_name,empty_category_id))
+        else:
+            empty_category_id = len(category_id_to_name)
+            print('Adding an empty category with name {}, ID {}'.format(
+                empty_image_category_name,empty_category_id))
+            category_id_to_name[empty_category_id] = empty_image_category_name
-    # Enumerate images
+    ## Enumerate images
+    print('Enumerating images...')
-    image_files = find_images(input_folder,recursive=False)
+    image_files_abs = find_images(input_folder,recursive=recursive,convert_slashes=True)
-    images = []
-    annotations = []
+    n_files_original = len(image_files_abs)
+    # Optionally include/exclude images matching specific strings
+    if exclude_string is not None:
+        image_files_abs = [fn for fn in image_files_abs if exclude_string not in fn]
+    if include_string is not None:
+        image_files_abs = [fn for fn in image_files_abs if include_string in fn]
+    if len(image_files_abs) != n_files_original or exclude_string is not None or include_string is not None:
+        n_excluded = n_files_original - len(image_files_abs)
+        print('Excluded {} of {} images based on filenames'.format(n_excluded,n_files_original))
     categories = []
     for category_id in category_id_to_name:
@@ -87,79 +255,111 @@ def yolo_to_coco(input_folder,class_name_file,output_file=None):
     info['version'] = '1.0'
     info['description'] = 'Converted from YOLO format'
-    # fn = image_files[0]
-    for fn in tqdm(image_files):
-        im = Image.open(fn)
-        im_width, im_height = im.size
-        # Create the image object for this image
-        im = {}
-        fn_relative = os.path.relpath(fn,input_folder)
-        im['file_name'] = fn_relative
-        im['id'] = fn_relative.replace(' ','_')
-        im['location'] = 'unknown'
-        images.append(im)
-        # Is there an annotation file for this image?
-        annotation_file = os.path.splitext(fn)[0] + '.txt'
-        if not os.path.isfile(annotation_file):
-            annotation_file = os.path.splitext(fn)[0] + '.TXT'
-        if not os.path.isfile(annotation_file):
-            # This is an image with no annotations, currently don't do anything special
-            # here
-            pass
+    image_ids = set()
+    ## If we're expected to have labels for every image, check before we process all the images
+    if not allow_images_without_label_files:
+        print('Verifying that label files exist')
+        for image_file_abs in tqdm(image_files_abs):
+            label_file_abs = os.path.splitext(image_file_abs)[0] + '.txt'
+            assert os.path.isfile(label_file_abs), \
+                'No annotation file for {}'.format(image_file_abs)
+    ## Initial loop to make sure image IDs will be unique
+    print('Validating image IDs...')
+    for fn_abs in tqdm(image_files_abs):
+        fn_relative = os.path.relpath(fn_abs,input_folder)
+        image_id = filename_to_image_id(fn_relative)
+        assert image_id not in image_ids, \
+            'Oops, you have hit a very esoteric case where you have the same filename ' + \
+            'with both spaces and underscores, this is not currently handled.'
+        image_ids.add(image_id)
+    ## Main loop to process labels
+    print('Processing labels...')
+    if n_workers <= 1:
+        image_results = []
+        for fn_abs in tqdm(image_files_abs):
+            image_results.append(_process_image(fn_abs,input_folder,category_id_to_name))
+    else:
+        assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
+        if pool_type == 'thread':
+            pool = ThreadPool(n_workers)
         else:
-            with open(annotation_file,'r') as f:
-                lines = f.readlines()
-            lines = [s.strip() for s in lines]
-            # s = lines[0]
-            annotation_number = 0
-            for s in lines:
-                if len(s.strip()) == 0:
-                    continue
-                tokens = s.split()
-                assert len(tokens) == 5
-                category_id = int(tokens[0])
-                assert category_id in category_id_to_name, \
-                    'Unrecognized category ID {} in annotation file {}'.format(
-                        category_id,annotation_file)
-                ann = {}
-                ann['id'] = im['id'] + '_' + str(annotation_number)
-                ann['image_id'] = im['id']
-                ann['category_id'] = category_id
-                ann['sequence_level_annotation'] = False
-                # COCO: [x_min, y_min, width, height] in absolute coordinates
-                # YOLO: [class, x_center, y_center, width, height] in normalized coordinates
-                yolo_bbox = [float(x) for x in tokens[1:]]
-                normalized_x_center = yolo_bbox[0]
-                normalized_y_center = yolo_bbox[1]
-                normalized_width = yolo_bbox[2]
-                normalized_height = yolo_bbox[3]
-                absolute_x_center = normalized_x_center * im_width
-                absolute_y_center = normalized_y_center * im_height
-                absolute_width = normalized_width * im_width
-                absolute_height = normalized_height * im_height
-                absolute_x_min = absolute_x_center - absolute_width / 2
-                absolute_y_min = absolute_y_center - absolute_height / 2
-                coco_bbox = [absolute_x_min, absolute_y_min, absolute_width, absolute_height]
+            pool = Pool(n_workers)
+        print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
+        p = partial(_process_image,input_folder=input_folder,
+                    category_id_to_name=category_id_to_name)
+        image_results = list(tqdm(pool.imap(p, image_files_abs),
+                                  total=len(image_files_abs)))
-                ann['bbox'] = coco_bbox
-                annotation_number += 1
+    assert len(image_results) == len(image_files_abs)
+    ## Re-assembly of results into a COCO dict
+    print('Assembling labels...')
+    images = []
+    annotations = []
+    for image_result in tqdm(image_results):
+        im = image_result[0]
+        annotations_this_image = image_result[1]
+        # If we have annotations for this image
+        if len(annotations_this_image) > 0:
+            assert im['error'] is None
+            images.append(im)
+            for ann in annotations_this_image:
+                annotations.append(ann)
-                annotations.append(ann)
+        # If this image failed to read
+        elif im['error'] is not None:
+            if error_image_handling == 'skip':
+                pass
+            elif error_image_handling == 'no_annotations':
+                images.append(im)
-            # ...for each annotation
+        # If this image read successfully, but there are no annotations
+        else:
-        # ...if this image has annotations
+            if empty_image_handling == 'skip':
+                pass
+            elif empty_image_handling == 'no_annotations':
+                images.append(im)
+            elif empty_image_handling == 'empty_annotations':
+                assert empty_category_id  is not None
+                ann = {}
+                ann['id'] = im['id'] + '_0'
+                ann['image_id'] = im['id']
+                ann['category_id'] = empty_category_id
+                ann['sequence_level_annotation'] = False
+                # This would also be a reasonable thing to do, but it's not the convention
+                # we're adopting.
+                # ann['bbox'] = [0,0,0,0]
+                annotations.append(ann)
+                images.append(im)
-    # ...for each image
+    # ...for each image result
     print('Read {} annotations for {} images'.format(len(annotations),
                                                      len(images)))

megadetector 5.0.7__py3-none-any.whl → 5.0.8__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.7__py3-none-any.whl → 5.0.8__py3-none-any.whl