megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.

Files changed (75)
  1. api/batch_processing/data_preparation/manage_local_batch.py +297 -202
  2. api/batch_processing/data_preparation/manage_video_batch.py +7 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
  5. api/batch_processing/postprocessing/compare_batch_results.py +111 -61
  6. api/batch_processing/postprocessing/convert_output_format.py +24 -6
  7. api/batch_processing/postprocessing/load_api_results.py +56 -72
  8. api/batch_processing/postprocessing/md_to_labelme.py +119 -51
  9. api/batch_processing/postprocessing/merge_detections.py +30 -5
  10. api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
  11. api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
  12. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
  13. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  14. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  15. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
  16. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  17. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  18. classification/prepare_classification_script.py +191 -191
  19. data_management/cct_json_utils.py +7 -2
  20. data_management/coco_to_labelme.py +263 -0
  21. data_management/coco_to_yolo.py +72 -48
  22. data_management/databases/integrity_check_json_db.py +75 -64
  23. data_management/databases/subset_json_db.py +1 -1
  24. data_management/generate_crops_from_cct.py +1 -1
  25. data_management/get_image_sizes.py +44 -26
  26. data_management/importers/animl_results_to_md_results.py +3 -5
  27. data_management/importers/noaa_seals_2019.py +2 -2
  28. data_management/importers/zamba_results_to_md_results.py +2 -2
  29. data_management/labelme_to_coco.py +264 -127
  30. data_management/labelme_to_yolo.py +96 -53
  31. data_management/lila/create_lila_blank_set.py +557 -0
  32. data_management/lila/create_lila_test_set.py +2 -1
  33. data_management/lila/create_links_to_md_results_files.py +1 -1
  34. data_management/lila/download_lila_subset.py +138 -45
  35. data_management/lila/generate_lila_per_image_labels.py +23 -14
  36. data_management/lila/get_lila_annotation_counts.py +16 -10
  37. data_management/lila/lila_common.py +15 -42
  38. data_management/lila/test_lila_metadata_urls.py +116 -0
  39. data_management/read_exif.py +65 -16
  40. data_management/remap_coco_categories.py +84 -0
  41. data_management/resize_coco_dataset.py +14 -31
  42. data_management/wi_download_csv_to_coco.py +239 -0
  43. data_management/yolo_output_to_md_output.py +40 -13
  44. data_management/yolo_to_coco.py +313 -100
  45. detection/process_video.py +36 -14
  46. detection/pytorch_detector.py +1 -1
  47. detection/run_detector.py +73 -18
  48. detection/run_detector_batch.py +116 -27
  49. detection/run_inference_with_yolov5_val.py +135 -27
  50. detection/run_tiled_inference.py +153 -43
  51. detection/tf_detector.py +2 -1
  52. detection/video_utils.py +4 -2
  53. md_utils/ct_utils.py +101 -6
  54. md_utils/md_tests.py +264 -17
  55. md_utils/path_utils.py +326 -47
  56. md_utils/process_utils.py +26 -7
  57. md_utils/split_locations_into_train_val.py +215 -0
  58. md_utils/string_utils.py +10 -0
  59. md_utils/url_utils.py +66 -3
  60. md_utils/write_html_image_list.py +12 -2
  61. md_visualization/visualization_utils.py +380 -74
  62. md_visualization/visualize_db.py +41 -10
  63. md_visualization/visualize_detector_output.py +185 -104
  64. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
  65. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
  66. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
  67. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  68. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  69. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  70. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  71. taxonomy_mapping/species_lookup.py +33 -13
  72. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  73. md_visualization/visualize_megadb.py +0 -183
  74. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
  75. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0

data_management/databases/integrity_check_json_db.py
@@ -24,9 +24,9 @@ import sys
 
 from multiprocessing.pool import ThreadPool
 from operator import itemgetter
-from PIL import Image
 from tqdm import tqdm
 
+from md_visualization.visualization_utils import open_image
 from md_utils import ct_utils
 
 
@@ -41,6 +41,8 @@ class IntegrityCheckOptions:
     bRequireLocation = True
     iMaxNumImages = -1
     nThreads = 10
+    verbose = True
+
 
 # This is used in a medium-hacky way to share modified options across threads
 defaultOptions = IntegrityCheckOptions()
@@ -65,7 +67,9 @@ def check_image_existence_and_size(image,options=None):
         print('Missing image size in {}'.format(filePath))
         return False
 
-    width, height = Image.open(filePath).size
+    # width, height = Image.open(filePath).size
+    pil_im = open_image(filePath)
+    width,height = pil_im.size
     if (not (width == image['width'] and height == image['height'])):
         print('Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
             image['id'], filePath, image['width'], image['height'], width, height))
@@ -86,9 +90,13 @@ def integrity_check_json_db(jsonFile, options=None):
 
     if options.bCheckImageSizes:
         options.bCheckImageExistence = True
-
-    print(options.__dict__)
+
+    if options.verbose:
+        print(options.__dict__)
 
+    if options.baseDir is None:
+        options.baseDir = ''
+
     baseDir = options.baseDir
 
 
@@ -102,8 +110,9 @@ def integrity_check_json_db(jsonFile, options=None):
 
     assert os.path.isfile(jsonFile), '.json file {} does not exist'.format(jsonFile)
 
-    print('Reading .json {} with base dir [{}]...'.format(
-        jsonFile,baseDir))
+    if options.verbose:
+        print('Reading .json {} with base dir [{}]...'.format(
+            jsonFile,baseDir))
 
     with open(jsonFile,'r') as f:
         data = json.load(f)
@@ -116,10 +125,9 @@ def integrity_check_json_db(jsonFile, options=None):
     annotations = data['annotations']
     categories = data['categories']
     # info = data['info']
-    assert 'info' in data
+    assert 'info' in data, 'No info struct in database'
 
-    if len(baseDir) > 0:
-
+    if len(baseDir) > 0:
         assert os.path.isdir(baseDir), 'Base directory {} does not exist'.format(baseDir)
 
 
@@ -131,7 +139,8 @@ def integrity_check_json_db(jsonFile, options=None):
     catNameToCat = {}
     imageLocationSet = set()
 
-    print('Checking categories...')
+    if options.verbose:
+        print('Checking categories...')
 
     for cat in tqdm(categories):
 
@@ -139,8 +148,8 @@ def integrity_check_json_db(jsonFile, options=None):
         assert 'name' in cat
         assert 'id' in cat
 
-        assert isinstance(cat['id'],int), 'Illegal category ID type'
-        assert isinstance(cat['name'],str), 'Illegal category name type'
+        assert isinstance(cat['id'],int), 'Illegal category ID type: [{}]'.format(str(cat['id']))
+        assert isinstance(cat['name'],str), 'Illegal category name type [{}]'.format(str(cat['name']))
 
         catId = cat['id']
         catName = cat['name']
@@ -155,11 +164,13 @@ def integrity_check_json_db(jsonFile, options=None):
 
     # ...for each category
 
-    print('\nChecking images...')
+    if options.verbose:
+        print('\nChecking images...')
 
     if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:
 
-        print('Trimming image list to {}'.format(options.iMaxNumImages))
+        if options.verbose:
+            print('Trimming image list to {}'.format(options.iMaxNumImages))
         images = images[0:options.iMaxNumImages]
 
     imagePathsInJson = set()
@@ -215,7 +226,8 @@ def integrity_check_json_db(jsonFile, options=None):
     # Are we checking for unused images?
     if (len(baseDir) > 0) and options.bFindUnusedImages:
 
-        print('\nEnumerating images...')
+        if options.verbose:
+            print('\nEnumerating images...')
 
         # Recursively enumerate images
         imagePaths = []
@@ -242,8 +254,9 @@ def integrity_check_json_db(jsonFile, options=None):
 
     if len(baseDir) == 0:
         print('Warning: checking image sizes without a base directory, assuming "."')
-
-    print('Checking image existence and/or image sizes...')
+
+    if options.verbose:
+        print('Checking image existence and/or image sizes...')
 
     if options.nThreads is not None and options.nThreads > 1:
         pool = ThreadPool(options.nThreads)
@@ -263,9 +276,9 @@ def integrity_check_json_db(jsonFile, options=None):
 
     # ...for each image
 
-    print('{} validation errors (of {})'.format(len(validationErrors),len(images)))
-
-    print('Checking annotations...')
+    if options.verbose:
+        print('{} validation errors (of {})'.format(len(validationErrors),len(images)))
+        print('Checking annotations...')
 
     nBoxes = 0
 
@@ -300,58 +313,56 @@
         catIdToCat[ann['category_id']]['_count'] +=1
 
     # ...for each annotation
-
-
-    ##%% Print statistics
 
-    # Find un-annotated images and multi-annotation images
-    nUnannotated = 0
-    nMultiAnnotated = 0
+    sortedCategories = sorted(categories, key=itemgetter('_count'), reverse=True)
 
-    for image in images:
-        if image['_count'] == 0:
-            nUnannotated += 1
-        elif image['_count'] > 1:
-            nMultiAnnotated += 1
-
-    print('Found {} unannotated images, {} images with multiple annotations'.format(
-        nUnannotated,nMultiAnnotated))
 
-    if (len(baseDir) > 0) and options.bFindUnusedImages:
-        print('Found {} unused image files'.format(len(unusedFiles)))
-
-    nUnusedCategories = 0
+    ##%% Print statistics
 
-    # Find unused categories
-    for cat in categories:
-        if cat['_count'] == 0:
-            print('Unused category: {}'.format(cat['name']))
-            nUnusedCategories += 1
+    if options.verbose:
 
-    print('Found {} unused categories'.format(nUnusedCategories))
+        # Find un-annotated images and multi-annotation images
+        nUnannotated = 0
+        nMultiAnnotated = 0
+
+        for image in images:
+            if image['_count'] == 0:
+                nUnannotated += 1
+            elif image['_count'] > 1:
+                nMultiAnnotated += 1
+
+        print('Found {} unannotated images, {} images with multiple annotations'.format(
+            nUnannotated,nMultiAnnotated))
+
+        if (len(baseDir) > 0) and options.bFindUnusedImages:
+            print('Found {} unused image files'.format(len(unusedFiles)))
 
-    sequenceString = 'no sequence info'
-    if len(sequences) > 0:
-        sequenceString = '{} sequences'.format(len(sequences))
+        nUnusedCategories = 0
 
-    print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
-        len(images),len(annotations),nBoxes,len(categories),sequenceString))
-
-    if len(imageLocationSet) > 0:
-        print('DB contains images from {} locations\n'.format(len(imageLocationSet)))
-
-    # Prints a list of categories sorted by count
-    #
-    # https://stackoverflow.com/questions/72899/how-do-i-sort-a-list-of-dictionaries-by-a-value-of-the-dictionary
-
-    sortedCategories = sorted(categories, key=itemgetter('_count'), reverse=True)
-
-    print('Categories and annotation (not image) counts:\n')
-
-    for cat in sortedCategories:
-        print('{:6} {}'.format(cat['_count'],cat['name']))
+        # Find unused categories
+        for cat in categories:
+            if cat['_count'] == 0:
+                print('Unused category: {}'.format(cat['name']))
+                nUnusedCategories += 1
+
+        print('Found {} unused categories'.format(nUnusedCategories))
+
+        sequenceString = 'no sequence info'
+        if len(sequences) > 0:
+            sequenceString = '{} sequences'.format(len(sequences))
+
+        print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
+            len(images),len(annotations),nBoxes,len(categories),sequenceString))
 
-    print('')
+        if len(imageLocationSet) > 0:
+            print('DB contains images from {} locations\n'.format(len(imageLocationSet)))
+
+        print('Categories and annotation (not image) counts:\n')
+
+        for cat in sortedCategories:
+            print('{:6} {}'.format(cat['_count'],cat['name']))
+
+        print('')
 
     errorInfo = {}
     errorInfo['unusedFiles'] = unusedFiles
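
Net effect of the integrity_check_json_db.py changes: a new verbose flag on
IntegrityCheckOptions gates all progress and statistics output, image sizes are
read via md_visualization's open_image() rather than PIL directly, and a None
baseDir is now tolerated. A minimal usage sketch of the quieter call path; the
.json path and base folder below are hypothetical:

    from data_management.databases.integrity_check_json_db import (
        IntegrityCheckOptions, integrity_check_json_db)

    options = IntegrityCheckOptions()
    options.baseDir = '/data/camera-traps'  # hypothetical base folder
    options.bCheckImageSizes = True         # also forces bCheckImageExistence
    options.verbose = False                 # new in 5.0.8: suppress progress/statistics output
    integrity_check_json_db('dataset.json', options)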

data_management/databases/subset_json_db.py
@@ -65,7 +65,7 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False):
     # Write the output file if requested
     if output_json is not None:
         print('Writing output .json...')
-        json.dump(output_data,open(output_json,'w'),indent=4)
+        json.dump(output_data,open(output_json,'w'),indent=1)
 
     return output_data
 
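
For context, subset_json_db() filters a .json database down to the records
matching a query string; the change above only makes the output more compact
(indent=1 instead of indent=4). A usage sketch; the filenames are hypothetical,
and the substring-matching behavior of [query] is an assumption rather than
something shown in this diff:

    from data_management.databases.subset_json_db import subset_json_db

    # Keep only entries matching 'site-a', writing indent=1 output
    data = subset_json_db('all_images.json', 'site-a',
                          output_json='site_a.json', ignore_case=True)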

data_management/generate_crops_from_cct.py
@@ -164,4 +164,4 @@ if False:
     options.box_expansion = 25
 
     htmlOutputFile,db = visualize_db(db_path,output_dir,image_base_dir,options)
-
+

data_management/get_image_sizes.py
@@ -15,6 +15,8 @@ import os
 from PIL import Image
 import sys
 
+from md_utils.path_utils import find_images
+
 from multiprocessing.pool import ThreadPool
 from multiprocessing.pool import Pool
 from functools import partial
@@ -27,7 +29,11 @@ use_threads = False
 
 #%% Processing functions
 
-def process_image(image_path,image_prefix=None):
+def _get_image_size(image_path,image_prefix=None):
+    """
+    Support function to get the size of a single image. Returns a (path,w,h) tuple.
+    w and h will be -1 if the image fails to load.
+    """
 
     if image_prefix is not None:
         full_path = os.path.join(image_prefix,image_path)
@@ -49,43 +55,56 @@ def process_image(image_path,image_prefix=None):
         return (image_path,-1,-1)
 
 
-def process_images(filenames,image_prefix=None,n_threads=default_n_threads):
+def get_image_sizes(filenames,image_prefix=None,output_file=None,
+                    n_workers=default_n_threads,use_threads=True,
+                    recursive=True):
+    """
+    Get the width and height of all images in [filenames], which can be:
+
+    * A .json-formatted file
+    * A folder
+    * A list of files
+
+    ...returning a list of (path,w,h) tuples, and optionally writing the results to [output_file].
+    """
+
+    if output_file is not None:
+        assert os.path.isdir(os.path.dirname(output_file)), \
+            'Illegal output file {}, parent folder does not exist'.format(output_file)
+
+    if isinstance(filenames,str) and os.path.isfile(filenames):
+        with open(filenames,'r') as f:
+            filenames = json.load(f)
+            filenames = [s.strip() for s in filenames]
+    elif isinstance(filenames,str) and os.path.isdir(filenames):
+        filenames = find_images(filenames,recursive=recursive,
+                                return_relative_paths=False,convert_slashes=True)
+    else:
+        assert isinstance(filenames,list)
 
-    if n_threads <= 1:
+    if n_workers <= 1:
 
         all_results = []
         for i_file,fn in tqdm(enumerate(filenames),total=len(filenames)):
-            all_results.append(process_image(fn,image_prefix=image_prefix))
+            all_results.append(_get_image_size(fn,image_prefix=image_prefix))
 
     else:
 
-        print('Creating a pool with {} threads'.format(n_threads))
+        print('Creating a pool with {} workers'.format(n_workers))
         if use_threads:
-            pool = ThreadPool(n_threads)
+            pool = ThreadPool(n_workers)
         else:
-            pool = Pool(n_threads)
+            pool = Pool(n_workers)
         # all_results = list(tqdm(pool.imap(process_image, filenames), total=len(filenames)))
         all_results = list(tqdm(pool.imap(
-            partial(process_image,image_prefix=image_prefix), filenames), total=len(filenames)))
-
-    return all_results
-
-
-def process_list_file(input_file,output_file=None,image_prefix=None,n_threads=default_n_threads):
-
-    assert os.path.isdir(os.path.dirname(output_file))
-    assert os.path.isfile(input_file)
-
-    with open(input_file,'r') as f:
-        filenames = json.load(f)
-        filenames = [s.strip() for s in filenames]
-
-    all_results = process_images(filenames,image_prefix=image_prefix,n_threads=n_threads)
+            partial(_get_image_size,image_prefix=image_prefix), filenames), total=len(filenames)))
 
     if output_file is not None:
        with open(output_file,'w') as f:
            json.dump(all_results,f,indent=1)
-
+
+    return all_results
+
 
 #%% Interactive driver
 
@@ -116,8 +135,7 @@ if False:
 
     #%%
 
-    # process_list_file(image_list_file,image_size_file,image_prefix=base_dir)
-    process_list_file(relative_image_list_file,image_size_file,image_prefix=base_dir,n_threads=4)
+    get_image_sizes(relative_image_list_file,image_size_file,image_prefix=base_dir,n_threads=4)
 
 
 #%% Command-line driver
@@ -136,7 +154,7 @@ def main():
 
     args = parser.parse_args()
 
-    process_list_file(args.input_file,args.output_file,args.image_prefix,args.n_threads)
+    _ = get_image_sizes(args.input_file,args.output_file,args.image_prefix,args.n_threads)
 
 
 if __name__ == '__main__':
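
Net effect of the get_image_sizes.py changes: process_image() becomes the
private helper _get_image_size(), and process_images()/process_list_file() are
merged into a single get_image_sizes() entry point that accepts a folder, a
.json-formatted file list, or a Python list, per the new docstring. A usage
sketch based on that signature; the folder and output paths are hypothetical,
and note that the keyword for the worker count in the new signature is
n_workers:

    from data_management.get_image_sizes import get_image_sizes

    # Size every image under a folder, eight threads at a time
    results = get_image_sizes('/data/images', output_file='sizes.json',
                              n_workers=8, use_threads=True)
    for path, w, h in results:
        if w == -1:
            print('Failed to read {}'.format(path))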

data_management/importers/animl_results_to_md_results.py
@@ -39,8 +39,6 @@ def animl_results_to_md_results(input_file,output_file=None):
     If [output_file] is None, '.json' will be appended to the input file.
     """
 
-    #%%
-
     if output_file is None:
         output_file = input_file + '.json'
 
@@ -111,7 +109,7 @@
     with open(output_file,'w') as f:
         json.dump(results,f,indent=1)
 
-# ...zamba_results_to_md_results(...)
+# ...animl_results_to_md_results(...)
 
 
 #%% Interactive driver
@@ -145,7 +143,7 @@ def main():
         '--output_file',
         type=str,
         default=None,
-        help='output .json file (defaults to input file appened with ".json")')
+        help='output .json file (defaults to input file appended with ".json")')
 
     if len(sys.argv[1:]) == 0:
         parser.print_help()
@@ -157,4 +155,4 @@ def main():
 
 if __name__ == '__main__':
     main()
-
+
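
For context, animl_results_to_md_results() converts an Animl results file to
MegaDetector output format; per the docstring above, passing output_file=None
appends '.json' to the input path. A minimal sketch; the input filename is
hypothetical, and the .csv extension is an assumption about the Animl results
format rather than something shown in this diff:

    from data_management.importers.animl_results_to_md_results import \
        animl_results_to_md_results

    # With output_file=None, this writes animl_results.csv.json
    animl_results_to_md_results('animl_results.csv')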

data_management/importers/noaa_seals_2019.py
@@ -2,7 +2,7 @@
 #
 # noaa_seals_2019.py
 #
-# Prepare the NOAA Artic Seals 2019 metadata for LILA.
+# Prepare the NOAA Arctic Seals 2019 metadata for LILA.
 #
 ########
 
@@ -165,7 +165,7 @@ for b in ir_boxes:
         use_normalized_coordinates=False,
         thickness=3)
 
-visualization_utils.show_images_in_a_row([img_rgb,img_ir])
+# visualization_utils.show_images_in_a_row([img_rgb,img_ir])
 
 
 #%% Save images

data_management/importers/zamba_results_to_md_results.py
@@ -166,7 +166,7 @@ def main():
         '--output_file',
         type=str,
         default=None,
-        help='output .json file (defaults to input file appened with ".json")')
+        help='output .json file (defaults to input file appended with ".json")')
 
     if len(sys.argv[1:]) == 0:
         parser.print_help()
@@ -178,4 +178,4 @@ def main():
 
 if __name__ == '__main__':
     main()
-
+