megadetector 5.0.23__py3-none-any.whl → 5.0.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (42)
  1. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +2 -3
  2. megadetector/classification/merge_classification_detection_output.py +2 -2
  3. megadetector/data_management/coco_to_labelme.py +2 -1
  4. megadetector/data_management/databases/integrity_check_json_db.py +15 -14
  5. megadetector/data_management/databases/subset_json_db.py +49 -21
  6. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
  7. megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
  8. megadetector/data_management/mewc_to_md.py +340 -0
  9. megadetector/data_management/speciesnet_to_md.py +41 -0
  10. megadetector/data_management/yolo_output_to_md_output.py +15 -8
  11. megadetector/detection/process_video.py +24 -7
  12. megadetector/detection/pytorch_detector.py +841 -160
  13. megadetector/detection/run_detector.py +341 -146
  14. megadetector/detection/run_detector_batch.py +307 -70
  15. megadetector/detection/run_inference_with_yolov5_val.py +61 -4
  16. megadetector/detection/tf_detector.py +6 -1
  17. megadetector/postprocessing/{combine_api_outputs.py → combine_batch_outputs.py} +10 -13
  18. megadetector/postprocessing/compare_batch_results.py +236 -7
  19. megadetector/postprocessing/create_crop_folder.py +358 -0
  20. megadetector/postprocessing/md_to_labelme.py +7 -7
  21. megadetector/postprocessing/md_to_wi.py +40 -0
  22. megadetector/postprocessing/merge_detections.py +1 -1
  23. megadetector/postprocessing/postprocess_batch_results.py +12 -5
  24. megadetector/postprocessing/separate_detections_into_folders.py +32 -4
  25. megadetector/postprocessing/validate_batch_results.py +9 -4
  26. megadetector/utils/ct_utils.py +236 -45
  27. megadetector/utils/directory_listing.py +3 -3
  28. megadetector/utils/gpu_test.py +125 -0
  29. megadetector/utils/md_tests.py +455 -116
  30. megadetector/utils/path_utils.py +43 -2
  31. megadetector/utils/wi_utils.py +2691 -0
  32. megadetector/visualization/visualization_utils.py +95 -18
  33. megadetector/visualization/visualize_db.py +25 -7
  34. megadetector/visualization/visualize_detector_output.py +60 -13
  35. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/METADATA +11 -23
  36. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/RECORD +39 -36
  37. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/WHEEL +1 -1
  38. megadetector/detection/detector_training/__init__.py +0 -0
  39. megadetector/detection/detector_training/model_main_tf2.py +0 -114
  40. megadetector/utils/torch_test.py +0 -32
  41. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/LICENSE +0 -0
  42. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/top_level.txt +0 -0
@@ -14,9 +14,8 @@ import redis
 import argparse
 import PIL
 
-from io import BytesIO
-
-from detection.run_detector import load_detector, convert_to_tf_coords
+from detection.run_detector import load_detector
+from utils.ct_utils import convert_xywh_to_xyxy as convert_to_tf_coords
 import config
 import visualization.visualization_utils as vis_utils
 
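
The replaced import keeps the old convert_to_tf_coords name but now points it at a generic box-format helper, ct_utils.convert_xywh_to_xyxy. As a rough sketch of what that kind of conversion does (an illustration only, assuming boxes in [x_min, y_min, width, height] form; the real helper lives in megadetector.utils.ct_utils):

    # Illustration: convert [x_min, y_min, width, height] to [x_min, y_min, x_max, y_max],
    # the conversion implied by the convert_xywh_to_xyxy name.
    def xywh_to_xyxy(box):
        x_min, y_min, w, h = box
        return [x_min, y_min, x_min + w, y_min + h]

    print(xywh_to_xyxy([10, 20, 100, 50]))  # [10, 20, 110, 70]
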
@@ -70,7 +70,7 @@ from typing import Any
 import pandas as pd
 from tqdm import tqdm
 
-from megadetector.utils.ct_utils import truncate_float
+from megadetector.utils.ct_utils import round_float
 
 
 #%% Example usage
@@ -124,7 +124,7 @@ def row_to_classification_list(row: Mapping[str, Any],
 
     # filter out confidences below the threshold, and set precision to 4
     result = [
-        (k, truncate_float(conf, precision=4))
+        (k, round_float(conf, precision=4))
        for k, conf in result if conf >= threshold
     ]
 
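
The truncate_float → round_float change (in the import above and in this list comprehension) switches from truncating classification confidences after the fourth decimal to rounding them to the nearest fourth decimal. A minimal illustration of the difference, using plain Python rather than the ct_utils implementations:

    import math

    conf = 0.123456

    # Truncate at 4 decimals (old truncate_float-style behavior, for non-negative values)
    truncated = math.floor(conf * 10**4) / 10**4    # 0.1234

    # Round to the nearest 4th decimal (new round_float-style behavior)
    rounded = round(conf, 4)                        # 0.1235
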
@@ -18,6 +18,7 @@ from tqdm import tqdm
 from collections import defaultdict
 
 from megadetector.visualization.visualization_utils import open_image
+from megadetector.detection.run_detector import FAILURE_IMAGE_OPEN
 
 
 #%% Functions
@@ -145,7 +146,7 @@ def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check
         except Exception:
             print('Warning: cannot open image {}'.format(im_full_path))
             if 'failure' not in im:
-                im['failure'] = 'Failure image access'
+                im['failure'] = FAILURE_IMAGE_OPEN
 
     # ...if we need to read w/h information
 
@@ -86,7 +86,7 @@ def _check_image_existence_and_size(image,options=None):
         options (IntegrityCheckOptions): parameters impacting validation
 
     Returns:
-        bool: whether this image passes validation
+        str: None if this image passes validation, otherwise an error string
     """
 
     if options is None:
@@ -96,23 +96,23 @@
 
     filePath = os.path.join(options.baseDir,image['file_name'])
     if not os.path.isfile(filePath):
-        # print('Image path {} does not exist'.format(filePath))
-        return False
+        s = 'Image path {} does not exist'.format(filePath)
+        return s
 
     if options.bCheckImageSizes:
         if not ('height' in image and 'width' in image):
-            print('Missing image size in {}'.format(filePath))
-            return False
+            s = 'Missing image size in {}'.format(filePath)
+            return s
 
         # width, height = Image.open(filePath).size
         pil_im = open_image(filePath)
         width,height = pil_im.size
         if (not (width == image['width'] and height == image['height'])):
-            print('Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
-                image['id'], filePath, image['width'], image['height'], width, height))
-            return False
+            s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
+                image['id'], filePath, image['width'], image['height'], width, height)
+            return s
 
-    return True
+    return None
 
 
 def integrity_check_json_db(jsonFile, options=None):
@@ -287,6 +287,7 @@ def integrity_check_json_db(jsonFile, options=None):
         if fn_relative not in image_paths_in_json:
             unused_files.append(fn_relative)
 
+    # List of (filename,error_string) tuples
     validation_errors = []
 
     # If we're checking image existence but not image size, we don't need to read the images
@@ -298,8 +299,8 @@
     image_paths_relative_set = set(image_paths_relative)
 
     for im in images:
-        if im['file_name'] not in image_paths_relative_set:
-            validation_errors.append(im['file_name'])
+        if im['file_name'] not in image_paths_relative_set:
+            validation_errors.append((im['file_name'],'not found in relative path list'))
 
     # If we're checking image size, we need to read the images
     if options.bCheckImageSizes:
@@ -321,12 +322,12 @@
         results = tqdm(pool.imap(_check_image_existence_and_size, images), total=len(images))
     else:
         results = []
-        for im in tqdm(images):
+        for im in tqdm(images):
            results.append(_check_image_existence_and_size(im,options))
 
     for i_image,result in enumerate(results):
-        if result is not None:
-            validation_errors.append(images[i_image]['file_name'])
+        if result is not None:
+            validation_errors.append(images[i_image]['file_name'],result)
 
     # ...for each image
 
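
Taken together, the integrity_check_json_db hunks change the validation protocol: _check_image_existence_and_size now returns None on success and a human-readable error string on failure, and validation_errors is accumulated as (filename, error_string) tuples rather than bare filenames. A sketch of consuming results in that shape (the summarize_errors helper is hypothetical, not part of the package):

    def summarize_errors(validation_errors):
        # validation_errors: list of (filename, error_string) tuples, the format
        # documented by the new comment in integrity_check_json_db
        for filename, error in validation_errors:
            print('{}: {}'.format(filename, error))
        print('{} validation errors total'.format(len(validation_errors)))

    summarize_errors([('cam01/img_0001.jpg',
                       'Image path /data/cam01/img_0001.jpg does not exist')])
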
@@ -12,16 +12,18 @@ subset_json_detector_output.py.
 
 #%% Constants and imports
 
+import os
 import sys
 import json
 import argparse
 
 from tqdm import tqdm
+from copy import copy
 
 
 #%% Functions
 
-def subset_json_db(input_json, query, output_json=None, ignore_case=False):
+def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbose=False):
     """
     Given a json file (or dictionary already loaded from a json file), produce a new
     database containing only the images whose filenames contain the string 'query',
@@ -29,54 +31,80 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False):
 
     Args:
         input_json (str): COCO Camera Traps .json file to load, or an already-loaded dict
-        query (str): string to query for, only include images in the output whose filenames
-            contain this string.
+        query (str or list): string to query for, only include images in the output whose filenames
+            contain this string. If this is a list, test for exact matches.
         output_json (str, optional): file to write the resulting .json file to
         ignore_case (bool, optional): whether to perform a case-insensitive search for [query]
+        verbose (bool, optional): enable additional debug output
 
     Returns:
-        dict: possibly-modified CCT dictionary
+        dict: CCT dictionary containing a subset of the images and annotations in the input dict
     """
-
-    if ignore_case:
-        query = query.lower()
 
     # Load the input file if necessary
     if isinstance(input_json,str):
         print('Loading input .json...')
         with open(input_json, 'r') as f:
-            data = json.load(f)
+            input_data = json.load(f)
     else:
-        data = input_json
+        input_data = input_json
 
     # Find images matching the query
     images = []
-    image_ids = set()
 
-    for im in tqdm(data['images']):
-        fn = im['file_name']
+    if isinstance(query,str):
+
+        if ignore_case:
+            query = query.lower()
+
+        for im in tqdm(input_data['images']):
+            fn = im['file_name']
+            if ignore_case:
+                fn = fn.lower()
+            if query in fn:
+                images.append(im)
+
+    else:
+
+        query = set(query)
+
         if ignore_case:
-            fn = fn.lower()
-        if query in fn:
-            images.append(im)
-            image_ids.add(im['id'])
+            query = set([s.lower() for s in query])
+
+        for im in input_data['images']:
+            fn = im['file_name']
+            if ignore_case:
+                fn = fn.lower()
+            if fn in query:
+                images.append(im)
+
+    image_ids = set([im['id'] for im in images])
 
     # Find annotations referring to those images
     annotations = []
 
-    for ann in tqdm(data['annotations']):
+    for ann in input_data['annotations']:
         if ann['image_id'] in image_ids:
             annotations.append(ann)
 
-    output_data = data
+    output_data = copy(input_data)
     output_data['images'] = images
     output_data['annotations'] = annotations
 
     # Write the output file if requested
     if output_json is not None:
-        print('Writing output .json...')
-        json.dump(output_data,open(output_json,'w'),indent=1)
-
+        if verbose:
+            print('Writing output .json to {}'.format(output_json))
+        output_dir = os.path.dirname(output_json)
+        os.makedirs(output_dir,exist_ok=True)
+        with open(output_json,'w') as f:
+            json.dump(output_data,f,indent=1)
+
+    if verbose:
+        print('Keeping {} of {} images, {} of {} annotations'.format(
+            len(output_data['images']),len(input_data['images']),
+            len(output_data['annotations']),len(input_data['annotations'])))
+
     return output_data
 
 
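
Per the updated docstring, subset_json_db now accepts either a substring query or a list of exact filenames, and can optionally write the subset and report what it kept. A usage sketch (the module path follows the file list above; the .json paths and filenames are placeholders):

    from megadetector.data_management.databases.subset_json_db import subset_json_db

    # Substring mode: keep images whose filenames contain 'camera_01'
    subset = subset_json_db('cct_all.json', 'camera_01',
                            output_json='cct_camera_01.json', ignore_case=True)

    # List mode: keep only these exact filenames
    subset = subset_json_db('cct_all.json',
                            ['camera_01/img_0001.jpg', 'camera_02/img_0042.jpg'],
                            verbose=True)
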
@@ -20,78 +20,82 @@ preview_folder = os.path.expanduser('~/tmp/island_conservation_preview')
 image_directory = os.path.expanduser('~/data/icct/public/')
 
 
-#%% Read input file
+#%% Prevent imports during testing
 
-with open(input_fn,'r') as f:
-    d = json.load(f)
-
-d['info']
-d['info']['version'] = '1.01'
-
-
-#%% Find locations
-
-images = d['images']
-
-locations = set()
+if False:
 
-for i_image,im in tqdm(enumerate(images),total=len(images)):
-    tokens_fn = im['file_name'].split('/')
-    tokens_id = im['id'].split('_')
-    assert tokens_fn[0] == tokens_id[0]
-    assert tokens_fn[1] == tokens_id[1]
-    location = tokens_fn[0] + '_' + tokens_fn[1]
-    im['location'] = location
-    locations.add(location)
-
-locations = sorted(list(locations))
+    #%% Read input file
 
-for s in locations:
-    print(s)
+    with open(input_fn,'r') as f:
+        d = json.load(f)
+
+    d['info']
+    d['info']['version'] = '1.01'
 
 
-#%% Write output file
-
-with open(output_fn,'w') as f:
-    json.dump(d,f,indent=1)
+    #%% Find locations
 
-
-#%% Validate .json files
-
-from megadetector.data_management.databases import integrity_check_json_db
-
-options = integrity_check_json_db.IntegrityCheckOptions()
-options.baseDir = image_directory
-options.bCheckImageSizes = False
-options.bCheckImageExistence = True
-options.bFindUnusedImages = True
-
-sorted_categories, data, error_info = integrity_check_json_db.integrity_check_json_db(output_fn, options)
-
-
-#%% Preview labels
-
-from megadetector.visualization import visualize_db
-
-viz_options = visualize_db.DbVizOptions()
-viz_options.num_to_visualize = 2000
-viz_options.trim_to_images_with_bboxes = False
-viz_options.add_search_links = False
-viz_options.sort_by_filename = False
-viz_options.parallelize_rendering = True
-viz_options.classes_to_exclude = ['test']
-html_output_file, image_db = visualize_db.visualize_db(db_path=output_fn,
-                                                       output_dir=preview_folder,
-                                                       image_base_dir=image_directory,
-                                                       options=viz_options)
-
-from megadetector.utils import path_utils
-path_utils.open_file(html_output_file)
-
-
-#%% Zip output file
-
-from megadetector.utils.path_utils import zip_file
-
-zip_file(output_fn, verbose=True)
-assert os.path.isfile(output_fn + '.zip')
+    images = d['images']
+
+    locations = set()
+
+    for i_image,im in tqdm(enumerate(images),total=len(images)):
+        tokens_fn = im['file_name'].split('/')
+        tokens_id = im['id'].split('_')
+        assert tokens_fn[0] == tokens_id[0]
+        assert tokens_fn[1] == tokens_id[1]
+        location = tokens_fn[0] + '_' + tokens_fn[1]
+        im['location'] = location
+        locations.add(location)
+
+    locations = sorted(list(locations))
+
+    for s in locations:
+        print(s)
+
+
+    #%% Write output file
+
+    with open(output_fn,'w') as f:
+        json.dump(d,f,indent=1)
+
+
+    #%% Validate .json files
+
+    from megadetector.data_management.databases import integrity_check_json_db
+
+    options = integrity_check_json_db.IntegrityCheckOptions()
+    options.baseDir = image_directory
+    options.bCheckImageSizes = False
+    options.bCheckImageExistence = True
+    options.bFindUnusedImages = True
+
+    sorted_categories, data, error_info = integrity_check_json_db.integrity_check_json_db(output_fn, options)
+
+
+    #%% Preview labels
+
+    from megadetector.visualization import visualize_db
+
+    viz_options = visualize_db.DbVizOptions()
+    viz_options.num_to_visualize = 2000
+    viz_options.trim_to_images_with_bboxes = False
+    viz_options.add_search_links = False
+    viz_options.sort_by_filename = False
+    viz_options.parallelize_rendering = True
+    viz_options.classes_to_exclude = ['test']
+    html_output_file, image_db = visualize_db.visualize_db(db_path=output_fn,
+                                                           output_dir=preview_folder,
+                                                           image_base_dir=image_directory,
+                                                           options=viz_options)
+
+    from megadetector.utils import path_utils
+    path_utils.open_file(html_output_file)
+
+
+    #%% Zip output file
+
+    from megadetector.utils.path_utils import zip_file
+
+    zip_file(output_fn, verbose=True)
+    assert os.path.isfile(output_fn + '.zip')
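
This hunk and the nacti hunk that follows apply the same refactor: the interactive #%% cells are wrapped in an if False: block, so importing the module (for example from the test suite) no longer executes the one-off dataset-editing code, while the cells can still be run one at a time in an IDE. Schematically (paths are placeholders):

    #%% Constants and imports (safe to execute on import)

    import json
    input_fn = '/path/to/metadata.json'

    #%% Prevent execution during testing

    if False:

        #%% The actual work, run cell by cell in an interactive session

        with open(input_fn, 'r') as f:
            d = json.load(f)
        d['info']['version'] = '1.01'
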
@@ -21,127 +21,131 @@ input_file = r'd:\lila\nacti\nacti_metadata.json.1.13\nacti_metadata.json'
 output_file = r'g:\temp\nacti_metadata.1.14.json'
 
 
-#%% Read metadata
+#%% Prevent execution during testing
 
-with open(input_file,'r') as f:
-    d = json.load(f)
-
-assert d['info']['version'] == 1.13
-
-
-#%% Map images to locations (according to the metadata)
-
-file_name_to_original_location = {}
-
-# im = dataset_labels['images'][0]
-for im in tqdm(d['images']):
-    file_name_to_original_location[im['file_name']] = im['location']
-
-original_locations = set(file_name_to_original_location.values())
-
-print('Found {} locations in the original metadata:'.format(len(original_locations)))
-for loc in original_locations:
-    print('[{}]'.format(loc))
+if False:
 
-
-#%% Map images to new locations
-
-def path_to_location(relative_path):
-
-    relative_path = relative_path.replace('\\','/')
-    if relative_path in file_name_to_original_location:
-        location_name = file_name_to_original_location[relative_path]
-        if location_name == 'San Juan Mntns, Colorado':
-            # "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
-            tokens = relative_path.split('/')[-1].split('_')
-            assert tokens[1].startswith('Unit')
-            location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
-        elif location_name == 'Lebec, California':
-            # "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
-            tokens = relative_path.split('/')[-1].split('_')
-            assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
-            location_name = 'lebec_{}'.format(tokens[0])
-        elif location_name == 'Archbold, FL':
-            # "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
-            tokens = relative_path.split('/')[-1].split('_')
-            assert tokens[0].startswith('FL-')
-            location_name = 'archbold_{}'.format(tokens[0])
+    #%% Read metadata
+
+    with open(input_file,'r') as f:
+        d = json.load(f)
+
+    assert d['info']['version'] == 1.13
+
+
+    #%% Map images to locations (according to the metadata)
+
+    file_name_to_original_location = {}
+
+    # im = dataset_labels['images'][0]
+    for im in tqdm(d['images']):
+        file_name_to_original_location[im['file_name']] = im['location']
+
+    original_locations = set(file_name_to_original_location.values())
+
+    print('Found {} locations in the original metadata:'.format(len(original_locations)))
+    for loc in original_locations:
+        print('[{}]'.format(loc))
+
+
+    #%% Map images to new locations
+
+    def path_to_location(relative_path):
+
+        relative_path = relative_path.replace('\\','/')
+        if relative_path in file_name_to_original_location:
+            location_name = file_name_to_original_location[relative_path]
+            if location_name == 'San Juan Mntns, Colorado':
+                # "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[1].startswith('Unit')
+                location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
+            elif location_name == 'Lebec, California':
+                # "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
+                location_name = 'lebec_{}'.format(tokens[0])
+            elif location_name == 'Archbold, FL':
+                # "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[0].startswith('FL-')
+                location_name = 'archbold_{}'.format(tokens[0])
+            else:
+                assert location_name == ''
+                tokens = relative_path.split('/')[-1].split('_')
+                if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
+                    location_name = '{}'.format(tokens[0])
+
         else:
-            assert location_name == ''
-            tokens = relative_path.split('/')[-1].split('_')
-            if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
-                location_name = '{}'.format(tokens[0])
 
-    else:
+            location_name = 'unknown'
 
-        location_name = 'unknown'
+        # print('Returning location {} for file {}'.format(location_name,relative_path))
 
-    # print('Returning location {} for file {}'.format(location_name,relative_path))
-
-    return location_name
-
-file_name_to_updated_location = {}
-updated_location_to_count = defaultdict(int)
-for im in tqdm(d['images']):
+        return location_name
 
-    updated_location = path_to_location(im['file_name'])
-    file_name_to_updated_location[im['file_name']] = updated_location
-    updated_location_to_count[updated_location] += 1
-
-updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
-                                                     key=lambda item: item[1],
-                                                     reverse=True)}
-
-updated_locations = set(file_name_to_updated_location.values())
-
-print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
-for loc in updated_location_to_count:
-    print('{}: {}'.format(loc,updated_location_to_count[loc]))
-
-
-#%% Re-write metadata
-
-for im in d['images']:
-    im['location'] = file_name_to_updated_location[im['file_name']]
-d['info']['version'] = 1.14
-
-with open(output_file,'w') as f:
-    json.dump(d,f,indent=1)
+    file_name_to_updated_location = {}
+    updated_location_to_count = defaultdict(int)
+    for im in tqdm(d['images']):
+
+        updated_location = path_to_location(im['file_name'])
+        file_name_to_updated_location[im['file_name']] = updated_location
+        updated_location_to_count[updated_location] += 1
 
-
-#%% For each location, sample some random images to make sure they look consistent
-
-input_base = r'd:\lila\nacti-unzipped'
-assert os.path.isdir(input_base)
-
-location_to_images = defaultdict(list)
-
-for im in d['images']:
-    location_to_images[im['location']].append(im)
+    updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
+                                                         key=lambda item: item[1],
+                                                         reverse=True)}
 
-n_to_sample = 10
-import random
-random.seed(0)
-sampling_folder_base = r'g:\temp\nacti_samples'
-
-for location in tqdm(location_to_images):
+    updated_locations = set(file_name_to_updated_location.values())
 
-    images_this_location = location_to_images[location]
-    if len(images_this_location) > n_to_sample:
-        images_this_location = random.sample(images_this_location,n_to_sample)
-
-    for i_image,im in enumerate(images_this_location):
+    print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
+    for loc in updated_location_to_count:
+        print('{}: {}'.format(loc,updated_location_to_count[loc]))
+
+
+    #%% Re-write metadata
+
+    for im in d['images']:
+        im['location'] = file_name_to_updated_location[im['file_name']]
+    d['info']['version'] = 1.14
+
+    with open(output_file,'w') as f:
+        json.dump(d,f,indent=1)
 
-        fn_relative = im['file_name']
-        source_fn_abs = os.path.join(input_base,fn_relative)
-        assert os.path.isfile(source_fn_abs)
-        ext = os.path.splitext(fn_relative)[1]
-        target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
-            location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
-        os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
-        shutil.copyfile(source_fn_abs,target_fn_abs)
+
+    #%% For each location, sample some random images to make sure they look consistent
+
+    input_base = r'd:\lila\nacti-unzipped'
+    assert os.path.isdir(input_base)
+
+    location_to_images = defaultdict(list)
+
+    for im in d['images']:
+        location_to_images[im['location']].append(im)
 
-    # ...for each image
+    n_to_sample = 10
+    import random
+    random.seed(0)
+    sampling_folder_base = r'g:\temp\nacti_samples'
 
-# ...for each location
+    for location in tqdm(location_to_images):
+
+        images_this_location = location_to_images[location]
+        if len(images_this_location) > n_to_sample:
+            images_this_location = random.sample(images_this_location,n_to_sample)
 
+        for i_image,im in enumerate(images_this_location):
+
+            fn_relative = im['file_name']
+            source_fn_abs = os.path.join(input_base,fn_relative)
+            assert os.path.isfile(source_fn_abs)
+            ext = os.path.splitext(fn_relative)[1]
+            target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
+                location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
+            os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
+            shutil.copyfile(source_fn_abs,target_fn_abs)
+
+        # ...for each image
+
+    # ...for each location
+