megadetector 5.0.24__py3-none-any.whl → 5.0.26__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.


Files changed (41)
  1. megadetector/data_management/cct_json_utils.py +15 -2
  2. megadetector/data_management/coco_to_yolo.py +53 -31
  3. megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
  4. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  5. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
  6. megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
  7. megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
  8. megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
  9. megadetector/data_management/remap_coco_categories.py +60 -11
  10. megadetector/data_management/{wi_to_md.py → speciesnet_to_md.py} +2 -2
  11. megadetector/data_management/yolo_to_coco.py +45 -15
  12. megadetector/detection/run_detector.py +1 -0
  13. megadetector/detection/run_detector_batch.py +5 -4
  14. megadetector/postprocessing/classification_postprocessing.py +788 -524
  15. megadetector/postprocessing/compare_batch_results.py +176 -9
  16. megadetector/postprocessing/create_crop_folder.py +420 -0
  17. megadetector/postprocessing/load_api_results.py +4 -1
  18. megadetector/postprocessing/md_to_coco.py +1 -1
  19. megadetector/postprocessing/postprocess_batch_results.py +158 -44
  20. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
  21. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  22. megadetector/postprocessing/separate_detections_into_folders.py +20 -4
  23. megadetector/postprocessing/subset_json_detector_output.py +180 -15
  24. megadetector/postprocessing/validate_batch_results.py +13 -5
  25. megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
  26. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
  27. megadetector/taxonomy_mapping/species_lookup.py +45 -2
  28. megadetector/utils/ct_utils.py +76 -3
  29. megadetector/utils/directory_listing.py +4 -4
  30. megadetector/utils/gpu_test.py +21 -3
  31. megadetector/utils/md_tests.py +142 -49
  32. megadetector/utils/path_utils.py +342 -19
  33. megadetector/utils/wi_utils.py +1286 -212
  34. megadetector/visualization/visualization_utils.py +16 -4
  35. megadetector/visualization/visualize_db.py +1 -1
  36. megadetector/visualization/visualize_detector_output.py +1 -4
  37. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/METADATA +6 -3
  38. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/RECORD +41 -40
  39. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/WHEEL +1 -1
  40. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info/licenses}/LICENSE +0 -0
  41. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/top_level.txt +0 -0
megadetector/data_management/lila/test_lila_metadata_urls.py

@@ -31,9 +31,10 @@ os.makedirs(metadata_dir,exist_ok=True)
 md_results_dir = os.path.join(lila_local_base,'md_results')
 os.makedirs(md_results_dir,exist_ok=True)
 
-md_results_keys = ['mdv4_results_raw','mdv5a_results_raw','mdv5b_results_raw','md_results_with_rde']
+md_results_keys = ['mdv4_results_raw','mdv5a_results_raw','mdv5b_results_raw',
+                   'md1000-redwood_results_raw','md_results_with_rde']
 
-preferred_cloud = 'gcp' # 'azure', 'aws'
+preferred_cloud = None # 'gcp' # 'azure', 'aws'
 
 force_download = True
 
@@ -52,7 +53,7 @@ print('Loaded metadata URLs for {} datasets'.format(len(metadata_table)))
 
 #%% Download and extract metadata and MD results for each dataset
 
-# Takes ~60 seconds if everything needs to beo downloaded and unzipped
+# Takes ~60 seconds if everything needs to be downloaded and unzipped
 
 for ds_name in metadata_table.keys():
 
@@ -88,6 +89,8 @@ url_to_source = {}
 # so we pick a semi-arbitrary image that isn't the first. How about the 2000th?
 image_index = 2000
 
+# TODO: parallelize this loop
+#
 # ds_name = list(metadata_table.keys())[0]
 for ds_name in metadata_table.keys():
 
@@ -101,13 +104,21 @@ for ds_name in metadata_table.keys():
     with open(json_filename, 'r') as f:
         data = json.load(f)
 
-    image_base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
-    assert not image_base_url.endswith('/')
-    # Download a test image
-    test_image_relative_path = data['images'][image_index]['file_name']
-    test_image_url = image_base_url + '/' + test_image_relative_path
-
-    url_to_source[test_image_url] = ds_name + ' metadata'
+    if preferred_cloud is not None:
+        clouds = [preferred_cloud]
+    else:
+        clouds = ['gcp','aws','azure']
+
+    for cloud in clouds:
+
+        image_base_url = metadata_table[ds_name]['image_base_url_' + cloud]
+        assert not image_base_url.endswith('/')
+
+        # Download a test image
+        test_image_relative_path = data['images'][image_index]['file_name']
+        test_image_url = image_base_url + '/' + test_image_relative_path
+
+        url_to_source[test_image_url] = ds_name + ' metadata ({})'.format(cloud)
 
     # Grab an image from the MegaDetector results
 
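The hunk above changes the URL tester from probing a single preferred cloud to probing every cloud when preferred_cloud is None. A minimal, self-contained sketch of the new selection logic; the dataset name and base URLs below are hypothetical stand-ins for real LILA metadata entries:

    # Sketch of the cloud-fallback logic introduced above; 'example-dataset'
    # and its URLs are made up for illustration.
    metadata_table = {
        'example-dataset': {
            'image_base_url_gcp': 'https://storage.googleapis.com/example',
            'image_base_url_aws': 'https://example.s3.amazonaws.com',
            'image_base_url_azure': 'https://example.blob.core.windows.net'
        }
    }

    preferred_cloud = None  # None now means "test all three clouds"
    clouds = [preferred_cloud] if preferred_cloud is not None else ['gcp', 'aws', 'azure']

    url_to_source = {}
    for ds_name, ds_info in metadata_table.items():
        for cloud in clouds:
            image_base_url = ds_info['image_base_url_' + cloud]
            assert not image_base_url.endswith('/')
            test_image_url = image_base_url + '/' + 'path/to/test_image.jpg'
            url_to_source[test_image_url] = ds_name + ' metadata ({})'.format(cloud)

    # One test URL per (dataset, cloud) pair
    print('\n'.join(sorted(url_to_source.values())))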
megadetector/data_management/remap_coco_categories.py

@@ -12,6 +12,7 @@ import os
 import json
 
 from copy import deepcopy
+from megadetector.utils.ct_utils import invert_dictionary
 
 
 #%% Main function
@@ -19,17 +20,27 @@ from copy import deepcopy
 def remap_coco_categories(input_data,
                           output_category_name_to_id,
                           input_category_name_to_output_category_name,
-                          output_file=None):
+                          output_file=None,
+                          allow_unused_categories=False):
     """
     Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
     writing the results to a new file.
 
-    output_category_name_to_id is a dict mapping strings to ints.
+    Args:
+        input_data (str or dict): a COCO-formatted dict or a filename. If it's a dict, it will
+            be copied, not modified in place.
+        output_category_name_to_id (dict) a dict mapping strings to ints. Categories not in
+            this dict will be ignored or will result in errors, depending on allow_unused_categories.
+        input_category_name_to_output_category_name: a dict mapping strings to strings.
+            Annotations using categories not in this dict will be omitted or will result in
+            errors, depending on allow_unused_categories.
+        output_file (str, optional): output file to which we should write remapped COCO data
+        allow_unused_categories (bool, optional): should we ignore categories not present in the
+            input/output mappings? If this is False and we encounter an unmapped category, we'll
+            error.
 
-    input_category_name_to_output_category_name is a dict mapping strings to strings.
-
-    [input_data] can be a COCO-formatted dict or a filename. If it's a dict, it will be copied,
-    not modified in place.
+    Returns:
+        dict: COCO-formatted dict
     """
 
     if isinstance(input_data,str):
@@ -48,23 +59,59 @@ def remap_coco_categories(input_data,
     input_category_name_to_input_category_id = {}
     for c in input_data['categories']:
         input_category_name_to_input_category_id[c['name']] = c['id']
-
+    input_category_id_to_input_category_name = \
+        invert_dictionary(input_category_name_to_input_category_id)
+
     # Map input IDs --> output IDs
     input_category_id_to_output_category_id = {}
-    for input_name in input_category_name_to_output_category_name.keys():
+    input_category_names = list(input_category_name_to_output_category_name.keys())
+
+    # input_name = input_category_names[0]
+    for input_name in input_category_names:
+
         output_name = input_category_name_to_output_category_name[input_name]
         assert output_name in output_category_name_to_id, \
             'No output ID for {} --> {}'.format(input_name,output_name)
         input_id = input_category_name_to_input_category_id[input_name]
        output_id = output_category_name_to_id[output_name]
        input_category_id_to_output_category_id[input_id] = output_id
+
+    # ...for each category we want to keep
 
+    printed_unused_category_warnings = set()
+
+    valid_annotations = []
+
     # Map annotations
     for ann in output_data['annotations']:
-        assert ann['category_id'] in input_category_id_to_output_category_id, \
-            'Unrecognized category ID {}'.format(ann['category_id'])
-        ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]
 
+        input_category_id = ann['category_id']
+        if input_category_id not in input_category_id_to_output_category_id:
+            if allow_unused_categories:
+                if input_category_id not in printed_unused_category_warnings:
+                    printed_unused_category_warnings.add(input_category_id)
+                    input_category_name = \
+                        input_category_id_to_input_category_name[input_category_id]
+                    s = 'Skipping unmapped category ID {} ({})'.format(
+                        input_category_id,input_category_name)
+                    print(s)
+                continue
+            else:
+                s = 'Unmapped category ID {}'.format(input_category_id)
+                raise ValueError(s)
+        output_category_id = input_category_id_to_output_category_id[input_category_id]
+        ann['category_id'] = output_category_id
+        valid_annotations.append(ann)
+
+    # ...for each annotation
+
+    # The only reason annotations should get excluded is the case where we allow
+    # unused categories
+    if not allow_unused_categories:
+        assert len(valid_annotations) == len(output_data['annotations'])
+
+    output_data['annotations'] = valid_annotations
+
     # Update the category list
     output_categories = []
     for output_name in output_category_name_to_id:
@@ -78,6 +125,8 @@ def remap_coco_categories(input_data,
 
     return input_data
 
+# ...def remap_coco_categories(...)
+
 
 #%% Command-line driver
 
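To illustrate the new allow_unused_categories behavior, here is a hedged usage sketch based on the signature shown above; the file names and category mappings are hypothetical:

    from megadetector.data_management.remap_coco_categories import remap_coco_categories

    # Hypothetical mapping: collapse 'deer' and 'elk' onto a single 'animal'
    # category. With allow_unused_categories=True, annotations whose category
    # isn't listed here are skipped (with a one-time warning per category)
    # rather than raising a ValueError.
    output_category_name_to_id = {'animal': 0}
    input_to_output_names = {'deer': 'animal', 'elk': 'animal'}

    remapped = remap_coco_categories('input_coco.json',
                                     output_category_name_to_id,
                                     input_to_output_names,
                                     output_file='remapped_coco.json',
                                     allow_unused_categories=True)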
megadetector/data_management/speciesnet_to_md.py (renamed from wi_to_md.py)

@@ -2,7 +2,7 @@
 
 wi_to_md.py
 
-Converts the WI predictions.json format to MD .json format. This is just a
+Converts the WI (SpeciesNet) predictions.json format to MD .json format. This is just a
 command-line wrapper around utils.wi_utils.generate_md_results_from_predictions_json.
 
 """
@@ -20,7 +20,7 @@ def main():
 
     parser = argparse.ArgumentParser()
     parser.add_argument('predictions_json_file', action='store', type=str,
-                        help='.json file to convert from predictions.json format to MD format')
+                        help='.json file to convert from SpeciesNet predictions.json format to MD format')
     parser.add_argument('md_results_file', action='store', type=str,
                         help='output file to write in MD format')
     parser.add_argument('--base_folder', action='store', type=str, default=None,
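Apart from the rename, usage appears unchanged; based on the positional arguments and --base_folder flag shown in this hunk, an invocation would look something like the following (file names and paths are placeholders):

    python -m megadetector.data_management.speciesnet_to_md \
        predictions.json md_results.json --base_folder /path/to/images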
megadetector/data_management/yolo_to_coco.py

@@ -34,7 +34,7 @@ def _filename_to_image_id(fn):
     return fn.replace(' ','_').replace('\\','/')
 
 
-def _process_image(fn_abs,input_folder,category_id_to_name):
+def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
     """
     Internal support function for processing one image's labels.
     """
@@ -42,8 +42,8 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
     # Create the image object for this image
     #
     # Always use forward slashes in image filenames and IDs
-    fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
-    image_id = _filename_to_image_id(fn_relative)
+    image_fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
+    image_id = _filename_to_image_id(image_fn_relative)
 
     # This is done in a separate loop now
     #
@@ -53,7 +53,7 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
     # image_ids.add(image_id)
 
     im = {}
-    im['file_name'] = fn_relative
+    im['file_name'] = image_fn_relative
     im['id'] = image_id
 
     annotations_this_image = []
@@ -65,14 +65,20 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
         im['height'] = im_height
         im['error'] = None
     except Exception as e:
-        print('Warning: error reading {}:\n{}'.format(fn_relative,str(e)))
+        print('Warning: error reading {}:\n{}'.format(image_fn_relative,str(e)))
         im['width'] = -1
         im['height'] = -1
         im['error'] = str(e)
         return (im,annotations_this_image)
 
     # Is there an annotation file for this image?
-    annotation_file = os.path.splitext(fn_abs)[0] + '.txt'
+    if label_folder is not None:
+        assert input_folder in fn_abs
+        label_file_abs_base = fn_abs.replace(input_folder,label_folder)
+    else:
+        label_file_abs_base = fn_abs
+
+    annotation_file = os.path.splitext(label_file_abs_base)[0] + '.txt'
     if not os.path.isfile(annotation_file):
         annotation_file = os.path.splitext(fn_abs)[0] + '.TXT'
 
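The path substitution above (mapping an image path into a separate label folder before swapping the extension) is the core of the new label_folder support. A standalone sketch of the same mapping; the helper name is hypothetical, not part of yolo_to_coco.py:

    import os

    def label_path_for_image(image_path, image_folder, label_folder=None):
        # Mirror the substitution in _process_image: map the image path into
        # the label folder (if one is given), then swap the extension for .txt.
        if label_folder is not None:
            assert image_folder in image_path
            base = image_path.replace(image_folder, label_folder)
        else:
            base = image_path
        return os.path.splitext(base)[0] + '.txt'

    assert label_path_for_image('/data/images/site1/img001.jpg',
                                '/data/images',
                                '/data/labels') == '/data/labels/site1/img001.txt'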
@@ -270,9 +276,14 @@ def validate_label_file(label_file,category_id_to_name=None,verbose=False):
 # ...def validate_label_file(...)
 
 
-def validate_yolo_dataset(input_folder, class_name_file, n_workers=1, pool_type='thread', verbose=False):
+def validate_yolo_dataset(input_folder,
+                          class_name_file,
+                          n_workers=1,
+                          pool_type='thread',
+                          verbose=False):
     """
-    Verifies all the labels in a YOLO dataset folder.
+    Verifies all the labels in a YOLO dataset folder. Does not yet support the case where the
+    labels and images are in different folders (yolo_to_coco() supports this).
 
     Looks for:
 
@@ -396,14 +407,17 @@ def yolo_to_coco(input_folder,
                  recursive=True,
                  exclude_string=None,
                  include_string=None,
-                 overwrite_handling='overwrite'):
+                 overwrite_handling='overwrite',
+                 label_folder=None):
     """
     Converts a YOLO-formatted dataset to a COCO-formatted dataset.
 
     All images will be assigned an "error" value, usually None.
 
     Args:
-        input_folder (str): the YOLO dataset folder to validate
+        input_folder (str): the YOLO dataset folder to convert. If the image and label
+            folders are different, this is the image folder, and [label_folder] is the
+            label folder.
         class_name_file (str or list): a list of classes, a flat text file, or a yolo
             dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
             input_folder as the base folder, though this is not explicitly checked.
@@ -432,6 +446,7 @@ def yolo_to_coco(input_folder,
         include_string (str, optional): include only images whose filename contains a string
         overwrite_handling (bool, optional): behavior if output_file exists ('load', 'overwrite', or
            'error')
+        label_folder (str, optional): label folder, if different from the image folder
 
     Returns:
         dict: COCO-formatted data, the same as what's written to [output_file]
@@ -439,6 +454,8 @@ def yolo_to_coco(input_folder,
 
     ## Validate input
 
+    input_folder = input_folder.replace('\\','/')
+
     assert os.path.isdir(input_folder)
     assert os.path.isfile(class_name_file)
 
@@ -487,6 +504,7 @@ def yolo_to_coco(input_folder,
     print('Enumerating images...')
 
     image_files_abs = find_images(input_folder,recursive=recursive,convert_slashes=True)
+    assert not any(['\\' in fn for fn in image_files_abs])
 
     n_files_original = len(image_files_abs)
 
@@ -516,8 +534,14 @@ def yolo_to_coco(input_folder,
 
     if not allow_images_without_label_files:
         print('Verifying that label files exist')
+        # image_file_abs = image_files_abs[0]
         for image_file_abs in tqdm(image_files_abs):
-            label_file_abs = os.path.splitext(image_file_abs)[0] + '.txt'
+            if label_folder is not None:
+                assert input_folder in image_file_abs
+                label_file_abs_base = image_file_abs.replace(input_folder,label_folder)
+            else:
+                label_file_abs_base = image_file_abs
+            label_file_abs = os.path.splitext(label_file_abs_base)[0] + '.txt'
             assert os.path.isfile(label_file_abs), \
                 'No annotation file for {}'.format(image_file_abs)
 
@@ -528,7 +552,7 @@ def yolo_to_coco(input_folder,
 
     for fn_abs in tqdm(image_files_abs):
 
-        fn_relative = os.path.relpath(fn_abs,input_folder)
+        fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
         image_id = _filename_to_image_id(fn_relative)
         assert image_id not in image_ids, \
             'Oops, you have hit a very esoteric case where you have the same filename ' + \
@@ -543,8 +567,12 @@ def yolo_to_coco(input_folder,
     if n_workers <= 1:
 
         image_results = []
+        # fn_abs = image_files_abs[0]
        for fn_abs in tqdm(image_files_abs):
-            image_results.append(_process_image(fn_abs,input_folder,category_id_to_name))
+            image_results.append(_process_image(fn_abs,
+                                                input_folder,
+                                                category_id_to_name,
+                                                label_folder))
 
     else:
 
@@ -557,8 +585,10 @@ def yolo_to_coco(input_folder,
 
         print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
 
-        p = partial(_process_image,input_folder=input_folder,
-                    category_id_to_name=category_id_to_name)
+        p = partial(_process_image,
+                    input_folder=input_folder,
+                    category_id_to_name=category_id_to_name,
+                    label_folder=label_folder)
         image_results = list(tqdm(pool.imap(p, image_files_abs),
                                   total=len(image_files_abs)))
 
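Putting the new parameter together, a hedged usage sketch based on the signature above; the paths are hypothetical, and class_list.txt stands in for a flat text file with one class name per line:

    from megadetector.data_management.yolo_to_coco import yolo_to_coco

    # Images live under /data/yolo/images, labels under a parallel tree
    # /data/yolo/labels; per the docstring above, label_folder handles
    # exactly this split layout.
    coco_data = yolo_to_coco('/data/yolo/images',
                             '/data/yolo/class_list.txt',
                             output_file='/data/yolo/coco.json',
                             label_folder='/data/yolo/labels')
    print('Converted {} images'.format(len(coco_data['images'])))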
megadetector/detection/run_detector.py

@@ -96,6 +96,7 @@ model_string_to_model_version = {
     'cedar':'v1000.0.0-cedar',
     'larch':'v1000.0.0-larch',
     'default':'v5a.0.0',
+    'default-model':'v5a.0.0',
     'megadetector':'v5a.0.0'
 }
 
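This hunk adds 'default-model' as another alias for MDv5a. Assuming the table stays importable at module scope (which the hunk suggests, since it's a module-level dict literal), the new alias can be sanity-checked like this:

    from megadetector.detection.run_detector import model_string_to_model_version

    # 'default-model' joins 'default' and 'megadetector' as aliases for MDv5a
    assert model_string_to_model_version['default-model'] == 'v5a.0.0'
    assert model_string_to_model_version['default'] == \
        model_string_to_model_version['megadetector']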
megadetector/detection/run_detector_batch.py

@@ -735,7 +735,7 @@ def load_and_run_detector_batch(model_file,
     """
 
     # Validate input arguments
-    if n_cores is None:
+    if n_cores is None or n_cores <= 0:
         n_cores = 1
 
     if confidence_threshold is None:
@@ -1331,13 +1331,14 @@ def main():
     parser.add_argument(
         '--ncores',
         type=int,
-        default=0,
-        help='Number of cores to use for inference; only applies to CPU-based inference')
+        default=1,
+        help='Number of cores to use for inference; only applies to CPU-based inference (default 1)')
     parser.add_argument(
         '--loader_workers',
         type=int,
         default=default_loaders,
-        help='Number of image loader workers to use; only relevant when --use_image_queue is set')
+        help='Number of image loader workers to use; only relevant when --use_image_queue ' + \
+             'is set (default {})'.format(default_loaders))
     parser.add_argument(
         '--class_mapping_filename',
         type=str,
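With these changes, omitting --ncores now means single-core CPU inference, and load_and_run_detector_batch normalizes any non-positive n_cores value to 1. An illustrative invocation; the --ncores flag is taken from the hunk above, while the model string and positional arguments (detector, input folder, output file) are assumptions based on the module's typical usage, with placeholder paths:

    python -m megadetector.detection.run_detector_batch \
        MDV5A /path/to/images md_results.json --ncores 4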