megadetector 5.0.15__py3-none-any.whl → 5.0.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (34)
  1. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +387 -0
  2. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +28 -16
  3. megadetector/data_management/lila/generate_lila_per_image_labels.py +3 -3
  4. megadetector/data_management/lila/test_lila_metadata_urls.py +2 -2
  5. megadetector/data_management/remove_exif.py +61 -36
  6. megadetector/data_management/yolo_to_coco.py +25 -6
  7. megadetector/detection/process_video.py +270 -127
  8. megadetector/detection/pytorch_detector.py +13 -11
  9. megadetector/detection/run_detector.py +9 -2
  10. megadetector/detection/run_detector_batch.py +8 -1
  11. megadetector/detection/run_inference_with_yolov5_val.py +58 -10
  12. megadetector/detection/tf_detector.py +8 -2
  13. megadetector/detection/video_utils.py +214 -18
  14. megadetector/postprocessing/md_to_coco.py +31 -9
  15. megadetector/postprocessing/postprocess_batch_results.py +23 -7
  16. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -2
  17. megadetector/postprocessing/subset_json_detector_output.py +22 -12
  18. megadetector/taxonomy_mapping/map_new_lila_datasets.py +3 -3
  19. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +2 -1
  20. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +1 -1
  21. megadetector/taxonomy_mapping/simple_image_download.py +5 -0
  22. megadetector/taxonomy_mapping/species_lookup.py +1 -1
  23. megadetector/utils/ct_utils.py +48 -0
  24. megadetector/utils/md_tests.py +231 -56
  25. megadetector/utils/path_utils.py +2 -2
  26. megadetector/utils/torch_test.py +32 -0
  27. megadetector/utils/url_utils.py +101 -4
  28. megadetector/visualization/visualization_utils.py +21 -6
  29. megadetector/visualization/visualize_db.py +16 -0
  30. {megadetector-5.0.15.dist-info → megadetector-5.0.17.dist-info}/LICENSE +0 -0
  31. {megadetector-5.0.15.dist-info → megadetector-5.0.17.dist-info}/METADATA +5 -7
  32. {megadetector-5.0.15.dist-info → megadetector-5.0.17.dist-info}/RECORD +34 -32
  33. {megadetector-5.0.15.dist-info → megadetector-5.0.17.dist-info}/WHEEL +1 -1
  34. {megadetector-5.0.15.dist-info → megadetector-5.0.17.dist-info}/top_level.txt +0 -0
megadetector/postprocessing/md_to_coco.py

@@ -31,11 +31,18 @@ def md_to_coco(md_results_file,
                validate_image_sizes=False,
                info=None,
                preserve_nonstandard_metadata=True,
-               include_failed_images=True):
+               include_failed_images=True,
+               include_annotations_without_bounding_boxes=True,
+               empty_category_id='0'):
     """
     "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
     this is an opinionated transformation that requires a confidence threshold.
 
+    The default confidence threshold is not 0; the assumption is that by default, you are
+    going to treat the resulting COCO file as a set of labels. If you are using the resulting COCO
+    file to evaluate a detector, you likely want a default confidence threshold of 0. Confidence
+    values will be written to the semi-standard "score" field for each image
+
     A folder of images is required if width and height information are not available
     in the MD results file.
 
@@ -54,8 +61,13 @@ def md_to_coco(md_results_file,
         preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
             non-standard "conf" field in each annotation, and any random fields present in each image's data
             (e.g. EXIF metadata) will be propagated to COCO output
-        include_failed_images (boo, optional): if this is True, failed images will be propagated to COCO output
+        include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
             with a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
+        include_annotations_without_bounding_boxes (bool, optional): if this is True, annotations with
+            only class labels (no bounding boxes) will be included in the output. If this is False, empty
+            images will be represented with no annotations.
+        empty_category_id (str, optional): category ID reserved for the 'empty' class, should not be
+            attached to any bounding boxes
 
     Returns:
         dict: the COCO data dict, identical to what's written to [coco_output_file] if [coco_output_file]
@@ -67,6 +79,8 @@ def md_to_coco(md_results_file,
 
     coco_images = []
     coco_annotations = []
+
+    print('Converting MD results to COCO...')
 
     # im = md_results['images'][0]
     for im in tqdm(md_results['images']):
@@ -129,13 +143,13 @@ def md_to_coco(md_results_file,
             coco_category_id = int(md_category_id)
             ann['category_id'] = coco_category_id
 
-            # In very esoteric cases, we use the empty category (0) in MD-formatted output files
-            if md_category_id != '0':
+            if md_category_id != empty_category_id:
 
                 assert 'bbox' in detection,\
                     'Oops: non-empty category with no bbox in {}'.format(im['file'])
 
                 ann['bbox'] = detection['bbox']
+
                 # MegaDetector: [x,y,width,height] (normalized, origin upper-left)
                 # COCO: [x,y,width,height] (absolute, origin upper-left)
                 ann['bbox'][0] = ann['bbox'][0] * coco_im['width']
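For intuition, the normalized-to-absolute conversion that starts here (and continues in the next hunk) works as follows; a worked example with made-up image dimensions:

    # MD bbox: [x, y, width, height], normalized, origin upper-left
    md_bbox = [0.5, 0.25, 0.1, 0.2]
    image_width, image_height = 1000, 750

    # COCO bbox: [x, y, width, height], absolute pixels, origin upper-left
    coco_bbox = [md_bbox[0] * image_width,    # 500.0
                 md_bbox[1] * image_height,   # 187.5
                 md_bbox[2] * image_width,    # 100.0
                 md_bbox[3] * image_height]   # 150.0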
@@ -144,13 +158,19 @@ def md_to_coco(md_results_file,
                 ann['bbox'][3] = ann['bbox'][3] * coco_im['height']
 
             else:
-
-                print('Warning: empty category annotation in file {}'.format(im['file']))
+
+                # In very esoteric cases, we use the empty category (0) in MD-formatted output files
+                print('Warning: empty category ({}) used for annotation in file {}'.format(
+                    empty_category_id,im['file']))
+                pass
 
             if preserve_nonstandard_metadata:
-                ann['conf'] = detection['conf']
-
-            coco_annotations.append(ann)
+                # "Score" is a semi-standard string here, recognized by at least pycocotools
+                # ann['conf'] = detection['conf']
+                ann['score'] = detection['conf']
+
+            if 'bbox' in ann or include_annotations_without_bounding_boxes:
+                coco_annotations.append(ann)
 
         # ...for each detection
 
@@ -176,6 +196,8 @@ def md_to_coco(md_results_file,
             'name':md_results['detection_categories'][md_category_id]}
         output_dict['categories'].append(coco_category)
 
+    print('Writing COCO output file...')
+
     if coco_output_file is not None:
         with open(coco_output_file,'w') as f:
             json.dump(output_dict,f,indent=1)
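Taken together, these hunks change the md_to_coco call surface. A minimal usage sketch under stated assumptions: only the parameters visible in the diff above are confirmed, and the confidence-threshold parameter name in particular is a guess based on the docstring:

    from megadetector.postprocessing.md_to_coco import md_to_coco

    # To evaluate a detector (rather than treat the output as labels), use a
    # confidence threshold of 0 so all detections survive as annotations,
    # each carrying the semi-standard 'score' field.
    # [confidence_threshold] is an assumed parameter name.
    coco_dict = md_to_coco('md_results.json',
                           coco_output_file='md_results_coco.json',
                           confidence_threshold=0,
                           include_annotations_without_bounding_boxes=True,
                           empty_category_id='0')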
megadetector/postprocessing/postprocess_batch_results.py

@@ -770,7 +770,7 @@ def _render_image_no_gt(file_info,detection_categories_to_results_name,
             if det['conf'] > max_conf:
                 max_conf = det['conf']
 
-            if ('classifications' in det):
+            if ('classifications' in det) and (len(det['classifications']) > 0):
 
                 # This is a list of [class,confidence] pairs, sorted by confidence
                 classifications = det['classifications']
@@ -949,6 +949,13 @@ def process_batch_results(options):
           f'negative, {n_positive} positive, {n_unknown} unknown, '
           f'{n_ambiguous} ambiguous')
 
+    if n_positive == 0:
+        print('\n*** Warning: no positives found in ground truth, analysis won\'t be very meaningful ***\n')
+    if n_negative == 0:
+        print('\n*** Warning: no negatives found in ground truth, analysis won\'t be very meaningful ***\n')
+    if n_ambiguous > 0:
+        print('\n*** Warning: {} images with ambiguous positive/negative status found in ground truth ***\n'.format(
+            n_ambiguous))
 
     ##%% Load detection (and possibly classification) results
 
@@ -1095,25 +1102,34 @@ def process_batch_results(options):
 
     ##%% Detection evaluation: compute precision/recall
 
-    # numpy array of detection probabilities
+    # numpy array of maximum confidence values
     p_detection = detections_df['max_detection_conf'].values
-    n_detections = len(p_detection)
+    n_detection_values = len(p_detection)
 
     # numpy array of bools (0.0/1.0), and -1 as null value
-    gt_detections = np.zeros(n_detections, dtype=float)
+    gt_detections = np.zeros(n_detection_values, dtype=float)
 
+    n_positive = 0
+    n_negative = 0
+
     for i_detection, fn in enumerate(detector_files):
+
         image_id = ground_truth_indexed_db.filename_to_id[fn]
         image = ground_truth_indexed_db.image_id_to_image[image_id]
         detection_status = image['_detection_status']
 
         if detection_status == DetectionStatus.DS_NEGATIVE:
             gt_detections[i_detection] = 0.0
+            n_negative += 1
         elif detection_status == DetectionStatus.DS_POSITIVE:
             gt_detections[i_detection] = 1.0
+            n_positive += 1
         else:
             gt_detections[i_detection] = -1.0
 
+    print('Of {} ground truth values, found {} positives and {} negatives'.format(
+        len(detections_df),n_positive,n_negative))
+
     # Don't include ambiguous/unknown ground truth in precision/recall analysis
     b_valid_ground_truth = gt_detections >= 0.0
 
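To make the new bookkeeping concrete, here is an illustrative sketch (not code from the package) of how the valid-ground-truth mask feeds a precision/recall computation:

    import numpy as np

    p_detection = np.array([0.9, 0.2, 0.8, 0.6])     # max_detection_conf per image
    gt_detections = np.array([1.0, 0.0, -1.0, 1.0])  # -1.0 = ambiguous/unknown

    # Don't include ambiguous/unknown ground truth in precision/recall analysis
    b_valid_ground_truth = gt_detections >= 0.0
    p_valid = p_detection[b_valid_ground_truth]      # [0.9, 0.2, 0.6]
    gt_valid = gt_detections[b_valid_ground_truth]   # [1.0, 0.0, 1.0]

    # Precision/recall at an example threshold of 0.5
    predicted = p_valid >= 0.5
    tp = np.sum(predicted & (gt_valid == 1.0))       # 2
    precision = tp / np.sum(predicted)               # 1.0
    recall = tp / np.sum(gt_valid == 1.0)            # 1.0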
@@ -1187,13 +1203,13 @@ def process_batch_results(options):
     # Rows / first index is ground truth, columns / second index is predicted category
     classifier_cm = collections.defaultdict(lambda: collections.defaultdict(lambda: 0))
 
-    # iDetection = 0; fn = detector_files[iDetection]; print(fn)
+    # i_detection = 0; fn = detector_files[i_detection]; print(fn)
     assert len(detector_files) == len(detections_df)
-    for iDetection, fn in enumerate(detector_files):
+    for i_detection, fn in enumerate(detector_files):
 
         image_id = ground_truth_indexed_db.filename_to_id[fn]
         image = ground_truth_indexed_db.image_id_to_image[image_id]
-        detections = detections_df['detections'].iloc[iDetection]
+        detections = detections_df['detections'].iloc[i_detection]
         pred_class_ids = [det['classifications'][0][0] \
             for det in detections if 'classifications' in det.keys()]
         pred_classnames = [classification_categories[pd] for pd in pred_class_ids]
megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py

@@ -210,9 +210,12 @@ class RepeatDetectionOptions:
        #: a/b/c/RECONYX100 and a/b/c/RECONYX101 may really be the same camera).
        #:
        #: See ct_utils for a common replacement function that handles most common
-       #: manufacturer folder names.
+       #: manufacturer folder names:
+       #:
+       #: from megadetector.utils import ct_utils
+       #: self.customDirNameFunction = ct_utils.image_file_to_camera_folder
        self.customDirNameFunction = None
-
+
        #: Include only specific folders, mutually exclusive with [excludeFolders]
        self.includeFolders = None
 
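The docstring addition above shows how to plug in ct_utils.image_file_to_camera_folder; as a usage sketch (the collapsing behavior described in the comment is taken from the surrounding docstring, not verified here):

    from megadetector.postprocessing.repeat_detection_elimination.repeat_detections_core import \
        RepeatDetectionOptions
    from megadetector.utils import ct_utils

    options = RepeatDetectionOptions()

    # Map each image path to its camera folder, so that manufacturer
    # subfolders like a/b/c/RECONYX100 and a/b/c/RECONYX101 are treated
    # as one camera
    options.customDirNameFunction = ct_utils.image_file_to_camera_folder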
megadetector/postprocessing/subset_json_detector_output.py

@@ -124,7 +124,7 @@ class SubsetJsonDetectorOutputOptions:
         self.remove_failed_images = False
 
         #: Either a list of category IDs (as string-ints) (not names), or a dictionary mapping category *IDs*
-        #: (as string-ints) (not names) to thresholds.  Removes non-matching detections, does not
+        #: (as string-ints) (not names) to thresholds. Removes non-matching detections, does not
         #: remove images.  Not technically mutually exclusize with category_names_to_keep, but it's an esoteric
         #: scenario indeed where you would want to specify both.
         self.categories_to_keep = None
@@ -517,7 +517,7 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
     else:
 
         # Map images to unique folders
-        print('Finding unique folders')
+        print('Finding unique folders')
 
         folders_to_images = {}
 
@@ -670,16 +670,26 @@ def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('input_file', type=str, help='Input .json filename')
     parser.add_argument('output_file', type=str, help='Output .json filename')
-    parser.add_argument('--query', type=str, default=None, help='Query string to search for (omitting this matches all)')
-    parser.add_argument('--replacement', type=str, default=None, help='Replace [query] with this')
-    parser.add_argument('--confidence_threshold', type=float, default=None, help='Remove detections below this confidence level')
-    parser.add_argument('--split_folders', action='store_true', help='Split .json files by leaf-node folder')
-    parser.add_argument('--split_folder_param', type=int, help='Directory level count for n_from_bottom and n_from_top splitting')
-    parser.add_argument('--split_folder_mode', type=str, help='Folder level to use for splitting ("top" or "bottom")')
-    parser.add_argument('--make_folder_relative', action='store_true', help='Make image paths relative to their containing folder (only meaningful with split_folders)')
-    parser.add_argument('--overwrite_json_files', action='store_true', help='Overwrite output files')
-    parser.add_argument('--copy_jsons_to_folders', action='store_true', help='When using split_folders and make_folder_relative, copy jsons to their corresponding folders (relative to output_file)')
-    parser.add_argument('--create_folders', action='store_true', help='When using copy_jsons_to_folders, create folders that don''t exist')
+    parser.add_argument('--query', type=str, default=None,
+                        help='Query string to search for (omitting this matches all)')
+    parser.add_argument('--replacement', type=str, default=None,
+                        help='Replace [query] with this')
+    parser.add_argument('--confidence_threshold', type=float, default=None,
+                        help='Remove detections below this confidence level')
+    parser.add_argument('--split_folders', action='store_true',
+                        help='Split .json files by leaf-node folder')
+    parser.add_argument('--split_folder_param', type=int,
+                        help='Directory level count for n_from_bottom and n_from_top splitting')
+    parser.add_argument('--split_folder_mode', type=str,
+                        help='Folder level to use for splitting ("top" or "bottom")')
+    parser.add_argument('--make_folder_relative', action='store_true',
+                        help='Make image paths relative to their containing folder (only meaningful with split_folders)')
+    parser.add_argument('--overwrite_json_files', action='store_true',
+                        help='Overwrite output files')
+    parser.add_argument('--copy_jsons_to_folders', action='store_true',
+                        help='When using split_folders and make_folder_relative, copy jsons to their corresponding folders (relative to output_file)')
+    parser.add_argument('--create_folders', action='store_true',
+                        help='When using copy_jsons_to_folders, create folders that don''t exist')
 
     if len(sys.argv[1:]) == 0:
         parser.print_help()
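The argparse hunk above is purely a reflow (each add_argument call split across two lines), so the CLI is unchanged; an invocation using these flags might look like this (file names are made up):

    python -m megadetector.postprocessing.subset_json_detector_output \
        md_results.json camera_a_results.json \
        --query "camera_a/" \
        --confidence_threshold 0.1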
megadetector/taxonomy_mapping/map_new_lila_datasets.py

@@ -15,10 +15,10 @@ import json
 # Created by get_lila_category_list.py
 input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
 
-output_file = os.path.expanduser('~/lila/lila_additions_2023.12.29.csv')
+output_file = os.path.expanduser('~/lila/lila_additions_2024.07.16.csv')
 
 datasets_to_map = [
-    'Trail Camera Images of New Zealand Animals'
+    'Desert Lion Conservation Camera Traps'
     ]
 
 
@@ -133,7 +133,7 @@ if False:
     # q = 'white-throated monkey'
     # q = 'cingulata'
     # q = 'notamacropus'
-    q = 'porzana'
+    q = 'aves'
     taxonomy_preference = 'inat'
     m = get_preferred_taxonomic_match(q,taxonomy_preference)
     # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py

@@ -24,7 +24,7 @@ if False:
     release_taxonomy_file = os.path.expanduser('~/lila/lila-taxonomy-mapping_release.csv')
     # import clipboard; clipboard.copy(release_taxonomy_file)
 
-    # Created by get_lila_category_list.py... contains counts for each category
+    # Created by get_lila_annotation_counts.py... contains counts for each category
     lila_dataset_to_categories_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
 
     assert os.path.isfile(lila_dataset_to_categories_file)
@@ -140,3 +140,4 @@ if False:
 
     print('Wrote final output to {}'.format(release_taxonomy_file))
 
+# ...if False
megadetector/taxonomy_mapping/preview_lila_taxonomy.py

@@ -16,7 +16,7 @@ import os
 import pandas as pd
 
 # lila_taxonomy_file = r"c:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
-lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2023.12.29.csv')
+lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2024.07.16.csv')
 
 preview_base = os.path.expanduser('~/lila/lila_taxonomy_preview')
 os.makedirs(preview_base,exist_ok=True)
megadetector/taxonomy_mapping/simple_image_download.py

@@ -8,6 +8,11 @@ Slightly modified from:
 
     https://github.com/RiddlerQ/simple_image_download
 
+    pip install python-magic
+
+    # On Windows, also run:
+    pip install python-magic-bin
+
 """
 
 #%% Imports
megadetector/taxonomy_mapping/species_lookup.py

@@ -208,7 +208,7 @@ def initialize_taxonomy_lookup(force_init=False) -> None:
     # Load GBIF taxonomy
     gbif_taxonomy_file = os.path.join(taxonomy_download_dir, 'GBIF', 'Taxon.tsv')
     print('Loading GBIF taxonomy from {}'.format(gbif_taxonomy_file))
-    gbif_taxonomy = pd.read_csv(gbif_taxonomy_file, sep='\t')
+    gbif_taxonomy = pd.read_csv(gbif_taxonomy_file, sep='\t', encoding='utf-8',on_bad_lines='warn')
     gbif_taxonomy['scientificName'] = gbif_taxonomy['scientificName'].fillna('').str.strip()
     gbif_taxonomy['canonicalName'] = gbif_taxonomy['canonicalName'].fillna('').str.strip()
 
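Worth noting: on_bad_lines is the pandas 1.3+ replacement for the deprecated error_bad_lines/warn_bad_lines flags, so this change implicitly assumes pandas >= 1.3; 'warn' emits a ParserWarning and skips each malformed row rather than raising.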
megadetector/utils/ct_utils.py

@@ -16,6 +16,8 @@ import os
 import jsonpickle
 import numpy as np
 
+from operator import itemgetter
+
 # List of file extensions we'll consider images; comparisons will be case-insensitive
 # (i.e., no need to include both .jpg and .JPG on this list).
 image_extensions = ['.jpg', '.jpeg', '.gif', '.png']
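The new itemgetter import supports the classification sorting added below; for reference, itemgetter(1) keys a sort on the second element of each pair:

    from operator import itemgetter

    # Sort (class, confidence) pairs by confidence, descending
    sorted([('3', 0.2), ('1', 0.9)], key=itemgetter(1), reverse=True)
    # [('1', 0.9), ('3', 0.2)]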
@@ -294,6 +296,29 @@ def get_max_conf(im):
     return max_conf
 
 
+def sort_results_for_image(im):
+    """
+    Sort classification and detection results in descending order by confidence (in place).
+
+    Args:
+        im (dict): image dictionary in the MD output format (with a 'detections' field)
+    """
+    if 'detections' not in im or im['detections'] is None:
+        return
+
+    # Sort detections in descending order by confidence
+    im['detections'] = sort_list_of_dicts_by_key(im['detections'],k='conf',reverse=True)
+
+    for det in im['detections']:
+
+        # Sort classifications (which are (class,conf) tuples) in descending order by confidence
+        if 'classifications' in det and \
+           (det['classifications'] is not None) and \
+           (len(det['classifications']) > 0):
+            L = det['classifications']
+            det['classifications'] = sorted(L,key=itemgetter(1),reverse=True)
+
+
 def point_dist(p1,p2):
     """
     Computes the distance between two points, represented as length-two tuples.
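A quick usage sketch for the new helper, on a hand-made MD-format image record (all values invented):

    from megadetector.utils.ct_utils import sort_results_for_image

    im = {'file': 'camera_a/IMG0001.JPG',
          'detections': [
              {'category': '1', 'conf': 0.3,
               'classifications': [['2', 0.1], ['5', 0.8]]},
              {'category': '2', 'conf': 0.9}
          ]}

    sort_results_for_image(im)

    # im['detections'] is now sorted by 'conf' (the 0.9 detection first), and
    # the classification list is sorted by its confidence element (['5', 0.8] first)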
@@ -406,6 +431,21 @@ def split_list_into_n_chunks(L, n, chunk_strategy='greedy'):
         raise ValueError('Invalid chunk strategy: {}'.format(chunk_strategy))
 
 
+def sort_list_of_dicts_by_key(L,k,reverse=False):
+    """
+    Sorts the list of dictionaries [L] by the key [k].
+
+    Args:
+        L (list): list of dictionaries to sort
+        k (object, typically str): the sort key
+        reverse (bool, optional): whether to sort in reverse (descending) order
+
+    Returns:
+        dict: sorted copy of [d]
+    """
+    return sorted(L, key=lambda d: d[k], reverse=reverse)
+
+
 def sort_dictionary_by_key(d,reverse=False):
     """
     Sorts the dictionary [d] by key.
@@ -611,3 +651,11 @@ if False:
     r1 = [0.4,0.8,10,22]; r2 = [100, 101, 200, 210.4]; assert abs(rect_distance(r1,r2)-119.753) < 0.001
     r1 = [0.4,0.8,10,22]; r2 = [101, 101, 200, 210.4]; assert abs(rect_distance(r1,r2)-120.507) < 0.001
     r1 = [0.4,0.8,10,22]; r2 = [120, 120, 200, 210.4]; assert abs(rect_distance(r1,r2)-147.323) < 0.001
+
+
+    #%% Test dictionary sorting
+
+    L = [{'a':5},{'a':0},{'a':10}]
+    k = 'a'
+    sort_list_of_dicts_by_key(L, k, reverse=True)
+
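Note that sort_list_of_dicts_by_key returns a sorted copy rather than sorting in place, so the smoke test above discards its result; the returned value for this input would be [{'a': 10}, {'a': 5}, {'a': 0}], with L itself unchanged.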