megadetector-10.0.9-py3-none-any.whl → megadetector-10.0.11-py3-none-any.whl

This diff reflects the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (84)
  1. megadetector/data_management/animl_to_md.py +5 -2
  2. megadetector/data_management/cct_json_utils.py +4 -2
  3. megadetector/data_management/cct_to_md.py +5 -4
  4. megadetector/data_management/cct_to_wi.py +5 -1
  5. megadetector/data_management/coco_to_yolo.py +3 -2
  6. megadetector/data_management/databases/combine_coco_camera_traps_files.py +4 -4
  7. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  8. megadetector/data_management/databases/subset_json_db.py +0 -3
  9. megadetector/data_management/generate_crops_from_cct.py +6 -4
  10. megadetector/data_management/get_image_sizes.py +5 -35
  11. megadetector/data_management/labelme_to_coco.py +10 -6
  12. megadetector/data_management/labelme_to_yolo.py +19 -28
  13. megadetector/data_management/lila/create_lila_test_set.py +22 -2
  14. megadetector/data_management/lila/generate_lila_per_image_labels.py +7 -5
  15. megadetector/data_management/lila/lila_common.py +2 -2
  16. megadetector/data_management/lila/test_lila_metadata_urls.py +0 -1
  17. megadetector/data_management/ocr_tools.py +6 -10
  18. megadetector/data_management/read_exif.py +69 -13
  19. megadetector/data_management/remap_coco_categories.py +1 -1
  20. megadetector/data_management/remove_exif.py +10 -5
  21. megadetector/data_management/rename_images.py +20 -13
  22. megadetector/data_management/resize_coco_dataset.py +10 -4
  23. megadetector/data_management/speciesnet_to_md.py +3 -3
  24. megadetector/data_management/yolo_output_to_md_output.py +3 -1
  25. megadetector/data_management/yolo_to_coco.py +28 -19
  26. megadetector/detection/change_detection.py +26 -18
  27. megadetector/detection/process_video.py +1 -1
  28. megadetector/detection/pytorch_detector.py +5 -5
  29. megadetector/detection/run_detector.py +34 -10
  30. megadetector/detection/run_detector_batch.py +60 -42
  31. megadetector/detection/run_inference_with_yolov5_val.py +3 -1
  32. megadetector/detection/run_md_and_speciesnet.py +282 -110
  33. megadetector/detection/run_tiled_inference.py +7 -7
  34. megadetector/detection/tf_detector.py +4 -6
  35. megadetector/detection/video_utils.py +9 -6
  36. megadetector/postprocessing/add_max_conf.py +4 -4
  37. megadetector/postprocessing/categorize_detections_by_size.py +3 -2
  38. megadetector/postprocessing/classification_postprocessing.py +19 -21
  39. megadetector/postprocessing/combine_batch_outputs.py +3 -2
  40. megadetector/postprocessing/compare_batch_results.py +49 -27
  41. megadetector/postprocessing/convert_output_format.py +8 -6
  42. megadetector/postprocessing/create_crop_folder.py +13 -4
  43. megadetector/postprocessing/generate_csv_report.py +22 -8
  44. megadetector/postprocessing/load_api_results.py +8 -4
  45. megadetector/postprocessing/md_to_coco.py +2 -3
  46. megadetector/postprocessing/md_to_labelme.py +12 -8
  47. megadetector/postprocessing/md_to_wi.py +2 -1
  48. megadetector/postprocessing/merge_detections.py +4 -6
  49. megadetector/postprocessing/postprocess_batch_results.py +4 -3
  50. megadetector/postprocessing/remap_detection_categories.py +6 -3
  51. megadetector/postprocessing/render_detection_confusion_matrix.py +18 -10
  52. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  53. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -3
  54. megadetector/postprocessing/separate_detections_into_folders.py +10 -4
  55. megadetector/postprocessing/subset_json_detector_output.py +1 -1
  56. megadetector/postprocessing/top_folders_to_bottom.py +22 -7
  57. megadetector/postprocessing/validate_batch_results.py +1 -1
  58. megadetector/taxonomy_mapping/map_new_lila_datasets.py +59 -3
  59. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  60. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +26 -17
  61. megadetector/taxonomy_mapping/species_lookup.py +51 -2
  62. megadetector/utils/ct_utils.py +9 -4
  63. megadetector/utils/directory_listing.py +3 -0
  64. megadetector/utils/extract_frames_from_video.py +4 -0
  65. megadetector/utils/gpu_test.py +6 -6
  66. megadetector/utils/md_tests.py +21 -21
  67. megadetector/utils/path_utils.py +171 -36
  68. megadetector/utils/split_locations_into_train_val.py +0 -4
  69. megadetector/utils/string_utils.py +21 -0
  70. megadetector/utils/url_utils.py +5 -3
  71. megadetector/utils/wi_platform_utils.py +168 -24
  72. megadetector/utils/wi_taxonomy_utils.py +38 -8
  73. megadetector/utils/write_html_image_list.py +1 -2
  74. megadetector/visualization/plot_utils.py +31 -19
  75. megadetector/visualization/render_images_with_thumbnails.py +3 -0
  76. megadetector/visualization/visualization_utils.py +18 -7
  77. megadetector/visualization/visualize_db.py +9 -26
  78. megadetector/visualization/visualize_detector_output.py +1 -0
  79. megadetector/visualization/visualize_video_output.py +14 -2
  80. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/METADATA +1 -1
  81. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/RECORD +84 -84
  82. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/WHEEL +0 -0
  83. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/licenses/LICENSE +0 -0
  84. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/top_level.txt +0 -0
megadetector/postprocessing/md_to_labelme.py

@@ -28,6 +28,7 @@ from functools import partial
 
 from megadetector.visualization.visualization_utils import open_image
 from megadetector.utils.ct_utils import round_float
+from megadetector.utils.ct_utils import write_json
 from megadetector.detection.run_detector import DEFAULT_DETECTOR_LABEL_MAP, FAILURE_IMAGE_OPEN
 
 output_precision = 3
@@ -36,8 +37,11 @@ default_confidence_threshold = 0.15
 
 #%% Functions
 
-def get_labelme_dict_for_image(im,image_base_name=None,category_id_to_name=None,
-                               info=None,confidence_threshold=None):
+def get_labelme_dict_for_image(im,
+                               image_base_name=None,
+                               category_id_to_name=None,
+                               info=None,
+                               confidence_threshold=None):
     """
     For the given image struct in MD results format, reformat the detections into
     labelme format.
@@ -60,7 +64,7 @@ def get_labelme_dict_for_image(im,image_base_name=None,category_id_to_name=None,
     if image_base_name is None:
         image_base_name = os.path.basename(im['file'])
 
-    if category_id_to_name:
+    if category_id_to_name is None:
         category_id_to_name = DEFAULT_DETECTOR_LABEL_MAP
 
     if confidence_threshold is None:
@@ -138,8 +142,7 @@ def _write_output_for_image(im,
                             info=info,
                             confidence_threshold=confidence_threshold)
 
-    with open(json_path,'w') as f:
-        json.dump(output_dict,f,indent=1)
+    write_json(json_path,output_dict)
 
 # ...def write_output_for_image(...)
 
@@ -256,9 +259,10 @@ def md_to_labelme(results_file,
                                           md_results['images']),
                                           total=len(md_results['images'])))
         finally:
-            pool.close()
-            pool.join()
-            print("Pool closed and joined for labelme file writes")
+            if pool is not None:
+                pool.close()
+                pool.join()
+                print("Pool closed and joined for labelme file writes")
 
     # ...for each image
 
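Note: this is the first of several hunks in this release (see also merge_detections.py and remap_detection_categories.py below) that replace inline open()/json.dump() calls with ct_utils.write_json. The helper itself is not part of this diff; judging only from the call sites shown (write_json(path, data)), it is presumably a thin wrapper along these lines. This is a hedged sketch, not the actual ct_utils implementation:

    # Hypothetical sketch of write_json(path, data), inferred from call sites in
    # this diff; the real megadetector.utils.ct_utils.write_json may differ
    # (e.g. in encoding, indent level, or extra keyword arguments).
    import json

    def write_json(path, data, indent=1):
        """Serialize [data] to [path] as JSON."""
        with open(path, 'w') as f:
            json.dump(data, f, indent=indent)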
megadetector/postprocessing/md_to_wi.py

@@ -10,6 +10,7 @@ Converts the MD .json format to the WI predictions.json format.
 
 import sys
 import argparse
+
 from megadetector.utils.wi_taxonomy_utils import generate_predictions_json_from_md_results
 
 
@@ -34,7 +35,7 @@ def main(): # noqa
 
     generate_predictions_json_from_md_results(args.md_results_file,
                                               args.predictions_json_file,
-                                              base_folder=None)
+                                              base_folder=args.base_folder)
 
 if __name__ == '__main__':
     main()
megadetector/postprocessing/merge_detections.py

@@ -23,6 +23,7 @@ import os
 from tqdm import tqdm
 
 from megadetector.utils.ct_utils import get_iou
+from megadetector.utils.ct_utils import write_json
 
 
 #%% Structs
@@ -121,8 +122,6 @@ def merge_detections(source_files,target_file,output_file,options=None):
 
     assert os.path.isfile(target_file)
 
-    os.makedirs(os.path.dirname(output_file),exist_ok=True)
-
     with open(target_file,'r') as f:
         output_data = json.load(f)
 
@@ -290,8 +289,7 @@ def merge_detections(source_files,target_file,output_file,options=None):
 
     # ...for each source file
 
-    with open(output_file,'w') as f:
-        json.dump(output_data,f,indent=1)
+    write_json(output_file,output_data)
 
     print('Saved merged results to {}'.format(output_file))
 
@@ -308,7 +306,7 @@ def main():
 
     default_options = MergeDetectionsOptions()
 
     parser = argparse.ArgumentParser(
-        description='Merge detections from one or more MegaDetector results files into an existing reuslts file')
+        description='Merge detections from one or more MegaDetector results files into an existing results file')
     parser.add_argument(
         'source_files',
         nargs='+',
@@ -359,7 +357,7 @@
         type=int,
         nargs='+',
         default=None,
-        help='List of numeric detection categories to include')
+        help='List of numeric detection categories to exclude')
     parser.add_argument(
         '--merge_empty_only',
         action='store_true',
megadetector/postprocessing/postprocess_batch_results.py

@@ -1889,8 +1889,9 @@ def process_batch_results(options):
     if options.include_classification_category_report:
 
         # TODO: it's only for silly historical reasons that we re-read
-        # the input file in this case; we're not currently carrying the json
-        # representation around, only the Pandas representation.
+        # the input file in this case; because this module has used Pandas
+        # forever, we're not currently carrying the json representation around,
+        # only the Pandas representation.
 
         print('Generating classification category report')
 
@@ -1905,7 +1906,7 @@
                 if ('classifications' in det) and (len(det['classifications']) > 0):
                     class_id = det['classifications'][0][0]
                     if class_id not in classification_category_to_count:
-                        classification_category_to_count[class_id] = 0
+                        classification_category_to_count[class_id] = 1
                     else:
                         classification_category_to_count[class_id] = \
                             classification_category_to_count[class_id] + 1
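The second hunk above fixes an off-by-one in the classification category report: a newly-seen category was initialized to 0 and only incremented on later occurrences, so every category was undercounted by one. A minimal illustration of the corrected counting pattern, reusing the hunk's variable names with made-up class IDs (the surrounding detection loop is assumed):

    # After the fix, the first sighting of a category counts as 1
    classification_category_to_count = {}
    for class_id in ['deer', 'deer', 'fox']:  # stand-in for per-detection class IDs
        if class_id not in classification_category_to_count:
            classification_category_to_count[class_id] = 1
        else:
            classification_category_to_count[class_id] = \
                classification_category_to_count[class_id] + 1
    # classification_category_to_count == {'deer': 2, 'fox': 1}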
megadetector/postprocessing/remap_detection_categories.py

@@ -18,6 +18,7 @@ import argparse
 from tqdm import tqdm
 
 from megadetector.utils.ct_utils import invert_dictionary
+from megadetector.utils.ct_utils import write_json
 
 
 #%% Main function
@@ -132,14 +133,16 @@ def remap_detection_categories(input_file,
         for det in im['detections']:
             det['category'] = input_category_id_to_output_category_id[det['category']]
 
-    input_data['detection_categories'] = target_category_map
+    # ...for each image
 
-    with open(output_file,'w') as f:
-        json.dump(input_data,f,indent=1)
+    input_data['detection_categories'] = target_category_map
 
+    write_json(output_file,input_data)
 
     print('Saved remapped results to {}'.format(output_file))
 
+# ...def remap_detection_categories(...)
+
 
 #%% Interactive driver
 
megadetector/postprocessing/render_detection_confusion_matrix.py

@@ -252,9 +252,10 @@ def render_detection_confusion_matrix(ground_truth_file,
                                               md_formatted_results['images']),
                                               total=len(md_formatted_results['images'])))
         finally:
-            pool.close()
-            pool.join()
-            print("Pool closed and joined for confusion matrix rendering")
+            if pool is not None:
+                pool.close()
+                pool.join()
+                print("Pool closed and joined for confusion matrix rendering")
 
     else:
 
@@ -369,11 +370,15 @@
 
         # If there were no detections at all, call this image empty
         if len(results_im['detections']) == 0:
+
             predicted_category_name = empty_category_name
+
         # Otherwise look for above-threshold detections
         else:
+
             results_category_name_to_confidence = defaultdict(int)
             for det in results_im['detections']:
+
                 category_name = results_category_id_to_name[det['category']]
                 detection_threshold = confidence_thresholds['default']
                 if category_name in confidence_thresholds:
@@ -381,12 +386,15 @@
                 if det['conf'] > detection_threshold:
                     results_category_name_to_confidence[category_name] = max(
                         results_category_name_to_confidence[category_name],det['conf'])
-            # If there were no detections above threshold
-            if len(results_category_name_to_confidence) == 0:
-                predicted_category_name = empty_category_name
-            else:
-                predicted_category_name = max(results_category_name_to_confidence,
-                                              key=results_category_name_to_confidence.get)
+
+            # ...for each detection
+
+            # If there were no detections above threshold
+            if len(results_category_name_to_confidence) == 0:
+                predicted_category_name = empty_category_name
+            else:
+                predicted_category_name = max(results_category_name_to_confidence,
+                                              key=results_category_name_to_confidence.get)
 
         ground_truth_category_index = gt_category_name_to_category_index[ground_truth_category_name]
         predicted_category_index = gt_category_name_to_category_index[predicted_category_name]
@@ -396,7 +404,7 @@
 
         confusion_matrix[ground_truth_category_index,predicted_category_index] += 1
 
-    # ...for each file
+    # ...for each ground truth file
 
     plt.ioff()
 
megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py

@@ -37,7 +37,7 @@ def remove_repeat_detections(input_file,output_file,filtering_dir):
     """
 
     assert os.path.isfile(input_file), "Can't find file {}".format(input_file)
-    assert os.path.isdir(filtering_dir), "Can't find folder {}".format(filtering_dir)
+    assert os.path.exists(filtering_dir), "Can't find input file/folder {}".format(filtering_dir)
     options = repeat_detections_core.RepeatDetectionOptions()
     if os.path.isfile(filtering_dir):
         options.filterFileToLoad = filtering_dir
megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py

@@ -869,7 +869,7 @@ def _update_detection_table(repeat_detection_results, options, output_file_name=
            detection_to_modify = row_detections[instance.i_detection]
 
            # Make sure the bounding box matches
-            assert (instance_bbox[0:3] == detection_to_modify['bbox'][0:3])
+            assert (instance_bbox[0:4] == detection_to_modify['bbox'][0:4])
 
            # Make the probability negative, if it hasn't been switched by
            # another bounding box
@@ -1149,7 +1149,8 @@ def find_repeat_detections(input_filename, output_file_name=None, options=None):
 
        # Load the filtering file
        detection_index_file_name = options.filterFileToLoad
-        s_in = open(detection_index_file_name, 'r').read()
+        with open(detection_index_file_name, 'r') as f:
+            s_in = f.read()
        detection_info = jsonpickle.decode(s_in)
        filtering_base_dir = os.path.dirname(options.filterFileToLoad)
        suspicious_detections = detection_info['suspicious_detections']
@@ -1382,7 +1383,8 @@ def find_repeat_detections(input_filename, output_file_name=None, options=None):
 
    # candidate_detection_file = all_candidate_detection_files[0]
    for candidate_detection_file in all_candidate_detection_files:
-        s = open(candidate_detection_file, 'r').read()
+        with open(candidate_detection_file, 'r') as f:
+            s = f.read()
        candidate_detections_this_file = jsonpickle.decode(s)
        all_candidate_detections.append(candidate_detections_this_file)
 
megadetector/postprocessing/separate_detections_into_folders.py

@@ -494,7 +494,8 @@ def separate_detections_into_folders(options):
 
    # Load detection results
    print('Loading detection results')
-    results = json.load(open(options.results_file))
+    with open(options.results_file,'r') as f:
+        results = json.load(f)
    images = results['images']
 
    for im in images:
@@ -618,8 +619,13 @@
 
    print('Starting a pool with {} threads'.format(options.n_threads))
    pool = ThreadPool(options.n_threads)
-    process_detections_with_options = partial(_process_detections, options=options)
-    _ = list(tqdm(pool.imap(process_detections_with_options, images), total=len(images)))
+    try:
+        process_detections_with_options = partial(_process_detections, options=options)
+        _ = list(tqdm(pool.imap(process_detections_with_options, images), total=len(images)))
+    finally:
+        pool.close()
+        pool.join()
+        print('Pool closed and joined for folder separation')
 
    if options.remove_empty_folders:
        print('Removing empty folders from {}'.format(options.base_output_folder))
@@ -736,7 +742,7 @@ def main(): # noqa
                        help='Line thickness (in pixels) for rendering, only meaningful if ' + \
                             'using render_boxes (defaults to {})'.format(
                             default_line_thickness))
-    parser.add_argument('--box_expansion', type=int, default=default_line_thickness,
+    parser.add_argument('--box_expansion', type=int, default=default_box_expansion,
                        help='Box expansion (in pixels) for rendering, only meaningful if ' + \
                             'using render_boxes (defaults to {})'.format(
                             default_box_expansion))
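This is one of several hunks in the release (see also md_to_labelme.py, render_detection_confusion_matrix.py, and top_folders_to_bottom.py below) that wrap worker-pool loops in try/finally so the pool is always closed and joined, even when a worker raises. A stripped-down sketch of that pattern, using placeholder work items rather than the module's real options object:

    # Generic version of the pool-cleanup pattern adopted in this release;
    # process_item and items are placeholders, not MegaDetector functions.
    from multiprocessing.pool import ThreadPool
    from tqdm import tqdm

    def process_item(item):
        return item * 2

    items = list(range(10))
    pool = ThreadPool(4)
    try:
        results = list(tqdm(pool.imap(process_item, items), total=len(items)))
    finally:
        # Runs whether or not the map raised, so worker threads are not leaked
        pool.close()
        pool.join()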
megadetector/postprocessing/subset_json_detector_output.py

@@ -433,7 +433,7 @@ def subset_json_detector_output_by_list(data, options):
    """
 
    if options.keep_files_in_list is None:
-        return
+        return data
 
    files_to_keep = None
 
megadetector/postprocessing/top_folders_to_bottom.py

@@ -45,7 +45,12 @@ class TopFoldersToBottomOptions:
    Options used to parameterize top_folders_to_bottom()
    """
 
-    def __init__(self,input_folder,output_folder,copy=True,n_threads=1):
+    def __init__(self,
+                 input_folder,
+                 output_folder,
+                 copy=True,
+                 n_threads=1,
+                 overwrite=False):
 
        #: Whether to copy (True) vs. move (False) false when re-organizing
        self.copy = copy
@@ -60,7 +65,7 @@
        self.output_folder = output_folder
 
        #: If this is False and an output file exists, throw an error
-        self.overwrite = False
+        self.overwrite = overwrite
 
 
 #%% Main functions
@@ -130,6 +135,7 @@ def top_folders_to_bottom(options):
        options (TopFoldersToBottomOptions): See TopFoldersToBottomOptions for parameter details.
 
    """
+
    os.makedirs(options.output_folder,exist_ok=True)
 
    # Enumerate input folder
@@ -167,10 +173,15 @@
 
    print('Starting a pool with {} threads'.format(options.n_threads))
    pool = ThreadPool(options.n_threads)
-    process_file_with_options = partial(_process_file, options=options)
-    _ = list(tqdm(pool.imap(process_file_with_options, relative_files), total=len(relative_files)))
+    try:
+        process_file_with_options = partial(_process_file, options=options)
+        _ = list(tqdm(pool.imap(process_file_with_options, relative_files), total=len(relative_files)))
+    finally:
+        pool.close()
+        pool.join()
+        print('Pool closed and join for folder inversion')
 
-# ...def top_folders_to_bottom()
+# ...def top_folders_to_bottom(...)
 
 
 #%% Interactive driver
@@ -192,7 +203,7 @@ if False:
 
 #%% Command-line driver
 
-# python top_folders_to_bottom.py "g:\temp\separated_images" "g:\temp\separated_images_inverted" --n_threads 100
+# python top_folders_to_bottom.py "g:\temp\separated_images" "g:\temp\separated_images_inverted" --n_threads 10
 
 def main(): # noqa
 
@@ -215,7 +226,11 @@ def main(): # noqa
 
    # Convert to an options object
    options = TopFoldersToBottomOptions(
-        args.input_folder,args.output_folder,copy=args.copy,n_threads=args.n_threads)
+        args.input_folder,
+        args.output_folder,
+        copy=args.copy,
+        n_threads=args.n_threads,
+        overwrite=args.overwrite)
 
    top_folders_to_bottom(options)
 
megadetector/postprocessing/validate_batch_results.py

@@ -39,7 +39,7 @@ typical_keys = ['classification_categories',
 
 class ValidateBatchResultsOptions:
    """
-    Options controlling the behavior of validate_bach_results()
+    Options controlling the behavior of validate_batch_results()
    """
 
    def __init__(self):
megadetector/taxonomy_mapping/map_new_lila_datasets.py

@@ -15,10 +15,10 @@ import json
 # Created by get_lila_category_list.py
 input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
 
-output_file = os.path.expanduser('~/lila/lila_additions_2025.06.23.csv')
+output_file = os.path.expanduser('~/lila/lila_additions_2025.10.07.csv')
 
 datasets_to_map = [
-    'Nkhotakota Camera Traps'
+    'California Small Animals'
    ]
 
 
@@ -128,6 +128,52 @@ output_df.to_csv(output_file, index=None, header=True)
 # from megadetector.utils.path_utils import open_file; open_file(output_file)
 
 
+#%% Remap missing entries in the .csv file
+
+# ...typically because I made a change to the mapping code.
+
+from megadetector.utils.path_utils import insert_before_extension
+from megadetector.utils.ct_utils import is_empty
+
+remapped_file = insert_before_extension(output_file,'remapped')
+
+df = pd.read_csv(output_file)
+
+for i_row,row in df.iterrows():
+
+    # Do we need to map this row?
+    if is_empty(row['source']):
+
+        query = row['query']
+        print('Mapping {}'.format(query))
+
+        taxonomic_match = get_preferred_taxonomic_match(query,taxonomy_preference=taxonomy_preference)
+
+        if (taxonomic_match.source == taxonomy_preference):
+
+            source = taxonomic_match.source
+            taxonomy_level = taxonomic_match.taxonomic_level
+            scientific_name = taxonomic_match.scientific_name
+            common_name = taxonomic_match.common_name
+            taxonomy_string = taxonomic_match.taxonomy_string
+
+            # Write source, taxonomy_level, scientific_name, common_name, and taxonomy_string
+            # to the corresponding columns in the current row in df
+            df.loc[i_row, 'source'] = source
+            df.loc[i_row, 'taxonomy_level'] = taxonomy_level
+            df.loc[i_row, 'scientific_name'] = scientific_name
+            df.loc[i_row, 'common_name'] = common_name
+            df.loc[i_row, 'taxonomy_string'] = taxonomy_string
+
+        # ...if we found a match
+
+    # ...do we need to map this row?
+
+# ...for each row
+
+df.to_csv(remapped_file, index=None, header=True)
+
+
 #%% Manual lookup
 
 if False:
@@ -140,11 +186,19 @@
 
 #%%
 
-q = 'animalia'
+from megadetector.taxonomy_mapping.species_lookup import pop_levels
+
+# Use this when an iNat match includes an empty subgenus with the same name as the genus
+n_levels_to_pop = 0
+q = 'sus scrofa'
 
 taxonomy_preference = 'inat'
 m = get_preferred_taxonomic_match(q,taxonomy_preference)
+if n_levels_to_pop > 0:
+    m = pop_levels(m,n_levels_to_pop)
+
 # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
+# common_name = eval(m.__dict__['taxonomy_string'])[0][-1][0]; print(common_name); clipboard.copy(common_name)
 
 if (m is None) or (len(m.taxonomy_string) == 0):
    print('No match')
@@ -155,3 +209,5 @@
    print(m.source)
    print(m.taxonomy_string)
    import clipboard; clipboard.copy(m.taxonomy_string)
+
+
megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py

@@ -162,4 +162,4 @@ if False:
 
    print('Wrote final output to {}'.format(release_taxonomy_file))
 
-    # ...if False
+
megadetector/taxonomy_mapping/preview_lila_taxonomy.py

@@ -16,7 +16,7 @@ import os
 import pandas as pd
 
 # lila_taxonomy_file = r"c:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
-lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2025.06.23.csv')
+lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2025.10.07.csv')
 
 preview_base = os.path.expanduser('~/lila/lila_taxonomy_preview')
 os.makedirs(preview_base,exist_ok=True)
@@ -56,11 +56,6 @@
    return level
 
 
-#%% Read the taxonomy mapping file
-
-df = pd.read_csv(lila_taxonomy_file)
-
-
 #%% Prepare taxonomy lookup
 
 from megadetector.taxonomy_mapping.species_lookup import \
@@ -95,20 +90,29 @@ taxonomy_preference = 'inat'
 # i_row = 0; row = df.iloc[i_row]
 for i_row,row in tqdm(df.iterrows(),total=len(df)):
 
-    sn = row['scientific_name']
-    if not isinstance(sn,str):
-        continue
+    try:
+
+        sn = row['scientific_name']
+        if not isinstance(sn,str):
+            continue
 
-    m = get_preferred_taxonomic_match(sn,taxonomy_preference)
-    assert m.scientific_name == sn
+        m = get_preferred_taxonomic_match(sn,taxonomy_preference)
+        assert m.scientific_name == sn
 
-    ts = row['taxonomy_string']
-    assert m.taxonomy_string[0:50] == ts[0:50], 'Mismatch for {}:\n\n{}\n\n{}\n'.format(
-        row['dataset_name'],ts,m.taxonomy_string)
+        ts = row['taxonomy_string']
+        assert m.taxonomy_string[0:50] == ts[0:50], 'Mismatch for {}:\n\n{}\n\n{}\n'.format(
+            row['dataset_name'],ts,m.taxonomy_string)
+
+        if ts != m.taxonomy_string:
+            n_taxonomy_changes += 1
+            df.loc[i_row,'taxonomy_string'] = m.taxonomy_string
+
+    except Exception as e:
 
-    if ts != m.taxonomy_string:
-        n_taxonomy_changes += 1
-        df.loc[i_row,'taxonomy_string'] = m.taxonomy_string
+        print('Error at row {}: {}'.format(i_row,str(e)))
+        raise
+
+# ...for each row
 
 print('\nMade {} taxonomy changes'.format(n_taxonomy_changes))
 
@@ -325,6 +329,11 @@ for i_row,row in df.iterrows():
 
 #%% Download sample images for all scientific names
 
+# You might have to do this:
+#
+# pip install python-magic
+# pip install python-magic-bin
+
 # Takes ~1 minute per 10 rows
 
 remapped_queries = {'papio':'papio+baboon',
megadetector/taxonomy_mapping/species_lookup.py

@@ -560,6 +560,7 @@ def get_taxonomic_info(query: str) -> List[Dict[str, Any]]:
    Main entry point: get taxonomic matches from both taxonomies for [query],
    which may be a scientific or common name.
    """
+
    query = query.strip().lower()
    # print("Finding taxonomy information for: {0}".format(query))
 
@@ -682,6 +683,35 @@ hyphenated_terms = ['crowned', 'backed', 'throated', 'tailed', 'headed', 'cheeke
                    'fronted', 'bellied', 'spotted', 'eared', 'collared', 'breasted',
                    'necked']
 
+def pop_levels(m, n_levels=1):
+    """
+    Remove [n_levels] levels from the bottom of the TaxonomicMatch object m, typically used to remove
+    silly subgenera.
+    """
+
+    v = eval(m.taxonomy_string)
+    assert v[0][1] == m.taxonomic_level
+    assert v[0][2] == m.scientific_name
+    popped_v = v[n_levels:]
+    taxonomic_level = popped_v[0][1]
+    scientific_name = popped_v[0][2]
+    common_name = popped_v[0][3]
+    if len(common_name) == 0:
+        common_name = ''
+    else:
+        common_name = common_name[0]
+    taxonomy_string = str(popped_v)
+    source = m.source
+    return TaxonomicMatch(scientific_name=scientific_name,
+                          common_name=common_name,
+                          taxonomic_level=taxonomic_level,
+                          source=source,
+                          taxonomy_string=taxonomy_string,
+                          match=None)
+
+# ...def pop_levels(...)
+
+
 def get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat', retry=True) -> TaxonomicMatch:
    """
    Wrapper for _get_preferred_taxonomic_match, but expressing a variety of heuristics
@@ -704,6 +734,17 @@ def get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat', retr
    for s in hyphenated_terms:
        query = query.replace(' ' + s,'-' + s)
    m,query = _get_preferred_taxonomic_match(query=query,taxonomy_preference=taxonomy_preference)
+
+    if (len(m.scientific_name) > 0) or (not retry):
+        return m
+
+    query = query.replace(' species','')
+    query = query.replace(' order','')
+    query = query.replace(' genus','')
+    query = query.replace(' family','')
+    query = query.replace(' subfamily','')
+    m,query = _get_preferred_taxonomic_match(query=query,taxonomy_preference=taxonomy_preference)
+
    return m
 
 
@@ -887,8 +928,16 @@ def _get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat') ->
 
    taxonomy_string = str(match)
 
-    return TaxonomicMatch(scientific_name, common_name, taxonomic_level, source,
-                          taxonomy_string, match),query
+    m = TaxonomicMatch(scientific_name, common_name, taxonomic_level, source,
+                       taxonomy_string, match)
+
+    if (m.taxonomic_level == 'subgenus' and \
+        match[1][1] == 'genus' and \
+        match[1][2] == m.scientific_name):
+        print('Removing redundant subgenus {}'.format(scientific_name))
+        m = pop_levels(m,1)
+
+    return m,query
 
 # ...def _get_preferred_taxonomic_match()
 