megadetector 10.0.10__py3-none-any.whl → 10.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic.

Files changed (80)
  1. megadetector/data_management/animl_to_md.py +5 -2
  2. megadetector/data_management/cct_json_utils.py +4 -2
  3. megadetector/data_management/cct_to_md.py +5 -4
  4. megadetector/data_management/cct_to_wi.py +5 -1
  5. megadetector/data_management/coco_to_yolo.py +3 -2
  6. megadetector/data_management/databases/combine_coco_camera_traps_files.py +4 -4
  7. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  8. megadetector/data_management/databases/subset_json_db.py +0 -3
  9. megadetector/data_management/generate_crops_from_cct.py +6 -4
  10. megadetector/data_management/get_image_sizes.py +5 -35
  11. megadetector/data_management/labelme_to_coco.py +10 -6
  12. megadetector/data_management/labelme_to_yolo.py +19 -28
  13. megadetector/data_management/lila/create_lila_test_set.py +22 -2
  14. megadetector/data_management/lila/generate_lila_per_image_labels.py +7 -5
  15. megadetector/data_management/lila/lila_common.py +2 -2
  16. megadetector/data_management/lila/test_lila_metadata_urls.py +0 -1
  17. megadetector/data_management/ocr_tools.py +6 -10
  18. megadetector/data_management/read_exif.py +59 -16
  19. megadetector/data_management/remap_coco_categories.py +1 -1
  20. megadetector/data_management/remove_exif.py +10 -5
  21. megadetector/data_management/rename_images.py +20 -13
  22. megadetector/data_management/resize_coco_dataset.py +10 -4
  23. megadetector/data_management/speciesnet_to_md.py +3 -3
  24. megadetector/data_management/yolo_output_to_md_output.py +3 -1
  25. megadetector/data_management/yolo_to_coco.py +28 -19
  26. megadetector/detection/change_detection.py +26 -18
  27. megadetector/detection/process_video.py +1 -1
  28. megadetector/detection/pytorch_detector.py +5 -5
  29. megadetector/detection/run_detector.py +34 -10
  30. megadetector/detection/run_detector_batch.py +2 -1
  31. megadetector/detection/run_inference_with_yolov5_val.py +3 -1
  32. megadetector/detection/run_md_and_speciesnet.py +215 -101
  33. megadetector/detection/run_tiled_inference.py +7 -7
  34. megadetector/detection/tf_detector.py +1 -1
  35. megadetector/detection/video_utils.py +9 -6
  36. megadetector/postprocessing/add_max_conf.py +4 -4
  37. megadetector/postprocessing/categorize_detections_by_size.py +3 -2
  38. megadetector/postprocessing/classification_postprocessing.py +7 -8
  39. megadetector/postprocessing/combine_batch_outputs.py +3 -2
  40. megadetector/postprocessing/compare_batch_results.py +49 -27
  41. megadetector/postprocessing/convert_output_format.py +8 -6
  42. megadetector/postprocessing/create_crop_folder.py +13 -4
  43. megadetector/postprocessing/generate_csv_report.py +22 -8
  44. megadetector/postprocessing/load_api_results.py +8 -4
  45. megadetector/postprocessing/md_to_coco.py +2 -3
  46. megadetector/postprocessing/md_to_labelme.py +12 -8
  47. megadetector/postprocessing/md_to_wi.py +2 -1
  48. megadetector/postprocessing/merge_detections.py +4 -6
  49. megadetector/postprocessing/postprocess_batch_results.py +4 -3
  50. megadetector/postprocessing/remap_detection_categories.py +6 -3
  51. megadetector/postprocessing/render_detection_confusion_matrix.py +18 -10
  52. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  53. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -3
  54. megadetector/postprocessing/separate_detections_into_folders.py +10 -4
  55. megadetector/postprocessing/subset_json_detector_output.py +1 -1
  56. megadetector/postprocessing/top_folders_to_bottom.py +22 -7
  57. megadetector/postprocessing/validate_batch_results.py +1 -1
  58. megadetector/taxonomy_mapping/map_new_lila_datasets.py +59 -3
  59. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  60. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +26 -17
  61. megadetector/taxonomy_mapping/species_lookup.py +51 -2
  62. megadetector/utils/ct_utils.py +9 -4
  63. megadetector/utils/extract_frames_from_video.py +4 -0
  64. megadetector/utils/gpu_test.py +6 -6
  65. megadetector/utils/md_tests.py +21 -21
  66. megadetector/utils/path_utils.py +112 -44
  67. megadetector/utils/split_locations_into_train_val.py +0 -4
  68. megadetector/utils/url_utils.py +5 -3
  69. megadetector/utils/wi_taxonomy_utils.py +37 -8
  70. megadetector/utils/write_html_image_list.py +1 -2
  71. megadetector/visualization/plot_utils.py +31 -19
  72. megadetector/visualization/render_images_with_thumbnails.py +3 -0
  73. megadetector/visualization/visualization_utils.py +18 -7
  74. megadetector/visualization/visualize_db.py +9 -26
  75. megadetector/visualization/visualize_video_output.py +14 -2
  76. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/METADATA +1 -1
  77. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/RECORD +80 -80
  78. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/WHEEL +0 -0
  79. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/licenses/LICENSE +0 -0
  80. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/top_level.txt +0 -0
megadetector/postprocessing/classification_postprocessing.py

@@ -25,6 +25,7 @@ from megadetector.utils.ct_utils import is_empty
 from megadetector.utils.ct_utils import sort_dictionary_by_value
 from megadetector.utils.ct_utils import sort_dictionary_by_key
 from megadetector.utils.ct_utils import invert_dictionary
+from megadetector.utils.ct_utils import write_json
 
 from megadetector.utils.wi_taxonomy_utils import clean_taxonomy_string
 from megadetector.utils.wi_taxonomy_utils import taxonomy_level_index

@@ -420,7 +421,7 @@ def _smooth_classifications_for_list_of_detections(detections,
 
     if verbose_debug_enabled:
         _print_counts_with_names(category_to_count,classification_descriptions)
-        from IPython import embed; embed()
+        # from IPython import embed; embed()
 
 
     ## Possibly change "other" classifications to the most common category

@@ -448,7 +449,7 @@ def _smooth_classifications_for_list_of_detections(detections,
            if verbose_debug_enabled:
                print('Replacing {} with {}'.format(
                    classification_descriptions[c[0]],
-                   classification_descriptions[c[1]]))
+                   most_common_category))
 
            n_other_classifications_changed_this_image += 1
            c[0] = most_common_category

@@ -918,8 +919,7 @@ def smooth_classification_results_image_level(input_file,output_file=None,option
 
     if output_file is not None:
         print('Writing results after image-level smoothing to:\n{}'.format(output_file))
-        with open(output_file,'w') as f:
-            json.dump(d,f,indent=1)
+        write_json(output_file,d)
 
     return d
 

@@ -1092,8 +1092,7 @@ def smooth_classification_results_sequence_level(input_file,
     if output_file is not None:
         print('Writing sequence-smoothed classification results to {}'.format(
             output_file))
-        with open(output_file,'w') as f:
-            json.dump(d,f,indent=1)
+        write_json(output_file,d)
 
     return d
 

@@ -1681,7 +1680,7 @@ def restrict_to_taxa_list(taxa_list,
 
     ##%% Write output
 
-    with open(output_file,'w') as f:
-        json.dump(output_data,f,indent=1)
+    write_json(output_file,output_data)
+
 
 # ...def restrict_to_taxa_list(...)
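Note: a recurring change in this release (here and in several postprocessing modules below) replaces inline two-line json.dump blocks with the new ct_utils.write_json helper. The helper's implementation is not shown in this diff; judging from the code it replaces, a minimal sketch would be something like the following, where the indent default and the directory-creation guard are assumptions:

import json
import os

def write_json(output_file, data, indent=1):
    # Create the output folder if the path contains one; os.path.dirname()
    # returns '' for a bare filename, and os.makedirs('') raises an error.
    output_dir = os.path.dirname(output_file)
    if len(output_dir) > 0:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_file, 'w') as f:
        json.dump(data, f, indent=indent)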
megadetector/postprocessing/combine_batch_outputs.py

@@ -203,7 +203,8 @@ def combine_api_shard_files(input_files, output_file=None):
     input_lists = []
     print('Loading input files')
     for fn in input_files:
-        input_lists.append(json.load(open(fn)))
+        with open(fn,'r') as f:
+            input_lists.append(json.load(f))
 
     detections = []
     # detection_list = input_lists[0]

@@ -214,7 +215,7 @@ def combine_api_shard_files(input_files, output_file=None):
         assert 'file' in d
         assert 'max_detection_conf' in d
         assert 'detections' in d
-        detections.extend([d])
+        detections.append(d)
 
     print('Writing output')
     if output_file is not None:
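Note on the first hunk: json.load(open(fn)) leaves the file handle open until the file object happens to be garbage-collected; the with-block closes it deterministically, even if json.load raises. The second hunk replaces detections.extend([d]), which wraps a single element in a throwaway list, with the equivalent and idiomatic detections.append(d). A minimal before/after illustration ('shard.json' is a hypothetical path):

import json

# Before: the handle is closed only whenever the object is collected.
# data = json.load(open('shard.json'))

# After: the handle is closed as soon as the block exits.
with open('shard.json', 'r') as f:
    data = json.load(f)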
megadetector/postprocessing/compare_batch_results.py

@@ -353,10 +353,11 @@ def _render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
     im_gt = image_pair['im_gt']
     annotations_gt = image_pair['annotations_gt']
     gt_boxes = []
+    gt_categories = []
     for ann in annotations_gt:
         if 'bbox' in ann:
             gt_boxes.append(ann['bbox'])
-    gt_categories = [ann['category_id'] for ann in annotations_gt]
+            gt_categories.append(ann['category_id'])
 
     if len(gt_boxes) > 0:
 

@@ -474,7 +475,7 @@ def _result_types_to_comparison_category(result_types_present_a,
        ('tp' not in result_types_present_b):
        return 'clean_tp_a_only'
    # Otherwise, TPs are cases where one model has only TPs, and the other model
-   # has any mistakse
+   # has any mistakes
    if ('fn' in result_types_present_b) or ('fp' in result_types_present_b):
        return 'tp_a_only'
 

@@ -486,7 +487,7 @@ def _result_types_to_comparison_category(result_types_present_a,
        ('tp' not in result_types_present_a):
        return 'clean_tp_b_only'
    # Otherwise, TPs are cases where one model has only TPs, and the other model
-   # has any mistakse
+   # has any mistakes
    if ('fn' in result_types_present_a) or ('fp' in result_types_present_a):
        return 'tp_b_only'
 

@@ -674,11 +675,17 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     category_ids_to_include_a = []
     category_ids_to_include_b = []
 
-    for category_name in options.category_names_to_include:
-        if category_name in category_name_to_id_a:
-            category_ids_to_include_a.append(category_name_to_id_a[category_name])
-        if category_name in category_name_to_id_b:
-            category_ids_to_include_b.append(category_name_to_id_b[category_name])
+    # If we're supposed to be including all categories, we don't actually need to
+    # populate category_ids_to_include_a/b, but we're doing this for future-proofing.
+    if options.category_names_to_include is None:
+        category_ids_to_include_a = sorted(list(category_name_to_id_a.values()))
+        category_ids_to_include_b = sorted(list(category_name_to_id_b.values()))
+    else:
+        for category_name in options.category_names_to_include:
+            if category_name in category_name_to_id_a:
+                category_ids_to_include_a.append(category_name_to_id_a[category_name])
+            if category_name in category_name_to_id_b:
+                category_ids_to_include_b.append(category_name_to_id_b[category_name])
 
     if pairwise_options.results_description_a is None:
         if 'detector' not in results_a['info']:

@@ -814,7 +821,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
        print('Warning: {} files are only available in the ground truth (not in MD results)'.format(
            len(filenames_only_in_gt)))
 
-   filenames_only_in_results = gt_filenames_set.difference(gt_filenames)
+   filenames_only_in_results = filenames_to_compare_set.difference(gt_filenames_set)
    if len(filenames_only_in_results) > 0:
        print('Warning: {} files are only available in the MD results (not in ground truth)'.format(
            len(filenames_only_in_results)))
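Note: the last hunk above is a set-difference direction fix: the old expression subtracted gt_filenames from gt_filenames_set, i.e. a set from its own contents, which is always empty, so the "only available in the MD results" warning could never fire. Set difference is asymmetric:

results_filenames = {'a.jpg', 'b.jpg'}
gt_filenames = {'b.jpg', 'c.jpg'}

print(gt_filenames.difference(gt_filenames))       # set() -- a set minus itself is always empty
print(results_filenames.difference(gt_filenames))  # {'a.jpg'} -- in results but not in ground truth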
@@ -1185,13 +1192,6 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
        if ('tp' in result_types_present_b) or ('fn' in result_types_present_b):
            assert 'tp' in result_types_present_a or 'fn' in result_types_present_a
 
-       # If either model has a TP or FN, the other has to have a TP or FN, since
-       # there was something in the GT
-       if ('tp' in result_types_present_a) or ('fn' in result_types_present_a):
-           assert 'tp' in result_types_present_b or 'fn' in result_types_present_b
-       if ('tp' in result_types_present_b) or ('fn' in result_types_present_b):
-           assert 'tp' in result_types_present_a or 'fn' in result_types_present_a
-
 
        ## Choose a comparison category based on result types
 

@@ -1677,8 +1677,8 @@ def n_way_comparison(filenames,
        '[detection_thresholds] should be the same length as [filenames]'
 
    if rendering_thresholds is not None:
-       assert len(rendering_thresholds) == len(filenames)
-       '[rendering_thresholds] should be the same length as [filenames]'
+       assert len(rendering_thresholds) == len(filenames), \
+           '[rendering_thresholds] should be the same length as [filenames]'
    else:
        rendering_thresholds = [(x*0.6666) for x in detection_thresholds]
 
@@ -1932,32 +1932,54 @@ def find_equivalent_threshold(results_a,
 
 if False:
 
+    #%% Prepare test files
+
+    from megadetector.utils.path_utils import insert_before_extension
+
+    model_names = ['mdv5a','mdv5b']
+    image_folder = 'g:/temp/md-test-images'
+    output_filename_base = os.path.join(image_folder,'comparison_test.json')
+
+    output_filenames = []
+
+    commands = []
+
+    for model_name in model_names:
+        output_filename = insert_before_extension(output_filename_base,model_name)
+        output_filenames.append(output_filename)
+        cmd = 'python -m megadetector.detection.run_detector_batch'
+        cmd += ' {} {} {} --recursive --output_relative_filenames'.format(
+            model_name, image_folder,output_filename)
+        commands.append(cmd)
+
+    cmd = '\n\n'.join(commands)
+    print(cmd)
+    import clipboard
+    clipboard.copy(cmd)
+
+
     #%% Test two-way comparison
 
     options = BatchComparisonOptions()
 
     options.parallelize_rendering_with_threads = True
 
-    options.job_name = 'BCT'
+    options.job_name = 'md-test-images'
     options.output_folder = r'g:\temp\comparisons'
-    options.image_folder = r'g:\camera_traps\camera_trap_images'
+    options.image_folder = image_folder
     options.max_images_per_category = 100
     options.sort_by_confidence = True
 
     options.pairwise_options = []
 
     results_base = os.path.expanduser('~/postprocessing/bellevue-camera-traps')
-    filenames = [
-        os.path.join(results_base,r'bellevue-camera-traps-2023-12-05-v5a.0.0\combined_api_outputs\bellevue-camera-traps-2023-12-05-v5a.0.0_detections.json'),
-        os.path.join(results_base,r'bellevue-camera-traps-2023-12-05-aug-v5a.0.0\combined_api_outputs\bellevue-camera-traps-2023-12-05-aug-v5a.0.0_detections.json')
-    ]
 
     detection_thresholds = [0.15,0.15]
     rendering_thresholds = None
 
-    results = n_way_comparison(filenames,
-                               options,
-                               detection_thresholds,
+    results = n_way_comparison(filenames=output_filenames,
+                               options=options,
+                               detection_thresholds=detection_thresholds,
                                rendering_thresholds=rendering_thresholds)
 
     from megadetector.utils.path_utils import open_file
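Note: the test-prep block added above relies on path_utils.insert_before_extension to derive one output file per model name. The helper's implementation is not part of this diff; a plausible sketch under that assumption:

import os

def insert_before_extension(filename, token, separator='.'):
    # 'comparison_test.json' + 'mdv5a' -> 'comparison_test.mdv5a.json'
    base, ext = os.path.splitext(filename)
    return base + separator + token + ext

print(insert_before_extension('g:/temp/md-test-images/comparison_test.json', 'mdv5a'))
# g:/temp/md-test-images/comparison_test.mdv5a.json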
megadetector/postprocessing/convert_output_format.py

@@ -22,7 +22,8 @@ import pandas as pd
 from megadetector.postprocessing.load_api_results import load_api_results_csv
 from megadetector.utils.wi_taxonomy_utils import load_md_or_speciesnet_file
 from megadetector.data_management.annotations import annotation_constants
-from megadetector.utils import ct_utils
+from megadetector.utils.ct_utils import get_max_conf
+from megadetector.utils.ct_utils import write_json
 
 CONF_DIGITS = 3
 

@@ -138,7 +139,7 @@ def convert_json_to_csv(input_path,
            # print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
            continue
 
-       max_conf = ct_utils.get_max_conf(im)
+       max_conf = get_max_conf(im)
        detection_category_id_to_max_conf = defaultdict(float)
        classification_category_id_to_max_conf = defaultdict(float)
        detections = []

@@ -177,7 +178,8 @@ def convert_json_to_csv(input_path,
                classification_category_max = \
                    classification_category_id_to_max_conf[classification_category_id]
                if classification_conf > classification_category_max:
-                   classification_category_id_to_max_conf[classification_category_id] = d['conf']
+                   classification_category_id_to_max_conf[classification_category_id] = \
+                       classification_conf
 
            # ...for each classification
 

@@ -210,7 +212,7 @@ def convert_json_to_csv(input_path,
 
    if omit_bounding_boxes:
        df = df.drop('detections',axis=1)
-   df.to_csv(output_path,index=False,header=True)
+   df.to_csv(output_path,index=False,header=True,encoding=output_encoding)
 
 # ...def convert_json_to_csv(...)
 

@@ -295,7 +297,7 @@ def convert_csv_to_json(input_path,output_path=None,overwrite=True):
     json_out['classification_categories'] = classification_categories
     json_out['images'] = images
 
-    json.dump(json_out,open(output_path,'w'),indent=1)
+    write_json(output_path,json_out)
 
 # ...def convert_csv_to_json(...)
 

@@ -372,7 +374,7 @@ def main():
                        help='Output filename ending in .json or .csv (defaults to ' + \
                             'input file, with .json/.csv replaced by .csv/.json)')
    parser.add_argument('--omit_bounding_boxes',action='store_true',
-                       help='Output bounding box text from .csv output (large and usually not useful)')
+                       help='Omit bounding box text from .csv output (large and usually not useful)')
 
    if len(sys.argv[1:]) == 0:
        parser.print_help()
megadetector/postprocessing/create_crop_folder.py

@@ -169,7 +169,9 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
        'Could not find image-level input file {}'.format(image_results_file_with_crop_ids)
    assert os.path.isfile(crop_results_file), \
        'Could not find crop results file {}'.format(crop_results_file)
-   os.makedirs(os.path.dirname(output_file),exist_ok=True)
+   output_dir = os.path.dirname(output_file)
+   if len(output_dir) > 0:
+       os.makedirs(output_dir,exist_ok=True)
 
 
    ##%% Read input files
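Note: this release applies the same guard in several files (create_crop_folder.py here and below, generate_csv_report.py, load_api_results.py). The motivation is that os.path.dirname returns an empty string for a bare filename, and os.makedirs('') raises:

import os

print(os.path.dirname('g:/temp/results.json'))  # 'g:/temp'
print(os.path.dirname('results.json'))          # '' -- os.makedirs('') raises FileNotFoundError

# The guarded pattern used throughout this release ('results.json' is a
# hypothetical path):
output_file = 'results.json'
output_dir = os.path.dirname(output_file)
if len(output_dir) > 0:
    os.makedirs(output_dir, exist_ok=True)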
@@ -259,7 +261,11 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
                detections_without_classification_handling
                ))
 
-       if not skip_detection:
+       if skip_detection:
+
+           n_skipped_detections += 1
+
+       else:
 
            crop_results_this_detection = crop_filename_to_results[crop_filename_relative]
 

@@ -340,8 +346,11 @@ def create_crop_folder(input_file,
    assert os.path.isfile(input_file), 'Input file {} not found'.format(input_file)
    assert os.path.isdir(input_folder), 'Input folder {} not found'.format(input_folder)
    os.makedirs(output_folder,exist_ok=True)
+
    if output_file is not None:
-       os.makedirs(os.path.dirname(output_file),exist_ok=True)
+       output_dir = os.path.dirname(output_file)
+       if len(output_dir) > 0:
+           os.makedirs(output_dir,exist_ok=True)
 
 
    ##%% Read input

@@ -599,7 +608,7 @@ def main():
 
    print('Starting crop folder creation...')
    print('Input MD results: {}'.format(args.input_file))
-   print('Input image folder {}'.format(args.input_folder))
+   print('Input image folder: {}'.format(args.input_folder))
    print('Output crop folder: {}'.format(args.output_folder))
 
    if args.output_file:
megadetector/postprocessing/generate_csv_report.py

@@ -126,6 +126,7 @@ def generate_csv_report(md_results_file,
                                     recursive=True)
 
    else:
+
        assert os.path.isfile(datetime_source), \
            'datetime source {} is neither a folder nor a file'.format(datetime_source)
 

@@ -153,11 +154,14 @@ def generate_csv_report(md_results_file,
        print('Warning: a MD results file was supplied as the datetime source, but it does not appear '
              'to contain datetime information.')
 
+   # ...if datetime_source is a folder/file
+
    assert all_exif_results is not None
 
    filename_to_datetime_string = {}
 
    for exif_result in all_exif_results:
+
        datetime_string = unknown_datetime_tag
        if ('exif_tags' in exif_result) and \
           (exif_result['exif_tags'] is not None) and \

@@ -169,6 +173,8 @@ def generate_csv_report(md_results_file,
            assert isinstance(datetime_string,str), 'Unrecognized datetime format'
            filename_to_datetime_string[exif_result['file_name']] = datetime_string
 
+   # ...for each exif result
+
    image_files = [im['file'] for im in results['images']]
    image_files_set = set(image_files)
 

@@ -250,11 +256,10 @@ def generate_csv_report(md_results_file,
        base_record['filename'] = im['file'].replace('\\','/')
 
        # Datetime (if necessary)
+       datetime_string = ''
        if filename_to_datetime_string is not None:
            if im['file'] in filename_to_datetime_string:
                datetime_string = filename_to_datetime_string[im['file']]
-           else:
-               datetime_string = ''
        base_record['datetime'] = datetime_string
 
        for s in ['detection_category','max_detection_confidence',
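Note: hoisting datetime_string = '' above the conditional fixes a latent UnboundLocalError: previously the empty-string default was assigned only in the inner else branch, so when filename_to_datetime_string was None, the later base_record['datetime'] = datetime_string line referenced a never-assigned name (or, across loop iterations, a stale value from a previous image). A minimal reproduction of the bug pattern, with hypothetical names:

def get_value(mapping, key):
    if mapping is not None:
        if key in mapping:
            value = mapping[key]
        else:
            value = ''
    return value

get_value({'a.jpg': '2024-01-01'}, 'a.jpg')  # '2024-01-01'
get_value(None, 'a.jpg')  # UnboundLocalError: 'value' referenced before assignment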
@@ -383,13 +388,22 @@ def generate_csv_report(md_results_file,
    # ...for each image
 
    # Make sure every record has the same columns
-   column_names = output_records[0].keys()
-   for record in output_records:
-       assert record.keys() == column_names
 
-   # Write to .csv
-   df = pd.DataFrame(output_records)
-   df.to_csv(output_file,header=True,index=False)
+   if len(output_records) == 0:
+       print('Warning: no output records generated')
+   else:
+       column_names = output_records[0].keys()
+       for record in output_records:
+           assert record.keys() == column_names
+
+       # Create folder for output file if necessary
+       output_dir = os.path.dirname(output_file)
+       if len(output_dir) > 0:
+           os.makedirs(output_dir, exist_ok=True)
+
+       # Write to .csv
+       df = pd.DataFrame(output_records)
+       df.to_csv(output_file,header=True,index=False)
 
    # from megadetector.utils.path_utils import open_file; open_file(output_file)
 
megadetector/postprocessing/load_api_results.py

@@ -23,7 +23,8 @@ from collections.abc import Mapping
 
 import pandas as pd
 
-from megadetector.utils import ct_utils
+from megadetector.utils.ct_utils import get_max_conf
+from megadetector.utils.ct_utils import write_json
 
 from megadetector.utils.wi_taxonomy_utils import load_md_or_speciesnet_file
 

@@ -85,7 +86,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
    # add them, because our unofficial internal dataframe format includes this.
    for im in detection_results['images']:
        if 'max_detection_conf' not in im:
-           im['max_detection_conf'] = ct_utils.get_max_conf(im)
+           im['max_detection_conf'] = get_max_conf(im)
 
    # Pack the json output into a Pandas DataFrame
    detection_results = pd.DataFrame(detection_results['images'])

@@ -139,8 +140,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
        print('Warning: error removing max_detection_conf from output')
        pass
 
-   with open(out_path, 'w') as f:
-       json.dump(fields, f, indent=1)
+   write_json(out_path,fields)
 
    print('Finished writing detection results to {}'.format(out_path))
 

@@ -214,6 +214,10 @@ def write_api_results_csv(detection_results, filename):
 
    print('Writing detection results to {}'.format(filename))
 
+   output_dir = os.path.dirname(filename)
+   if len(output_dir) > 0:
+       os.makedirs(output_dir, exist_ok=True)
+
    detection_results.to_csv(filename, index=False)
 
    print('Finished writing detection results to {}'.format(filename))
megadetector/postprocessing/md_to_coco.py

@@ -22,6 +22,7 @@ from tqdm import tqdm
 
 from megadetector.visualization import visualization_utils as vis_utils
 from megadetector.utils.path_utils import insert_before_extension
+from megadetector.utils.ct_utils import write_json
 
 default_confidence_threshold = 0.15
 

@@ -296,9 +297,7 @@ def md_to_coco(md_results_file,
     if verbose:
         print('Writing COCO output file...')
 
-    if coco_output_file is not None:
-        with open(coco_output_file,'w') as f:
-            json.dump(output_dict,f,indent=1)
+    write_json(coco_output_file,output_dict)
 
     return output_dict
 
megadetector/postprocessing/md_to_labelme.py

@@ -28,6 +28,7 @@ from functools import partial
 
 from megadetector.visualization.visualization_utils import open_image
 from megadetector.utils.ct_utils import round_float
+from megadetector.utils.ct_utils import write_json
 from megadetector.detection.run_detector import DEFAULT_DETECTOR_LABEL_MAP, FAILURE_IMAGE_OPEN
 
 output_precision = 3

@@ -36,8 +37,11 @@ default_confidence_threshold = 0.15
 
 #%% Functions
 
-def get_labelme_dict_for_image(im,image_base_name=None,category_id_to_name=None,
-                               info=None,confidence_threshold=None):
+def get_labelme_dict_for_image(im,
+                               image_base_name=None,
+                               category_id_to_name=None,
+                               info=None,
+                               confidence_threshold=None):
     """
     For the given image struct in MD results format, reformat the detections into
     labelme format.

@@ -60,7 +64,7 @@ def get_labelme_dict_for_image(im,image_base_name=None,category_id_to_name=None,
     if image_base_name is None:
         image_base_name = os.path.basename(im['file'])
 
-    if category_id_to_name:
+    if category_id_to_name is None:
         category_id_to_name = DEFAULT_DETECTOR_LABEL_MAP
 
     if confidence_threshold is None:
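Note: the last hunk above fixes an inverted truthiness test: `if category_id_to_name:` is true precisely when the caller supplied a non-empty mapping, so the old code clobbered caller-supplied mappings with DEFAULT_DETECTOR_LABEL_MAP and left the None default unfilled. A minimal illustration of the corrected pattern (DEFAULT_LABELS is a hypothetical stand-in):

DEFAULT_LABELS = {'1': 'animal'}

def resolve_category_map(category_id_to_name=None):
    # Fall back to the default only when the caller passed nothing.
    if category_id_to_name is None:
        category_id_to_name = DEFAULT_LABELS
    return category_id_to_name

print(resolve_category_map())                 # {'1': 'animal'}
print(resolve_category_map({'1': 'person'}))  # {'1': 'person'}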
@@ -138,8 +142,7 @@ def _write_output_for_image(im,
                info=info,
                confidence_threshold=confidence_threshold)
 
-   with open(json_path,'w') as f:
-       json.dump(output_dict,f,indent=1)
+   write_json(json_path,output_dict)
 
 # ...def write_output_for_image(...)
 

@@ -256,9 +259,10 @@ def md_to_labelme(results_file,
                      md_results['images']),
                      total=len(md_results['images'])))
    finally:
-       pool.close()
-       pool.join()
-       print("Pool closed and joined for labelme file writes")
+       if pool is not None:
+           pool.close()
+           pool.join()
+           print("Pool closed and joined for labelme file writes")
 
    # ...for each image
 
megadetector/postprocessing/md_to_wi.py

@@ -10,6 +10,7 @@ Converts the MD .json format to the WI predictions.json format.
 
 import sys
 import argparse
+
 from megadetector.utils.wi_taxonomy_utils import generate_predictions_json_from_md_results
 
 

@@ -34,7 +35,7 @@ def main(): # noqa
 
     generate_predictions_json_from_md_results(args.md_results_file,
                                               args.predictions_json_file,
-                                              base_folder=None)
+                                              base_folder=args.base_folder)
 
 if __name__ == '__main__':
     main()
megadetector/postprocessing/merge_detections.py

@@ -23,6 +23,7 @@ import os
 from tqdm import tqdm
 
 from megadetector.utils.ct_utils import get_iou
+from megadetector.utils.ct_utils import write_json
 
 
 #%% Structs

@@ -121,8 +122,6 @@ def merge_detections(source_files,target_file,output_file,options=None):
 
    assert os.path.isfile(target_file)
 
-   os.makedirs(os.path.dirname(output_file),exist_ok=True)
-
    with open(target_file,'r') as f:
        output_data = json.load(f)
 

@@ -290,8 +289,7 @@ def merge_detections(source_files,target_file,output_file,options=None):
 
    # ...for each source file
 
-   with open(output_file,'w') as f:
-       json.dump(output_data,f,indent=1)
+   write_json(output_file,output_data)
 
    print('Saved merged results to {}'.format(output_file))
 

@@ -308,7 +306,7 @@ def main():
 
    default_options = MergeDetectionsOptions()
 
    parser = argparse.ArgumentParser(
-       description='Merge detections from one or more MegaDetector results files into an existing reuslts file')
+       description='Merge detections from one or more MegaDetector results files into an existing results file')
    parser.add_argument(
        'source_files',
        nargs='+',

@@ -359,7 +357,7 @@ def main():
        type=int,
        nargs='+',
        default=None,
-       help='List of numeric detection categories to include')
+       help='List of numeric detection categories to exclude')
    parser.add_argument(
        '--merge_empty_only',
        action='store_true',
megadetector/postprocessing/postprocess_batch_results.py

@@ -1889,8 +1889,9 @@ def process_batch_results(options):
    if options.include_classification_category_report:
 
        # TODO: it's only for silly historical reasons that we re-read
-       # the input file in this case; we're not currently carrying the json
-       # representation around, only the Pandas representation.
+       # the input file in this case; because this module has used Pandas
+       # forever, we're not currently carrying the json representation around,
+       # only the Pandas representation.
 
        print('Generating classification category report')
 

@@ -1905,7 +1906,7 @@ def process_batch_results(options):
                if ('classifications' in det) and (len(det['classifications']) > 0):
                    class_id = det['classifications'][0][0]
                    if class_id not in classification_category_to_count:
-                       classification_category_to_count[class_id] = 0
+                       classification_category_to_count[class_id] = 1
                    else:
                        classification_category_to_count[class_id] = \
                            classification_category_to_count[class_id] + 1
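Note: the second hunk fixes a first-occurrence off-by-one: a newly seen category was initialized to 0 rather than 1, so every category's count came out one low. For what it's worth, a collections.defaultdict makes the special case unnecessary; a sketch, not what the module actually does:

from collections import defaultdict

classification_category_to_count = defaultdict(int)

for class_id in ['1', '1', '2']:
    classification_category_to_count[class_id] += 1

print(dict(classification_category_to_count))  # {'1': 2, '2': 1}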
megadetector/postprocessing/remap_detection_categories.py

@@ -18,6 +18,7 @@ import argparse
 from tqdm import tqdm
 
 from megadetector.utils.ct_utils import invert_dictionary
+from megadetector.utils.ct_utils import write_json
 
 
 #%% Main function

@@ -132,14 +133,16 @@ def remap_detection_categories(input_file,
        for det in im['detections']:
            det['category'] = input_category_id_to_output_category_id[det['category']]
 
-   input_data['detection_categories'] = target_category_map
+   # ...for each image
 
-   with open(output_file,'w') as f:
-       json.dump(input_data,f,indent=1)
+   input_data['detection_categories'] = target_category_map
 
+   write_json(output_file,input_data)
 
    print('Saved remapped results to {}'.format(output_file))
 
+   # ...def remap_detection_categories(...)
+
 
 #%% Interactive driver
 