PyPI - megadetector - Versions diffs - 10.0.15__py3-none-any.whl - Mend

megadetector 10.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (147) hide show

megadetector/__init__.py +0 -0
megadetector/api/__init__.py +0 -0
megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
megadetector/classification/__init__.py +0 -0
megadetector/classification/aggregate_classifier_probs.py +108 -0
megadetector/classification/analyze_failed_images.py +227 -0
megadetector/classification/cache_batchapi_outputs.py +198 -0
megadetector/classification/create_classification_dataset.py +626 -0
megadetector/classification/crop_detections.py +516 -0
megadetector/classification/csv_to_json.py +226 -0
megadetector/classification/detect_and_crop.py +853 -0
megadetector/classification/efficientnet/__init__.py +9 -0
megadetector/classification/efficientnet/model.py +415 -0
megadetector/classification/efficientnet/utils.py +608 -0
megadetector/classification/evaluate_model.py +520 -0
megadetector/classification/identify_mislabeled_candidates.py +152 -0
megadetector/classification/json_to_azcopy_list.py +63 -0
megadetector/classification/json_validator.py +696 -0
megadetector/classification/map_classification_categories.py +276 -0
megadetector/classification/merge_classification_detection_output.py +509 -0
megadetector/classification/prepare_classification_script.py +194 -0
megadetector/classification/prepare_classification_script_mc.py +228 -0
megadetector/classification/run_classifier.py +287 -0
megadetector/classification/save_mislabeled.py +110 -0
megadetector/classification/train_classifier.py +827 -0
megadetector/classification/train_classifier_tf.py +725 -0
megadetector/classification/train_utils.py +323 -0
megadetector/data_management/__init__.py +0 -0
megadetector/data_management/animl_to_md.py +161 -0
megadetector/data_management/annotations/__init__.py +0 -0
megadetector/data_management/annotations/annotation_constants.py +33 -0
megadetector/data_management/camtrap_dp_to_coco.py +270 -0
megadetector/data_management/cct_json_utils.py +566 -0
megadetector/data_management/cct_to_md.py +184 -0
megadetector/data_management/cct_to_wi.py +293 -0
megadetector/data_management/coco_to_labelme.py +284 -0
megadetector/data_management/coco_to_yolo.py +701 -0
megadetector/data_management/databases/__init__.py +0 -0
megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
megadetector/data_management/databases/integrity_check_json_db.py +563 -0
megadetector/data_management/databases/subset_json_db.py +195 -0
megadetector/data_management/generate_crops_from_cct.py +200 -0
megadetector/data_management/get_image_sizes.py +164 -0
megadetector/data_management/labelme_to_coco.py +559 -0
megadetector/data_management/labelme_to_yolo.py +349 -0
megadetector/data_management/lila/__init__.py +0 -0
megadetector/data_management/lila/create_lila_blank_set.py +556 -0
megadetector/data_management/lila/create_lila_test_set.py +192 -0
megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
megadetector/data_management/lila/download_lila_subset.py +182 -0
megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
megadetector/data_management/lila/get_lila_image_counts.py +112 -0
megadetector/data_management/lila/lila_common.py +319 -0
megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
megadetector/data_management/mewc_to_md.py +344 -0
megadetector/data_management/ocr_tools.py +873 -0
megadetector/data_management/read_exif.py +964 -0
megadetector/data_management/remap_coco_categories.py +195 -0
megadetector/data_management/remove_exif.py +156 -0
megadetector/data_management/rename_images.py +194 -0
megadetector/data_management/resize_coco_dataset.py +665 -0
megadetector/data_management/speciesnet_to_md.py +41 -0
megadetector/data_management/wi_download_csv_to_coco.py +247 -0
megadetector/data_management/yolo_output_to_md_output.py +594 -0
megadetector/data_management/yolo_to_coco.py +984 -0
megadetector/data_management/zamba_to_md.py +188 -0
megadetector/detection/__init__.py +0 -0
megadetector/detection/change_detection.py +840 -0
megadetector/detection/process_video.py +479 -0
megadetector/detection/pytorch_detector.py +1451 -0
megadetector/detection/run_detector.py +1267 -0
megadetector/detection/run_detector_batch.py +2172 -0
megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
megadetector/detection/run_md_and_speciesnet.py +1604 -0
megadetector/detection/run_tiled_inference.py +1044 -0
megadetector/detection/tf_detector.py +209 -0
megadetector/detection/video_utils.py +1379 -0
megadetector/postprocessing/__init__.py +0 -0
megadetector/postprocessing/add_max_conf.py +72 -0
megadetector/postprocessing/categorize_detections_by_size.py +166 -0
megadetector/postprocessing/classification_postprocessing.py +1943 -0
megadetector/postprocessing/combine_batch_outputs.py +249 -0
megadetector/postprocessing/compare_batch_results.py +2110 -0
megadetector/postprocessing/convert_output_format.py +403 -0
megadetector/postprocessing/create_crop_folder.py +629 -0
megadetector/postprocessing/detector_calibration.py +570 -0
megadetector/postprocessing/generate_csv_report.py +522 -0
megadetector/postprocessing/load_api_results.py +223 -0
megadetector/postprocessing/md_to_coco.py +428 -0
megadetector/postprocessing/md_to_labelme.py +351 -0
megadetector/postprocessing/md_to_wi.py +41 -0
megadetector/postprocessing/merge_detections.py +392 -0
megadetector/postprocessing/postprocess_batch_results.py +2140 -0
megadetector/postprocessing/remap_detection_categories.py +226 -0
megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
megadetector/postprocessing/separate_detections_into_folders.py +795 -0
megadetector/postprocessing/subset_json_detector_output.py +964 -0
megadetector/postprocessing/top_folders_to_bottom.py +238 -0
megadetector/postprocessing/validate_batch_results.py +332 -0
megadetector/taxonomy_mapping/__init__.py +0 -0
megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
megadetector/taxonomy_mapping/simple_image_download.py +231 -0
megadetector/taxonomy_mapping/species_lookup.py +1008 -0
megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
megadetector/tests/__init__.py +0 -0
megadetector/tests/test_nms_synthetic.py +335 -0
megadetector/utils/__init__.py +0 -0
megadetector/utils/ct_utils.py +1857 -0
megadetector/utils/directory_listing.py +199 -0
megadetector/utils/extract_frames_from_video.py +307 -0
megadetector/utils/gpu_test.py +125 -0
megadetector/utils/md_tests.py +2072 -0
megadetector/utils/path_utils.py +2872 -0
megadetector/utils/process_utils.py +172 -0
megadetector/utils/split_locations_into_train_val.py +237 -0
megadetector/utils/string_utils.py +234 -0
megadetector/utils/url_utils.py +825 -0
megadetector/utils/wi_platform_utils.py +968 -0
megadetector/utils/wi_taxonomy_utils.py +1766 -0
megadetector/utils/write_html_image_list.py +239 -0
megadetector/visualization/__init__.py +0 -0
megadetector/visualization/plot_utils.py +309 -0
megadetector/visualization/render_images_with_thumbnails.py +243 -0
megadetector/visualization/visualization_utils.py +1973 -0
megadetector/visualization/visualize_db.py +630 -0
megadetector/visualization/visualize_detector_output.py +498 -0
megadetector/visualization/visualize_video_output.py +705 -0
megadetector-10.0.15.dist-info/METADATA +115 -0
megadetector-10.0.15.dist-info/RECORD +147 -0
megadetector-10.0.15.dist-info/WHEEL +5 -0
megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
megadetector-10.0.15.dist-info/top_level.txt +1 -0

megadetector/postprocessing/convert_output_format.py ADDED Viewed

@@ -0,0 +1,403 @@
+"""
+convert_output_format.py
+Converts between the .json and .csv representations of MD output.  The .csv format is
+largely obsolete; don't use it unless you're super-duper sure you need it.
+"""
+#%% Constants and imports
+import argparse
+import json
+import sys
+import os
+from tqdm import tqdm
+from collections import defaultdict
+import pandas as pd
+from megadetector.postprocessing.load_api_results import load_api_results_csv
+from megadetector.utils.wi_taxonomy_utils import load_md_or_speciesnet_file
+from megadetector.data_management.annotations import annotation_constants
+from megadetector.utils.ct_utils import get_max_conf
+from megadetector.utils.ct_utils import write_json
+# Number of decimal places to which convert_csv_to_json() rounds each image's
+# "max_detection_conf" value
+CONF_DIGITS = 3
+#%% Conversion functions
+def convert_json_to_csv(input_path,
+                        output_path=None,
+                        min_confidence=None,
+                        omit_bounding_boxes=False,
+                        output_encoding=None,
+                        overwrite=True,
+                        verbose=False):
+    """
+    Converts a MD results .json file to a totally non-standard .csv format.
+    If [output_path] is None, will convert x.json to x.csv.
+    Args:
+        input_path (str): the input .json file to convert
+        output_path (str, optional): the output .csv file to generate; if this is None, uses
+            [input_path].csv
+        min_confidence (float, optional): the minimum-confidence detection we should include
+            in the "detections" column; has no impact on the other columns
+        omit_bounding_boxes (bool, optional): whether to leave out the json-formatted bounding
+            boxes that make up the "detections" column, which are not generally useful for someone
+            who wants to consume this data as a .csv file
+        output_encoding (str, optional): encoding to use for the .csv file
+        overwrite (bool, optional): whether to overwrite an existing .csv file; if this is False and
+            the output file exists, no-ops and returns
+        verbose (bool, optional): enable additional debug output
+    """
+    if output_path is None:
+        output_path = os.path.splitext(input_path)[0]+'.csv'
+    if os.path.isfile(output_path) and (not overwrite):
+        print('File {} exists, skipping json --> csv conversion'.format(output_path))
+        return
+    print('Loading json results from {}...'.format(input_path))
+    json_output = load_md_or_speciesnet_file(input_path,
+                                             verbose=verbose)
+    def clean_category_name(s):
+        return s.replace(',','_').replace(' ','_').lower()
+    # Create column names for max detection confidences
+    detection_category_id_to_max_conf_column_name = {}
+    for category_id in json_output['detection_categories'].keys():
+        category_name = clean_category_name(json_output['detection_categories'][category_id])
+        detection_category_id_to_max_conf_column_name[category_id] = \
+            'max_conf_' + category_name
+    classification_category_id_to_max_conf_column_name = {}
+    # Create column names for max classification confidences (if necessary)
+    if 'classification_categories' in json_output.keys():
+        for category_id in json_output['classification_categories'].keys():
+            category_name = clean_category_name(json_output['classification_categories'][category_id])
+            classification_category_id_to_max_conf_column_name[category_id] = \
+                'max_classification_conf_' + category_name
+    # There are several .json fields for which we add .csv columns; other random bespoke fields
+    # will be ignored.
+    optional_fields = ['width','height','datetime','exif_metadata']
+    optional_fields_present = set()
+    # Iterate once over the data to check for optional fields
+    print('Looking for optional fields...')
+    for im in tqdm(json_output['images']):
+        # Which optional fields are present for this image?
+        for k in im.keys():
+            if k in optional_fields:
+                optional_fields_present.add(k)
+    optional_fields_present = sorted(list(optional_fields_present))
+    if len(optional_fields_present) > 0:
+        print('Found {} optional fields'.format(len(optional_fields_present)))
+    print('Formatting results...')
+    output_records = []
+    # i_image = 0; im = json_output['images'][i_image]
+    for im in tqdm(json_output['images']):
+        output_record = {}
+        output_records.append(output_record)
+        output_record['image_path'] = im['file']
+        output_record['max_confidence'] = ''
+        output_record['detections'] = ''
+        for field_name in optional_fields_present:
+            output_record[field_name] = ''
+            if field_name in im:
+                output_record[field_name] = im[field_name]
+        for detection_category_id in detection_category_id_to_max_conf_column_name:
+            column_name = detection_category_id_to_max_conf_column_name[detection_category_id]
+            output_record[column_name] = 0
+        for classification_category_id in classification_category_id_to_max_conf_column_name:
+            column_name = classification_category_id_to_max_conf_column_name[classification_category_id]
+            output_record[column_name] = 0
+        if 'failure' in im and im['failure'] is not None:
+            output_record['max_confidence'] = 'failure'
+            output_record['detections'] = im['failure']
+            # print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
+            continue
+        max_conf = get_max_conf(im)
+        detection_category_id_to_max_conf = defaultdict(float)
+        classification_category_id_to_max_conf = defaultdict(float)
+        detections = []
+        # d = im['detections'][0]
+        for d in im['detections']:
+            # Skip sub-threshold detections
+            if (min_confidence is not None) and (d['conf'] < min_confidence):
+                continue
+            input_bbox = d['bbox']
+            # Our .json format is xmin/ymin/w/h
+            #
+            # Our .csv format was ymin/xmin/ymax/xmax
+            xmin = input_bbox[0]
+            ymin = input_bbox[1]
+            xmax = input_bbox[0] + input_bbox[2]
+            ymax = input_bbox[1] + input_bbox[3]
+            output_detection = [ymin, xmin, ymax, xmax]
+            output_detection.append(d['conf'])
+            output_detection.append(int(d['category']))
+            detections.append(output_detection)
+            detection_category_id = d['category']
+            detection_category_max = detection_category_id_to_max_conf[detection_category_id]
+            if d['conf'] > detection_category_max:
+                detection_category_id_to_max_conf[detection_category_id] = d['conf']
+            if 'classifications' in d:
+                for c in d['classifications']:
+                    classification_category_id = c[0]
+                    classification_conf = c[1]
+                    classification_category_max = \
+                        classification_category_id_to_max_conf[classification_category_id]
+                    if classification_conf > classification_category_max:
+                        classification_category_id_to_max_conf[classification_category_id] = \
+                            classification_conf
+                # ...for each classification
+            # ...if we have classification results for this detection
+        # ...for each detection
+        detection_string = ''
+        if not omit_bounding_boxes:
+            detection_string = json.dumps(detections)
+        output_record['detections'] = detection_string
+        output_record['max_confidence'] = max_conf
+        for detection_category_id in detection_category_id_to_max_conf_column_name:
+            column_name = detection_category_id_to_max_conf_column_name[detection_category_id]
+            output_record[column_name] = \
+                detection_category_id_to_max_conf[detection_category_id]
+        for classification_category_id in classification_category_id_to_max_conf_column_name:
+            column_name = classification_category_id_to_max_conf_column_name[classification_category_id]
+            output_record[column_name] = \
+                classification_category_id_to_max_conf[classification_category_id]
+    # ...for each image
+    print('Writing to csv...')
+    df = pd.DataFrame(output_records)
+    if omit_bounding_boxes:
+        df = df.drop('detections',axis=1)
+    df.to_csv(output_path,index=False,header=True,encoding=output_encoding)
+# ...def convert_json_to_csv(...)
+def convert_csv_to_json(input_path,output_path=None,overwrite=True):
+    """
+    Convert .csv to .json.  If output_path is None, will convert x.csv to x.json.  This
+    supports a largely obsolete .csv format, there's almost no reason you want to do this.
+    Args:
+        input_path (str): .csv filename to convert to .json
+        output_path (str, optional): the output .json file to generate; if this is None, uses
+            [input_path].json
+        overwrite (bool, optional): whether to overwrite an existing .json file; if this is
+            False and the output file exists, no-ops and returns
+    """
+    if output_path is None:
+        output_path = os.path.splitext(input_path)[0]+'.json'
+    if os.path.isfile(output_path) and (not overwrite):
+        print('File {} exists, skipping csv --> json conversion'.format(output_path))
+        return
+    # Format spec:
+    #
+    # https://github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_processing
+    print('Loading csv results...')
+    df = load_api_results_csv(input_path)
+    info = {
+        "format_version":"1.2",
+        "detector": "unknown",
+        "detection_completion_time" : "unknown",
+        "classifier": "unknown",
+        "classification_completion_time": "unknown"
+    }
+    classification_categories = {}
+    detection_categories = annotation_constants.detector_bbox_categories
+    images = []
+    # i_file = 0; row = df.iloc[i_file]
+    for i_file,row in df.iterrows():
+        image = {}
+        image['file'] = row['image_path']
+        image['max_detection_conf'] = round(row['max_confidence'], CONF_DIGITS)
+        src_detections = row['detections']
+        out_detections = []
+        for i_detection,detection in enumerate(src_detections):
+            # Our .csv format was ymin/xmin/ymax/xmax
+            #
+            # Our .json format is xmin/ymin/w/h
+            ymin = detection[0]
+            xmin = detection[1]
+            ymax = detection[2]
+            xmax = detection[3]
+            bbox = [xmin, ymin, xmax-xmin, ymax-ymin]
+            conf = detection[4]
+            i_class = detection[5]
+            out_detection = {}
+            out_detection['category'] = str(i_class)
+            out_detection['conf'] = conf
+            out_detection['bbox'] = bbox
+            out_detections.append(out_detection)
+        # ...for each detection
+        image['detections'] = out_detections
+        images.append(image)
+    # ...for each image
+    json_out = {}
+    json_out['info'] = info
+    json_out['detection_categories'] = detection_categories
+    json_out['classification_categories'] = classification_categories
+    json_out['images'] = images
+    write_json(output_path,json_out)
+# ...def convert_csv_to_json(...)
+#%% Interactive driver
+# Nothing under "if False:" runs on import or from the command line; the "#%%" cells
+# are scratch code, presumably meant to be run by hand, one cell at a time.
+if False:
+    #%%
+    # Convert a single results file, keeping the bounding boxes
+    input_path = r'c:\temp\test.json'
+    min_confidence = None
+    output_path = input_path + '.csv'
+    convert_json_to_csv(input_path,output_path,min_confidence=min_confidence,
+                        omit_bounding_boxes=False)
+    #%%
+    # Convert every entry in a folder to x.json.csv, without bounding boxes.  The
+    # os.listdir() output is not filtered by extension, so every entry in base_path is
+    # passed to convert_json_to_csv().
+    base_path = r'c:\temp\json'
+    input_paths = os.listdir(base_path)
+    input_paths = [os.path.join(base_path,s) for s in input_paths]
+    min_confidence = None
+    for input_path in input_paths:
+        output_path = input_path + '.csv'
+        convert_json_to_csv(input_path,output_path,min_confidence=min_confidence,
+                            omit_bounding_boxes=True)
+    #%% Concatenate .csv files from a folder
+    # Relies on base_path from the previous cell
+    import glob
+    csv_files = glob.glob(os.path.join(base_path,'*.json.csv' ))
+    master_csv = os.path.join(base_path,'all.csv')
+    print('Concatenating {} files to {}'.format(len(csv_files),master_csv))
+    # The header line of the first file, written to the output exactly once
+    header = None
+    with open(master_csv, 'w') as fout:
+        for filename in tqdm(csv_files):
+            with open(filename) as fin:
+                lines = fin.readlines()
+                # Every file has to have the same header line as the first file
+                if header is not None:
+                    assert lines[0] == header
+                else:
+                    header = lines[0]
+                    fout.write(header)
+                # Copy the data rows, skipping blank lines
+                for line in lines[1:]:
+                    if len(line.strip()) == 0:
+                        continue
+                    fout.write(line)
+        # ...for each .csv file
+    # with open(master_csv)
+#%% Command-line driver
+def main():
+    """
+    Command-line driver for convert_output_format(), which converts
+    json <--> csv.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('input_path',type=str,
+                        help='Input filename ending in .json or .csv')
+    parser.add_argument('--output_path',type=str,default=None,
+                        help='Output filename ending in .json or .csv (defaults to ' + \
+                             'input file, with .json/.csv replaced by .csv/.json)')
+    parser.add_argument('--omit_bounding_boxes',action='store_true',
+                        help='Omit bounding box text from .csv output (large and usually not useful)')
+    if len(sys.argv[1:]) == 0:
+        parser.print_help()
+        parser.exit()
+    args = parser.parse_args()
+    if args.output_path is None:
+        if args.input_path.endswith('.csv'):
+            args.output_path = args.input_path[:-4] + '.json'
+        elif args.input_path.endswith('.json'):
+            args.output_path = args.input_path[:-5] + '.csv'
+        else:
+            raise ValueError('Illegal input file extension')
+    if args.input_path.endswith('.csv') and args.output_path.endswith('.json'):
+        assert not args.omit_bounding_boxes, \
+            '--omit_bounding_boxes does not apply to csv --> json conversion'
+        convert_csv_to_json(args.input_path,args.output_path)
+    elif args.input_path.endswith('.json') and args.output_path.endswith('.csv'):
+        convert_json_to_csv(args.input_path,args.output_path,omit_bounding_boxes=args.omit_bounding_boxes)
+    else:
+        raise ValueError('Illegal format combination')
+if __name__ == '__main__':
+    main()