megadetector 5.0.28__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; see the registry's advisory page for more details.

Files changed (176):
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +231 -224
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +340 -337
  65. megadetector/detection/pytorch_detector.py +304 -262
  66. megadetector/detection/run_detector.py +177 -164
  67. megadetector/detection/run_detector_batch.py +364 -363
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +256 -249
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +290 -282
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +415 -415
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +219 -146
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -499
  81. megadetector/postprocessing/load_api_results.py +23 -20
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +313 -298
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1018 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1457 -398
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +61 -61
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2526
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +401 -397
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +79 -73
  124. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  128. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
  129. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  130. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  131. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  132. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  133. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  134. megadetector/data_management/importers/awc_to_json.py +0 -191
  135. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  136. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  137. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  138. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  139. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  140. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  141. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  142. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  143. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  144. megadetector/data_management/importers/ena24_to_json.py +0 -276
  145. megadetector/data_management/importers/filenames_to_json.py +0 -386
  146. megadetector/data_management/importers/helena_to_cct.py +0 -283
  147. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  148. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  149. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  150. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  151. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  152. megadetector/data_management/importers/missouri_to_json.py +0 -490
  153. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  154. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  155. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  156. megadetector/data_management/importers/pc_to_json.py +0 -365
  157. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  158. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  159. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  160. megadetector/data_management/importers/rspb_to_json.py +0 -356
  161. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  162. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  163. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  164. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  165. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  166. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  167. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  168. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  169. megadetector/data_management/importers/ubc_to_json.py +0 -399
  170. megadetector/data_management/importers/umn_to_json.py +0 -507
  171. megadetector/data_management/importers/wellington_to_json.py +0 -263
  172. megadetector/data_management/importers/wi_to_json.py +0 -442
  173. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  174. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  175. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  176. megadetector-5.0.28.dist-info/RECORD +0 -209
@@ -1,499 +1,508 @@
1
- """
2
-
3
- generate_csv_report.py
4
-
5
- Generates a .csv report from a MD-formatted .json file with the following columns:
6
-
7
- * filename
8
- * datetime (if images or EXIF information is supplied)
9
- * detection_category
10
- * max_detection_confidence
11
- * classification_category
12
- * max_classification_confidence
13
- * count
14
-
15
- One row is generated per category pair per image. For example, these would be unique rows:
16
-
17
- image0001.jpg,animal,deer,4
18
- image0001.jpg,animal,lion,4
19
- image0001.jpg,animal,[none],4
20
- image0001.jpg,person,[none],2
21
-
22
- Images with no above-threshold detections will have a single row:
23
-
24
- image0001.jpg,empty,[none],-1
25
-
26
- Images with processing errors will have a single row:
27
-
28
- image0001.jpg,error,error_string,-1
29
-
30
- """
31
-
32
- #%% Constants and imports
33
-
34
- import os
35
- import json
36
- import tempfile
37
- import uuid
38
- import pandas as pd
39
-
40
- from copy import deepcopy
41
-
42
- from megadetector.utils.wi_utils import load_md_or_speciesnet_file
43
- from megadetector.utils.ct_utils import get_max_conf
44
- from megadetector.utils.ct_utils import is_list_sorted
45
-
46
- from megadetector.detection.run_detector import \
47
- get_typical_confidence_threshold_from_results
48
-
49
- from megadetector.data_management.read_exif import \
50
- read_exif_from_folder, ReadExifOptions, minimal_exif_tags
51
-
52
- default_classification_threshold = 0.3
53
- unknown_datetime_tag = ''
54
-
55
-
56
- #%% Functions
57
-
58
- def generate_csv_report(md_results_file,
59
- output_file=None,
60
- datetime_source=None,
61
- folder_level_columns=None,
62
- detection_confidence_threshold=None,
63
- classification_confidence_threshold=None,
64
- verbose=True):
65
- """
66
- Generates a .csv report from a MD-formatted .json file
67
-
68
- Args:
69
- md_results_file (str): MD results .json file for which we should generate a report
70
- output_file (str, optional): .csv file to write; if this is None, we'll use md_results_file.csv
71
- datetime_source (str, optional): if datetime information is required, this should point to
72
- a folder of images, a MD results .json file (can be the same as the input file), or
73
- an exif_info.json file created with read_exif().
74
- folder_level_columns (list of int, optional): list of folder levels (where zero is the top-level
75
- folder in a path name) for which we should create separate columns. Should be zero-indexed ints,
76
- or a comma-delimited list of zero-indexed int-strings.
77
- detection_confidence_threshold (float, optional): detections below this confidence threshold will not
78
- be included in the output data. Defaults to the recommended value based on the .json file.
79
- classification_confidence_threshold (float, optional): classifications below this confidence threshold will
80
- not be included in the output data (i.e., detections will be considered "animal").
81
- verbose (bool, optional): enable debug output, including the progress bar,
82
- """
83
-
84
- ##%% Load results file
85
-
86
- results = load_md_or_speciesnet_file(md_results_file)
87
-
88
- print('Loaded results for {} images'.format(len(results['images'])))
89
-
90
- detection_category_id_to_name = results['detection_categories']
91
- classification_category_id_to_name = None
92
- if 'classification_categories' in results:
93
- classification_category_id_to_name = results['classification_categories']
94
-
95
- if output_file is None:
96
- output_file = md_results_file + '.csv'
97
-
98
- ##%% Read datetime information if necessary
99
-
100
- filename_to_datetime_string = None
101
-
102
- if datetime_source is not None:
103
-
104
- all_exif_results = None
105
-
106
- if os.path.isdir(datetime_source):
107
-
108
- # Read EXIF info from images
109
- read_exif_options = ReadExifOptions()
110
- read_exif_options.tags_to_include = minimal_exif_tags
111
- read_exif_options.byte_handling = 'delete'
112
- exif_cache_file = os.path.join(tempfile.gettempdir(),
113
- 'md-exif-data',
114
- str(uuid.uuid1())+'.json')
115
- print('Reading EXIF datetime info from {}, writing to {}'.format(
116
- datetime_source,exif_cache_file))
117
- os.makedirs(os.path.dirname(exif_cache_file),exist_ok=True)
118
-
119
- all_exif_results = read_exif_from_folder(input_folder=datetime_source,
120
- output_file=exif_cache_file,
121
- options=read_exif_options,
122
- recursive=True)
123
-
124
- else:
125
- assert os.path.isfile(datetime_source), \
126
- 'datetime source {} is neither a folder nor a file'.format(datetime_source)
127
-
128
- # Is this the same file we've already read?
129
-
130
- # Load this, decide whether it's a MD file or an exif_info file
131
- with open(datetime_source,'r') as f:
132
- d = json.load(f)
133
-
134
- if isinstance(d,list):
135
- all_exif_results = d
136
- else:
137
- assert isinstance(d,dict), 'Unrecognized file format supplied as datetime source'
138
- assert 'images' in d,\
139
- 'The datetime source you provided doesn\'t look like a valid source .json file'
140
- all_exif_results = []
141
- found_datetime = False
142
- for im in d['images']:
143
- exif_result = {'file_name':im['file']}
144
- if 'datetime' in im:
145
- found_datetime = True
146
- exif_result['exif_tags'] = {'DateTimeOriginal':im['datetime']}
147
- all_exif_results.append(exif_result)
148
- if not found_datetime:
149
- print('Warning: a MD results file was supplied as the datetime source, but it does not appear '
150
- 'to contain datetime information.')
151
-
152
- assert all_exif_results is not None
153
-
154
- filename_to_datetime_string = {}
155
-
156
- for exif_result in all_exif_results:
157
- datetime_string = unknown_datetime_tag
158
- if ('exif_tags' in exif_result) and \
159
- (exif_result['exif_tags'] is not None) and \
160
- ('DateTimeOriginal' in exif_result['exif_tags']):
161
- datetime_string = exif_result['exif_tags']['DateTimeOriginal']
162
- if datetime_string is None:
163
- datetime_string = ''
164
- else:
165
- assert isinstance(datetime_string,str), 'Unrecognized datetime format'
166
- filename_to_datetime_string[exif_result['file_name']] = datetime_string
167
-
168
- image_files = [im['file'] for im in results['images']]
169
- image_files_set = set(image_files)
170
-
171
- files_in_exif_but_not_in_results = []
172
- files_in_results_but_not_in_exif = []
173
- files_with_no_datetime_info = []
174
-
175
- for fn in filename_to_datetime_string:
176
- dts = filename_to_datetime_string[fn]
177
- if (dts is None) or (dts == unknown_datetime_tag) or (len(dts) == 0):
178
- files_with_no_datetime_info.append(fn)
179
- if fn not in image_files_set:
180
- files_in_exif_but_not_in_results.append(fn)
181
-
182
- for fn in image_files_set:
183
- if fn not in filename_to_datetime_string:
184
- files_in_results_but_not_in_exif.append(fn)
185
-
186
- print('{} files (of {}) in EXIF info not found in MD results'.format(
187
- len(files_in_exif_but_not_in_results),len(filename_to_datetime_string)
188
- ))
189
-
190
- print('{} files (of {}) in MD results not found in MD EXIF info'.format(
191
- len(files_in_results_but_not_in_exif),len(image_files_set)
192
- ))
193
-
194
- print('Failed to read datetime information for {} files (of {}) in EXIF info'.format(
195
- len(files_with_no_datetime_info),len(filename_to_datetime_string)
196
- ))
197
-
198
- # ...if we need to deal with datetimes
199
-
200
-
201
- ##%% Parse folder level column specifier
202
-
203
- if folder_level_columns is not None:
204
- if isinstance(folder_level_columns,str):
205
- tokens = folder_level_columns.split(',')
206
- folder_level_columns = [int(s) for s in tokens]
207
- for folder_level in folder_level_columns:
208
- if (not isinstance(folder_level,int)) or (folder_level < 0):
209
- raise ValueError('Illegal folder level specifier {}'.format(
210
- str(folder_level_columns)))
211
-
212
-
213
- ##%% Fill in default thresholds
214
-
215
- if classification_confidence_threshold is None:
216
- classification_confidence_threshold = default_classification_threshold
217
- if detection_confidence_threshold is None:
218
- detection_confidence_threshold = \
219
- get_typical_confidence_threshold_from_results(results)
220
-
221
- assert detection_confidence_threshold is not None
222
-
223
-
224
- ##%% Fill in output records
225
-
226
- output_records = []
227
-
228
- # For each image
229
- #
230
- # im = results['images'][0]
231
- for im in results['images']:
232
-
233
- """
234
- * filename
235
- * datetime (if images or EXIF information is supplied)
236
- * detection_category
237
- * max_detection_confidence
238
- * classification_category
239
- * max_classification_confidence
240
- * count
241
- """
242
-
243
- base_record = {}
244
-
245
- base_record['filename'] = im['file'].replace('\\','/')
246
-
247
- # Datetime (if necessary)
248
- if filename_to_datetime_string is not None:
249
- if im['file'] in filename_to_datetime_string:
250
- datetime_string = filename_to_datetime_string[im['file']]
251
- else:
252
- datetime_string = ''
253
- base_record['datetime'] = datetime_string
254
-
255
- for s in ['detection_category','max_detection_confidence',
256
- 'classification_category','max_classification_confidence',
257
- 'count']:
258
- base_record[s] = ''
259
-
260
- # Folder level columns
261
- tokens = im['file'].split('/')
262
-
263
- for folder_level in folder_level_columns:
264
- folder_level_column_name = 'folder_level_' + str(folder_level).zfill(2)
265
- if folder_level >= len(tokens):
266
- folder_level_value = ''
267
- else:
268
- folder_level_value = tokens[folder_level]
269
- base_record[folder_level_column_name] = folder_level_value
270
-
271
- records_this_image = []
272
-
273
- # Create one output row if this image failed
274
- if 'failure' in im and im['failure'] is not None and len(im['failure']) > 0:
275
-
276
- record = deepcopy(base_record)
277
- record['detection_category'] = 'error'
278
- record['classification_category'] = im['failure']
279
- records_this_image.append(record)
280
- assert 'detections' not in im or im['detections'] is None
281
-
282
- else:
283
-
284
- assert 'detections' in im and im['detections'] is not None
285
-
286
- # Count above-threshold detections
287
- detections_above_threshold = []
288
- for det in im['detections']:
289
- if det['conf'] >= detection_confidence_threshold:
290
- detections_above_threshold.append(det)
291
- max_detection_conf = get_max_conf(im)
292
-
293
- # Create one output row if this image is empty (i.e., has no
294
- # above-threshold detections)
295
- if len(detections_above_threshold) == 0:
296
-
297
- record = deepcopy(base_record)
298
- record['detection_category'] = 'empty'
299
- record['max_detection_confidence'] = max_detection_conf
300
- records_this_image.append(record)
301
-
302
- # ...if this image is empty
303
-
304
- else:
305
-
306
- # Maps a string of the form:
307
- #
308
- # detection_category:classification_category
309
- #
310
- # ...to a dict with fields ['max_detection_conf','max_classification_conf','count']
311
- category_info_string_to_record = {}
312
-
313
- for det in detections_above_threshold:
314
-
315
- assert det['conf'] >= detection_confidence_threshold
316
-
317
- detection_category_name = detection_category_id_to_name[det['category']]
318
- detection_confidence = det['conf']
319
- classification_category_name = ''
320
- classificaition_confidence = 0.0
321
-
322
- if ('classifications' in det) and (len(det['classifications']) > 0):
323
-
324
- # Classifications should always be sorted by confidence. Not
325
- # technically required, but always true in practice.
326
- assert is_list_sorted([c[1] for c in det['classifications']]), \
327
- 'This script does not yet support unsorted classifications'
328
- assert classification_category_id_to_name is not None, \
329
- 'If classifications are present, category mappings should be present'
330
-
331
- # Only use the first classification
332
- classification = det['classifications'][0]
333
- if classification[1] >= classification_confidence_threshold:
334
- classification_category_name = \
335
- classification_category_id_to_name[classification[0]]
336
- classification_confidence = classification[1]
337
-
338
- # ...if classifications are present
339
-
340
- # E.g. "animal:rodent", or "vehicle:"
341
- category_info_string = detection_category_name + ':' + classification_category_name
342
-
343
- if category_info_string not in category_info_string_to_record:
344
- category_info_string_to_record[category_info_string] = {
345
- 'max_detection_confidence':0.0,
346
- 'max_classification_confidence':0.0,
347
- 'count':0,
348
- 'detection_category':detection_category_name,
349
- 'classification_category':classification_category_name
350
- }
351
-
352
- record = category_info_string_to_record[category_info_string]
353
- record['count'] += 1
354
- if detection_confidence > record['max_detection_confidence']:
355
- record['max_detection_confidence'] = detection_confidence
356
- if classification_confidence > record['max_classification_confidence']:
357
- record['max_classification_confidence'] = classification_confidence
358
-
359
- # ...for each detection
360
-
361
- for record_in in category_info_string_to_record.values():
362
- assert record_in['count'] > 0
363
- record_out = deepcopy(base_record)
364
- for k in record_in.keys():
365
- assert k in record_out.keys()
366
- record_out[k] = record_in[k]
367
- records_this_image.append(record_out)
368
-
369
- # ...is this empty/non-empty?
370
-
371
- # ...if this image failed/didn't fail
372
-
373
- # Add to [records]
374
- output_records.extend(records_this_image)
375
-
376
- # ...for each image
377
-
378
- # Make sure every record has the same columns
379
- column_names = output_records[0].keys()
380
- for record in output_records:
381
- assert record.keys() == column_names
382
-
383
- # Write to .csv
384
- df = pd.DataFrame(output_records)
385
- df.to_csv(output_file,header=True,index=False)
386
-
387
- # from megadetector.utils.path_utils import open_file; open_file(output_file)
388
-
389
- # ...generate_csv_report(...)
390
-
391
-
392
- #%% Interactive driver
393
-
394
- if False:
395
-
396
- pass
397
-
398
- #%% Configure options
399
-
400
- """
401
- python run_detector_batch.py MDV5A "g:\temp\md-test-images" "g:\temp\md-test-images\md_results_with_datetime.json" --recursive --output_relative_filenames --include_image_timestamp --include_exif_data
402
- """
403
-
404
- md_results_file = 'g:/temp/csv-report-test/md-results.json'
405
- datetime_source = 'g:/temp/csv-report-test/exif_data.json'
406
-
407
- # datetime_source = 'g:/temp/md-test-images'
408
- # datetime_source = 'g:/temp/md-test-images/md_results_with_datetime.json'
409
- # md_results_file = 'g:/temp/md-test-images/md_results_with_datetime.json'
410
- # md_results_file = 'g:/temp/md-test-images/speciesnet_results_md_format.json'
411
-
412
- output_file = None
413
- folder_level_columns = [0,1,2,3]
414
- detection_confidence_threshold = None
415
- classification_confidence_threshold = None
416
- verbose = True
417
-
418
-
419
- #%% Programmatic execution
420
-
421
- generate_csv_report(md_results_file=md_results_file,
422
- output_file=output_file,
423
- datetime_source=datetime_source,
424
- folder_level_columns=folder_level_columns,
425
- detection_confidence_threshold=detection_confidence_threshold,
426
- classification_confidence_threshold=classification_confidence_threshold,
427
- verbose=verbose)
428
-
429
-
430
- #%% Command-line driver
431
-
432
- import sys,argparse
433
-
434
- def main():
435
-
436
- parser = argparse.ArgumentParser(
437
- description='Generates a .csv report from a MD-formatted .json file')
438
-
439
- parser.add_argument(
440
- 'md_results_file',
441
- type=str,
442
- help='Path to MD results file (.json)')
443
-
444
- parser.add_argument(
445
- '--output_file',
446
- type=str,
447
- help='Output filename (.csv) (if omitted, will append .csv to the input file)')
448
-
449
- parser.add_argument(
450
- '--datetime_source',
451
- type=str,
452
- default=None,
453
- help='Image folder, exif_info.json file, or MD results file from which we should read datetime information'
454
- )
455
-
456
- parser.add_argument(
457
- '--folder_level_columns',
458
- type=str,
459
- default=None,
460
- help='Comma-separated list of zero-indexed folder levels that should become columns in the output file'
461
- )
462
-
463
- parser.add_argument(
464
- '--detection_confidence_threshold',
465
- type=float,
466
- default=None,
467
- help='Detection threshold (if omitted, chooses a reasonable default based on the .json file)'
468
- )
469
-
470
- parser.add_argument(
471
- '--classification_confidence_threshold',
472
- type=float,
473
- default=None,
474
- help='Classification threshold (default {})'.format(default_classification_threshold)
475
- )
476
-
477
- parser.add_argument(
478
- '--verbose',
479
- action='store_true',
480
- help='Enable additional debug output'
481
- )
482
-
483
-
484
- if len(sys.argv[1:]) == 0:
485
- parser.print_help()
486
- parser.exit()
487
-
488
- args = parser.parse_args()
489
-
490
- generate_csv_report(md_results_file=args.md_results_file,
491
- output_file=args.output_file,
492
- datetime_source=args.datetime_source,
493
- folder_level_columns=args.folder_level_columns,
494
- detection_confidence_threshold=args.detection_confidence_threshold,
495
- classification_confidence_threshold=args.classification_confidence_threshold,
496
- verbose=args.verbose)
497
-
498
- if __name__ == '__main__':
499
- main()
1
+ """
2
+
3
+ generate_csv_report.py
4
+
5
+ Generates a .csv report from a MD-formatted .json file with the following columns:
6
+
7
+ * filename
8
+ * datetime (if images or EXIF information is supplied)
9
+ * detection_category
10
+ * max_detection_confidence
11
+ * classification_category
12
+ * max_classification_confidence
13
+ * count
14
+
15
+ One row is generated per category pair per image. For example, these would be unique rows:
16
+
17
+ image0001.jpg,animal,deer,4
18
+ image0001.jpg,animal,lion,4
19
+ image0001.jpg,animal,[none],4
20
+ image0001.jpg,person,[none],2
21
+
22
+ Images with no above-threshold detections will have a single row:
23
+
24
+ image0001.jpg,empty,[none],-1
25
+
26
+ Images with processing errors will have a single row:
27
+
28
+ image0001.jpg,error,error_string,-1
29
+
30
+ """
31
+
32
+ #%% Constants and imports
33
+
34
+ import os
35
+ import json
36
+ import tempfile
37
+ import sys
38
+ import argparse
39
+ import uuid
40
+
41
+ import pandas as pd
42
+
43
+ from copy import deepcopy
44
+
45
+ from megadetector.utils.wi_utils import load_md_or_speciesnet_file
46
+ from megadetector.utils.ct_utils import get_max_conf
47
+ from megadetector.utils.ct_utils import is_list_sorted
48
+ from megadetector.detection.run_detector import \
49
+ get_typical_confidence_threshold_from_results
50
+ from megadetector.data_management.read_exif import \
51
+ read_exif_from_folder, ReadExifOptions, minimal_exif_tags
52
+
53
+ default_classification_threshold = 0.3
54
+ unknown_datetime_tag = ''
55
+
56
+
57
+ #%% Functions
58
+
59
def generate_csv_report(md_results_file,
                        output_file=None,
                        datetime_source=None,
                        folder_level_columns=None,
                        detection_confidence_threshold=None,
                        classification_confidence_threshold=None,
                        verbose=True):
    """
    Generates a .csv report from a MD-formatted .json file.

    One row is generated per (detection_category, classification_category) pair per
    image; images with no above-threshold detections get a single "empty" row, and
    images with processing errors get a single "error" row.

    Args:
        md_results_file (str): MD results .json file for which we should generate a report
        output_file (str, optional): .csv file to write; if this is None, we'll use md_results_file.csv
        datetime_source (str, optional): if datetime information is required, this should point to
            a folder of images, a MD results .json file (can be the same as the input file), or
            an exif_info.json file created with read_exif().
        folder_level_columns (list of int, optional): list of folder levels (where zero is the top-level
            folder in a path name) for which we should create separate columns. Should be zero-indexed ints,
            or a comma-delimited list of zero-indexed int-strings.
        detection_confidence_threshold (float, optional): detections below this confidence threshold will not
            be included in the output data. Defaults to the recommended value based on the .json file.
        classification_confidence_threshold (float, optional): classifications below this confidence threshold will
            not be included in the output data (i.e., detections will be considered "animal").
        verbose (bool, optional): enable debug output

    Returns:
        str: the output .csv filename
    """

    ##%% Load results file

    results = load_md_or_speciesnet_file(md_results_file)

    if verbose:
        print('Loaded results for {} images'.format(len(results['images'])))

    detection_category_id_to_name = results['detection_categories']
    classification_category_id_to_name = None
    if 'classification_categories' in results:
        classification_category_id_to_name = results['classification_categories']

    if output_file is None:
        output_file = md_results_file + '.csv'


    ##%% Read datetime information if necessary

    # Maps filename to a datetime string ('' when unavailable); None when no
    # datetime source was supplied (in which case no datetime column is written).
    filename_to_datetime_string = None

    if datetime_source is not None:

        all_exif_results = None

        if os.path.isdir(datetime_source):

            # Read EXIF info from images, caching the results in a temp file
            read_exif_options = ReadExifOptions()
            read_exif_options.tags_to_include = minimal_exif_tags
            read_exif_options.byte_handling = 'delete'
            exif_cache_file = os.path.join(tempfile.gettempdir(),
                                           'md-exif-data',
                                           str(uuid.uuid1())+'.json')
            if verbose:
                print('Reading EXIF datetime info from {}, writing to {}'.format(
                    datetime_source,exif_cache_file))
            os.makedirs(os.path.dirname(exif_cache_file),exist_ok=True)

            all_exif_results = read_exif_from_folder(input_folder=datetime_source,
                                                     output_file=exif_cache_file,
                                                     options=read_exif_options,
                                                     recursive=True)

        else:

            assert os.path.isfile(datetime_source), \
                'datetime source {} is neither a folder nor a file'.format(datetime_source)

            # Load this, decide whether it's a MD file or an exif_info file
            with open(datetime_source,'r') as f:
                d = json.load(f)

            if isinstance(d,list):

                # A list is the read_exif() output format
                all_exif_results = d

            else:

                # A dict with an 'images' key is a MD results file; synthesize
                # read_exif()-style records from its per-image datetime fields.
                assert isinstance(d,dict), 'Unrecognized file format supplied as datetime source'
                assert 'images' in d,\
                    'The datetime source you provided doesn\'t look like a valid source .json file'
                all_exif_results = []
                found_datetime = False
                for im in d['images']:
                    exif_result = {'file_name':im['file']}
                    if 'datetime' in im:
                        found_datetime = True
                        exif_result['exif_tags'] = {'DateTimeOriginal':im['datetime']}
                    all_exif_results.append(exif_result)
                if not found_datetime:
                    print('Warning: a MD results file was supplied as the datetime source, but it does not appear '
                          'to contain datetime information.')

        assert all_exif_results is not None

        filename_to_datetime_string = {}

        for exif_result in all_exif_results:
            datetime_string = unknown_datetime_tag
            if ('exif_tags' in exif_result) and \
               (exif_result['exif_tags'] is not None) and \
               ('DateTimeOriginal' in exif_result['exif_tags']):
                datetime_string = exif_result['exif_tags']['DateTimeOriginal']
                if datetime_string is None:
                    datetime_string = ''
                else:
                    assert isinstance(datetime_string,str), 'Unrecognized datetime format'
            filename_to_datetime_string[exif_result['file_name']] = datetime_string

        # Debug accounting: compare the set of files with EXIF info to the set
        # of files in the MD results.
        image_files = [im['file'] for im in results['images']]
        image_files_set = set(image_files)

        files_in_exif_but_not_in_results = []
        files_in_results_but_not_in_exif = []
        files_with_no_datetime_info = []

        for fn in filename_to_datetime_string:
            dts = filename_to_datetime_string[fn]
            if (dts is None) or (dts == unknown_datetime_tag) or (len(dts) == 0):
                files_with_no_datetime_info.append(fn)
            if fn not in image_files_set:
                files_in_exif_but_not_in_results.append(fn)

        for fn in image_files_set:
            if fn not in filename_to_datetime_string:
                files_in_results_but_not_in_exif.append(fn)

        if verbose:

            print('{} files (of {}) in EXIF info not found in MD results'.format(
                len(files_in_exif_but_not_in_results),len(filename_to_datetime_string)
            ))

            print('{} files (of {}) in MD results not found in MD EXIF info'.format(
                len(files_in_results_but_not_in_exif),len(image_files_set)
            ))

            print('Failed to read datetime information for {} files (of {}) in EXIF info'.format(
                len(files_with_no_datetime_info),len(filename_to_datetime_string)
            ))

    # ...if we need to deal with datetimes


    ##%% Parse folder level column specifier

    if folder_level_columns is not None:

        # Allow a comma-delimited string of int-strings, e.g. "0,1,2"
        if isinstance(folder_level_columns,str):
            tokens = folder_level_columns.split(',')
            folder_level_columns = [int(s) for s in tokens]
        for folder_level in folder_level_columns:
            if (not isinstance(folder_level,int)) or (folder_level < 0):
                raise ValueError('Illegal folder level specifier {}'.format(
                    str(folder_level_columns)))


    ##%% Fill in default thresholds

    if classification_confidence_threshold is None:
        classification_confidence_threshold = default_classification_threshold
    if detection_confidence_threshold is None:
        detection_confidence_threshold = \
            get_typical_confidence_threshold_from_results(results)

    assert detection_confidence_threshold is not None


    ##%% Fill in output records

    output_records = []

    # For each image
    #
    # im = results['images'][0]
    for im in results['images']:

        # Common fields for every row generated for this image:
        #
        # * filename
        # * datetime (if images or EXIF information is supplied)
        # * detection_category
        # * max_detection_confidence
        # * classification_category
        # * max_classification_confidence
        # * count

        base_record = {}

        base_record['filename'] = im['file'].replace('\\','/')

        # Datetime (if necessary)
        if filename_to_datetime_string is not None:
            if im['file'] in filename_to_datetime_string:
                datetime_string = filename_to_datetime_string[im['file']]
            else:
                datetime_string = ''
            base_record['datetime'] = datetime_string

        for s in ['detection_category','max_detection_confidence',
                  'classification_category','max_classification_confidence',
                  'count']:
            base_record[s] = ''

        # Folder level columns
        tokens = im['file'].split('/')

        if folder_level_columns is not None:

            for folder_level in folder_level_columns:
                folder_level_column_name = 'folder_level_' + str(folder_level).zfill(2)
                if folder_level >= len(tokens):
                    folder_level_value = ''
                else:
                    folder_level_value = tokens[folder_level]
                base_record[folder_level_column_name] = folder_level_value

        records_this_image = []

        # Create one output row if this image failed
        if 'failure' in im and im['failure'] is not None and len(im['failure']) > 0:

            record = deepcopy(base_record)
            record['detection_category'] = 'error'
            record['classification_category'] = im['failure']
            records_this_image.append(record)
            assert 'detections' not in im or im['detections'] is None

        else:

            assert 'detections' in im and im['detections'] is not None

            # Count above-threshold detections
            detections_above_threshold = []
            for det in im['detections']:
                if det['conf'] >= detection_confidence_threshold:
                    detections_above_threshold.append(det)
            max_detection_conf = get_max_conf(im)

            # Create one output row if this image is empty (i.e., has no
            # above-threshold detections)
            if len(detections_above_threshold) == 0:

                record = deepcopy(base_record)
                record['detection_category'] = 'empty'
                record['max_detection_confidence'] = max_detection_conf
                records_this_image.append(record)

            # ...if this image is empty

            else:

                # Maps a string of the form:
                #
                # detection_category:classification_category
                #
                # ...to a dict with fields ['max_detection_conf','max_classification_conf','count']
                category_info_string_to_record = {}

                for det in detections_above_threshold:

                    assert det['conf'] >= detection_confidence_threshold

                    detection_category_name = detection_category_id_to_name[det['category']]
                    detection_confidence = det['conf']
                    classification_category_name = ''
                    classification_confidence = 0.0

                    if ('classifications' in det) and (len(det['classifications']) > 0):

                        # Classifications should always be sorted by confidence. Not
                        # technically required, but always true in practice.
                        assert is_list_sorted([c[1] for c in det['classifications']]), \
                            'This script does not yet support unsorted classifications'
                        assert classification_category_id_to_name is not None, \
                            'If classifications are present, category mappings should be present'

                        # Only use the first classification
                        classification = det['classifications'][0]
                        if classification[1] >= classification_confidence_threshold:
                            classification_category_name = \
                                classification_category_id_to_name[classification[0]]
                            classification_confidence = classification[1]

                    # ...if classifications are present

                    # E.g. "animal:rodent", or "vehicle:"
                    category_info_string = detection_category_name + ':' + classification_category_name

                    if category_info_string not in category_info_string_to_record:
                        category_info_string_to_record[category_info_string] = {
                            'max_detection_confidence':0.0,
                            'max_classification_confidence':0.0,
                            'count':0,
                            'detection_category':detection_category_name,
                            'classification_category':classification_category_name
                        }

                    record = category_info_string_to_record[category_info_string]
                    record['count'] += 1
                    if detection_confidence > record['max_detection_confidence']:
                        record['max_detection_confidence'] = detection_confidence
                    if classification_confidence > record['max_classification_confidence']:
                        record['max_classification_confidence'] = classification_confidence

                # ...for each detection

                for record_in in category_info_string_to_record.values():
                    assert record_in['count'] > 0
                    record_out = deepcopy(base_record)
                    for k in record_in.keys():
                        assert k in record_out.keys()
                        record_out[k] = record_in[k]
                    records_this_image.append(record_out)

            # ...is this empty/non-empty?

        # ...if this image failed/didn't fail

        # Add to [records]
        output_records.extend(records_this_image)

    # ...for each image

    # Make sure every record has the same columns (vacuously true when the
    # results file contained no images, in which case we write an empty .csv)
    if len(output_records) > 0:
        column_names = output_records[0].keys()
        for record in output_records:
            assert record.keys() == column_names

    # Write to .csv
    df = pd.DataFrame(output_records)
    df.to_csv(output_file,header=True,index=False)

    # from megadetector.utils.path_utils import open_file; open_file(output_file)

    return output_file

# ...generate_csv_report(...)
397
+
398
+
399
+ # %%
400
+
401
+ #%% Interactive driver
402
+
403
# Dead-code cell block (never executes at import time): scratch area for
# running this module one cell at a time in an interactive IDE session.
if False:

    pass

    #%% Configure options

    # Example command line for producing a MD results file that includes
    # datetime/EXIF information (no-op string literal, kept for reference)
    r"""
    python run_detector_batch.py MDV5A "g:\temp\md-test-images"
    "g:\temp\md-test-images\md_results_with_datetime.json"
    --recursive --output_relative_filenames --include_image_timestamp --include_exif_data
    """

    md_results_file = 'g:/temp/csv-report-test/md-results.json'
    datetime_source = 'g:/temp/csv-report-test/exif_data.json'

    # datetime_source = 'g:/temp/md-test-images'
    # datetime_source = 'g:/temp/md-test-images/md_results_with_datetime.json'
    # md_results_file = 'g:/temp/md-test-images/md_results_with_datetime.json'
    # md_results_file = 'g:/temp/md-test-images/speciesnet_results_md_format.json'

    output_file = None
    folder_level_columns = [0,1,2,3]
    detection_confidence_threshold = None
    classification_confidence_threshold = None
    verbose = True


    #%% Programmatic execution

    generate_csv_report(md_results_file=md_results_file,
                        output_file=output_file,
                        datetime_source=datetime_source,
                        folder_level_columns=folder_level_columns,
                        detection_confidence_threshold=detection_confidence_threshold,
                        classification_confidence_threshold=classification_confidence_threshold,
                        verbose=verbose)
439
+
440
+
441
+ #%% Command-line driver
442
+
443
def main(): # noqa
    """
    Command-line entry point: parse arguments and invoke generate_csv_report().
    Prints help and exits when invoked with no arguments.
    """

    arg_parser = argparse.ArgumentParser(
        description='Generates a .csv report from a MD-formatted .json file')

    # Positional argument
    arg_parser.add_argument('md_results_file', type=str,
                            help='Path to MD results file (.json)')

    # Optional arguments
    arg_parser.add_argument('--output_file', type=str,
                            help='Output filename (.csv) (if omitted, will append .csv to the input file)')
    arg_parser.add_argument('--datetime_source', type=str, default=None,
                            help='Image folder, exif_info.json file, or MD results file from which we should read datetime information')
    arg_parser.add_argument('--folder_level_columns', type=str, default=None,
                            help='Comma-separated list of zero-indexed folder levels that should become columns in the output file')
    arg_parser.add_argument('--detection_confidence_threshold', type=float, default=None,
                            help='Detection threshold (if omitted, chooses a reasonable default based on the .json file)')
    arg_parser.add_argument('--classification_confidence_threshold', type=float, default=None,
                            help='Classification threshold (default {})'.format(default_classification_threshold))
    arg_parser.add_argument('--verbose', action='store_true',
                            help='Enable additional debug output')

    # With no arguments at all, show usage rather than erroring out
    if not sys.argv[1:]:
        arg_parser.print_help()
        arg_parser.exit()

    args = arg_parser.parse_args()

    # Namespace attribute names match generate_csv_report's parameter names
    # exactly, so we can forward them as keyword arguments.
    generate_csv_report(**vars(args))

if __name__ == '__main__':
    main()