megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector has been flagged as possibly problematic. See the registry page for details.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +232 -223
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +341 -338
- megadetector/detection/pytorch_detector.py +308 -266
- megadetector/detection/run_detector.py +186 -166
- megadetector/detection/run_detector_batch.py +366 -364
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +312 -253
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +291 -283
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +808 -311
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +220 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -0
- megadetector/postprocessing/load_api_results.py +25 -22
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -302
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1019 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1511 -406
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +73 -60
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2868
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +424 -404
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +126 -98
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.27.dist-info/RECORD +0 -208
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
generate_csv_report.py
|
|
4
|
+
|
|
5
|
+
Generates a .csv report from a MD-formatted .json file with the following columns:
|
|
6
|
+
|
|
7
|
+
* filename
|
|
8
|
+
* datetime (if images or EXIF information is supplied)
|
|
9
|
+
* detection_category
|
|
10
|
+
* max_detection_confidence
|
|
11
|
+
* classification_category
|
|
12
|
+
* max_classification_confidence
|
|
13
|
+
* count
|
|
14
|
+
|
|
15
|
+
One row is generated per category pair per image. For example, these would be unique rows:
|
|
16
|
+
|
|
17
|
+
image0001.jpg,animal,deer,4
|
|
18
|
+
image0001.jpg,animal,lion,4
|
|
19
|
+
image0001.jpg,animal,[none],4
|
|
20
|
+
image0001.jpg,person,[none],2
|
|
21
|
+
|
|
22
|
+
Images with no above-threshold detections will have a single row:
|
|
23
|
+
|
|
24
|
+
image0001.jpg,empty,[none],-1
|
|
25
|
+
|
|
26
|
+
Images with processing errors will have a single row:
|
|
27
|
+
|
|
28
|
+
image0001.jpg,error,error_string,-1
|
|
29
|
+
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
#%% Constants and imports
|
|
33
|
+
|
|
34
|
+
import os
|
|
35
|
+
import json
|
|
36
|
+
import tempfile
|
|
37
|
+
import sys
|
|
38
|
+
import argparse
|
|
39
|
+
import uuid
|
|
40
|
+
|
|
41
|
+
import pandas as pd
|
|
42
|
+
|
|
43
|
+
from copy import deepcopy
|
|
44
|
+
|
|
45
|
+
from megadetector.utils.wi_utils import load_md_or_speciesnet_file
|
|
46
|
+
from megadetector.utils.ct_utils import get_max_conf
|
|
47
|
+
from megadetector.utils.ct_utils import is_list_sorted
|
|
48
|
+
from megadetector.detection.run_detector import \
|
|
49
|
+
get_typical_confidence_threshold_from_results
|
|
50
|
+
from megadetector.data_management.read_exif import \
|
|
51
|
+
read_exif_from_folder, ReadExifOptions, minimal_exif_tags
|
|
52
|
+
|
|
53
|
+
default_classification_threshold = 0.3
|
|
54
|
+
unknown_datetime_tag = ''
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
#%% Functions
|
|
58
|
+
|
|
59
|
+
def _build_filename_to_datetime_string(datetime_source, results, verbose=True):
    """
    Builds a dict mapping image filenames to datetime strings, reading either
    from a folder of images, a MD results .json file, or an exif_info.json file
    created with read_exif().

    Args:
        datetime_source (str): a folder of images, a MD results .json file, or
            an exif_info.json file created with read_exif()
        results (dict): the already-loaded MD results dict; used only to report
            (not fail on) filename mismatches between the EXIF info and the results
        verbose (bool, optional): enable additional debug output

    Returns:
        dict: maps filenames (as they appear in the EXIF source) to datetime
        strings; files with no datetime information map to the empty string
    """

    all_exif_results = None

    if os.path.isdir(datetime_source):

        # Read EXIF info directly from a folder of images, caching the results
        # in a uniquely-named temporary .json file.
        read_exif_options = ReadExifOptions()
        read_exif_options.tags_to_include = minimal_exif_tags
        read_exif_options.byte_handling = 'delete'
        exif_cache_file = os.path.join(tempfile.gettempdir(),
                                       'md-exif-data',
                                       str(uuid.uuid1())+'.json')
        if verbose:
            print('Reading EXIF datetime info from {}, writing to {}'.format(
                datetime_source,exif_cache_file))
        os.makedirs(os.path.dirname(exif_cache_file),exist_ok=True)

        all_exif_results = read_exif_from_folder(input_folder=datetime_source,
                                                 output_file=exif_cache_file,
                                                 options=read_exif_options,
                                                 recursive=True)

    else:

        assert os.path.isfile(datetime_source), \
            'datetime source {} is neither a folder nor a file'.format(datetime_source)

        # Load this file, and decide whether it's an exif_info.json file (a list)
        # or a MD results file (a dict with an 'images' field).
        with open(datetime_source,'r') as f:
            d = json.load(f)

        if isinstance(d,list):

            all_exif_results = d

        else:

            assert isinstance(d,dict), 'Unrecognized file format supplied as datetime source'
            assert 'images' in d,\
                'The datetime source you provided doesn\'t look like a valid source .json file'

            # Synthesize EXIF-style records from the optional per-image
            # "datetime" field in a MD results file.
            all_exif_results = []
            found_datetime = False
            for im in d['images']:
                exif_result = {'file_name':im['file']}
                if 'datetime' in im:
                    found_datetime = True
                    exif_result['exif_tags'] = {'DateTimeOriginal':im['datetime']}
                all_exif_results.append(exif_result)
            if not found_datetime:
                print('Warning: a MD results file was supplied as the datetime source, but it does not appear '
                      'to contain datetime information.')

    assert all_exif_results is not None

    filename_to_datetime_string = {}

    for exif_result in all_exif_results:
        datetime_string = unknown_datetime_tag
        if ('exif_tags' in exif_result) and \
           (exif_result['exif_tags'] is not None) and \
           ('DateTimeOriginal' in exif_result['exif_tags']):
            datetime_string = exif_result['exif_tags']['DateTimeOriginal']
            if datetime_string is None:
                datetime_string = ''
            else:
                assert isinstance(datetime_string,str), 'Unrecognized datetime format'
        filename_to_datetime_string[exif_result['file_name']] = datetime_string

    # Report (but don't fail on) mismatches between the EXIF info and the results
    image_files_set = set(im['file'] for im in results['images'])

    files_in_exif_but_not_in_results = []
    files_in_results_but_not_in_exif = []
    files_with_no_datetime_info = []

    for fn in filename_to_datetime_string:
        dts = filename_to_datetime_string[fn]
        if (dts is None) or (dts == unknown_datetime_tag) or (len(dts) == 0):
            files_with_no_datetime_info.append(fn)
        if fn not in image_files_set:
            files_in_exif_but_not_in_results.append(fn)

    for fn in image_files_set:
        if fn not in filename_to_datetime_string:
            files_in_results_but_not_in_exif.append(fn)

    if verbose:
        print('{} files (of {}) in EXIF info not found in MD results'.format(
            len(files_in_exif_but_not_in_results),len(filename_to_datetime_string)
        ))
        print('{} files (of {}) in MD results not found in EXIF info'.format(
            len(files_in_results_but_not_in_exif),len(image_files_set)
        ))
        print('Failed to read datetime information for {} files (of {}) in EXIF info'.format(
            len(files_with_no_datetime_info),len(filename_to_datetime_string)
        ))

    return filename_to_datetime_string

# ..._build_filename_to_datetime_string(...)


def generate_csv_report(md_results_file,
                        output_file=None,
                        datetime_source=None,
                        folder_level_columns=None,
                        detection_confidence_threshold=None,
                        classification_confidence_threshold=None,
                        verbose=True):
    """
    Generates a .csv report from a MD-formatted .json file.

    Args:
        md_results_file (str): MD results .json file for which we should generate a report
        output_file (str, optional): .csv file to write; if this is None, we'll use md_results_file.csv
        datetime_source (str, optional): if datetime information is required, this should point to
            a folder of images, a MD results .json file (can be the same as the input file), or
            an exif_info.json file created with read_exif().
        folder_level_columns (list of int, optional): list of folder levels (where zero is the top-level
            folder in a path name) for which we should create separate columns. Should be zero-indexed ints,
            or a comma-delimited list of zero-indexed int-strings.
        detection_confidence_threshold (float, optional): detections below this confidence threshold will not
            be included in the output data. Defaults to the recommended value based on the .json file.
        classification_confidence_threshold (float, optional): classifications below this confidence threshold will
            not be included in the output data (i.e., detections will be considered "animal").
        verbose (bool, optional): enable debug output

    Returns:
        str: the output .csv filename

    Raises:
        ValueError: if [folder_level_columns] contains anything other than non-negative ints
    """

    ##%% Load results file

    results = load_md_or_speciesnet_file(md_results_file)

    if verbose:
        print('Loaded results for {} images'.format(len(results['images'])))

    detection_category_id_to_name = results['detection_categories']
    classification_category_id_to_name = None
    if 'classification_categories' in results:
        classification_category_id_to_name = results['classification_categories']

    if output_file is None:
        output_file = md_results_file + '.csv'


    ##%% Read datetime information if necessary

    filename_to_datetime_string = None

    if datetime_source is not None:
        filename_to_datetime_string = _build_filename_to_datetime_string(
            datetime_source, results, verbose=verbose)


    ##%% Parse folder level column specifier

    if folder_level_columns is not None:

        # Allow a comma-delimited string of int-strings, e.g. "0,1,2"
        if isinstance(folder_level_columns,str):
            tokens = folder_level_columns.split(',')
            folder_level_columns = [int(s) for s in tokens]
        for folder_level in folder_level_columns:
            if (not isinstance(folder_level,int)) or (folder_level < 0):
                raise ValueError('Illegal folder level specifier {}'.format(
                    str(folder_level_columns)))


    ##%% Fill in default thresholds

    if classification_confidence_threshold is None:
        classification_confidence_threshold = default_classification_threshold
    if detection_confidence_threshold is None:
        detection_confidence_threshold = \
            get_typical_confidence_threshold_from_results(results)

    assert detection_confidence_threshold is not None


    ##%% Fill in output records

    output_records = []

    # For each image, emit one row per (detection category, classification category)
    # pair, or a single "empty"/"error" row.
    for im in results['images']:

        base_record = {}

        base_record['filename'] = im['file'].replace('\\','/')

        # Datetime (if necessary); an empty string indicates "unavailable"
        if filename_to_datetime_string is not None:
            base_record['datetime'] = \
                filename_to_datetime_string.get(im['file'],'')

        for s in ['detection_category','max_detection_confidence',
                  'classification_category','max_classification_confidence',
                  'count']:
            base_record[s] = ''

        # Folder level columns
        tokens = im['file'].replace('\\','/').split('/')

        if folder_level_columns is not None:

            for folder_level in folder_level_columns:
                folder_level_column_name = 'folder_level_' + str(folder_level).zfill(2)
                if folder_level >= len(tokens):
                    folder_level_value = ''
                else:
                    folder_level_value = tokens[folder_level]
                base_record[folder_level_column_name] = folder_level_value

        records_this_image = []

        # Create one output row if this image failed
        if 'failure' in im and im['failure'] is not None and len(im['failure']) > 0:

            record = deepcopy(base_record)
            record['detection_category'] = 'error'
            record['classification_category'] = im['failure']
            records_this_image.append(record)
            # Failed images should never carry detections
            assert 'detections' not in im or im['detections'] is None

        else:

            assert 'detections' in im and im['detections'] is not None

            # Count above-threshold detections
            detections_above_threshold = \
                [det for det in im['detections'] if det['conf'] >= detection_confidence_threshold]
            max_detection_conf = get_max_conf(im)

            # Create one output row if this image is empty (i.e., has no
            # above-threshold detections)
            if len(detections_above_threshold) == 0:

                record = deepcopy(base_record)
                record['detection_category'] = 'empty'
                record['max_detection_confidence'] = max_detection_conf
                records_this_image.append(record)

            # ...if this image is empty

            else:

                # Maps a string of the form:
                #
                # detection_category:classification_category
                #
                # ...to a dict with fields ['max_detection_confidence',
                # 'max_classification_confidence','count','detection_category',
                # 'classification_category']
                category_info_string_to_record = {}

                for det in detections_above_threshold:

                    assert det['conf'] >= detection_confidence_threshold

                    detection_category_name = detection_category_id_to_name[det['category']]
                    detection_confidence = det['conf']
                    classification_category_name = ''
                    classification_confidence = 0.0

                    if ('classifications' in det) and (len(det['classifications']) > 0):

                        # Classifications should always be sorted by confidence. Not
                        # technically required, but always true in practice.
                        assert is_list_sorted([c[1] for c in det['classifications']]), \
                            'This script does not yet support unsorted classifications'
                        assert classification_category_id_to_name is not None, \
                            'If classifications are present, category mappings should be present'

                        # Only use the first classification
                        classification = det['classifications'][0]
                        if classification[1] >= classification_confidence_threshold:
                            classification_category_name = \
                                classification_category_id_to_name[classification[0]]
                            classification_confidence = classification[1]

                    # ...if classifications are present

                    # E.g. "animal:rodent", or "vehicle:"
                    category_info_string = detection_category_name + ':' + classification_category_name

                    if category_info_string not in category_info_string_to_record:
                        category_info_string_to_record[category_info_string] = {
                            'max_detection_confidence':0.0,
                            'max_classification_confidence':0.0,
                            'count':0,
                            'detection_category':detection_category_name,
                            'classification_category':classification_category_name
                        }

                    record = category_info_string_to_record[category_info_string]
                    record['count'] += 1
                    if detection_confidence > record['max_detection_confidence']:
                        record['max_detection_confidence'] = detection_confidence
                    if classification_confidence > record['max_classification_confidence']:
                        record['max_classification_confidence'] = classification_confidence

                # ...for each detection

                for record_in in category_info_string_to_record.values():
                    assert record_in['count'] > 0
                    record_out = deepcopy(base_record)
                    for k in record_in.keys():
                        assert k in record_out.keys()
                        record_out[k] = record_in[k]
                    records_this_image.append(record_out)

            # ...is this empty/non-empty?

        # ...if this image failed/didn't fail

        # Add to [output_records]
        output_records.extend(records_this_image)

    # ...for each image

    # Make sure every record has the same columns (guard against the empty case,
    # which previously crashed here)
    if len(output_records) > 0:
        column_names = output_records[0].keys()
        for record in output_records:
            assert record.keys() == column_names

    # Write to .csv
    df = pd.DataFrame(output_records)
    df.to_csv(output_file,header=True,index=False)

    # from megadetector.utils.path_utils import open_file; open_file(output_file)

    return output_file

# ...generate_csv_report(...)
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
# %%
|
|
400
|
+
|
|
401
|
+
#%% Interactive driver

# Dead-by-design scaffolding: everything under "if False:" is meant to be run
# cell-by-cell in an interactive session (e.g. in Spyder/VS Code), never at
# import or script execution time.
if False:

    pass

    #%% Configure options

    # Example command line for generating a MD results file that includes
    # datetime information, for use as an input to this cell.
    r"""
    python run_detector_batch.py MDV5A "g:\temp\md-test-images"
    "g:\temp\md-test-images\md_results_with_datetime.json"
    --recursive --output_relative_filenames --include_image_timestamp --include_exif_data
    """

    md_results_file = 'g:/temp/csv-report-test/md-results.json'
    datetime_source = 'g:/temp/csv-report-test/exif_data.json'

    # Alternative datetime sources / results files used during testing:
    # datetime_source = 'g:/temp/md-test-images'
    # datetime_source = 'g:/temp/md-test-images/md_results_with_datetime.json'
    # md_results_file = 'g:/temp/md-test-images/md_results_with_datetime.json'
    # md_results_file = 'g:/temp/md-test-images/speciesnet_results_md_format.json'

    output_file = None
    folder_level_columns = [0,1,2,3]
    detection_confidence_threshold = None
    classification_confidence_threshold = None
    verbose = True


    #%% Programmatic execution

    generate_csv_report(md_results_file=md_results_file,
                        output_file=output_file,
                        datetime_source=datetime_source,
                        folder_level_columns=folder_level_columns,
                        detection_confidence_threshold=detection_confidence_threshold,
                        classification_confidence_threshold=classification_confidence_threshold,
                        verbose=verbose)
|
|
441
|
+
#%% Command-line driver
|
|
442
|
+
|
|
443
|
+
def main(): # noqa
    """
    Command-line entry point: parse arguments and hand off to
    generate_csv_report(). Prints help and exits when invoked with no
    arguments.
    """

    parser = argparse.ArgumentParser(
        description='Generates a .csv report from a MD-formatted .json file')

    # Positional argument: the input .json file
    parser.add_argument('md_results_file', type=str,
                        help='Path to MD results file (.json)')

    # Optional arguments; all default to None, deferring default selection to
    # generate_csv_report()
    parser.add_argument('--output_file', type=str, default=None,
                        help='Output filename (.csv) (if omitted, will append .csv to the input file)')
    parser.add_argument('--datetime_source', type=str, default=None,
                        help='Image folder, exif_info.json file, or MD results file from which we should read datetime information')
    parser.add_argument('--folder_level_columns', type=str, default=None,
                        help='Comma-separated list of zero-indexed folder levels that should become columns in the output file')
    parser.add_argument('--detection_confidence_threshold', type=float, default=None,
                        help='Detection threshold (if omitted, chooses a reasonable default based on the .json file)')
    parser.add_argument('--classification_confidence_threshold', type=float, default=None,
                        help='Classification threshold (default {})'.format(default_classification_threshold))
    parser.add_argument('--verbose', action='store_true',
                        help='Enable additional debug output')

    # With no arguments at all, show usage rather than erroring out on the
    # missing positional argument
    if len(sys.argv) == 1:
        parser.print_help()
        parser.exit()

    parsed_args = parser.parse_args()

    generate_csv_report(md_results_file=parsed_args.md_results_file,
                        output_file=parsed_args.output_file,
                        datetime_source=parsed_args.datetime_source,
                        folder_level_columns=parsed_args.folder_level_columns,
                        detection_confidence_threshold=parsed_args.detection_confidence_threshold,
                        classification_confidence_threshold=parsed_args.classification_confidence_threshold,
                        verbose=parsed_args.verbose)

if __name__ == '__main__':
    main()
|
|
@@ -4,7 +4,7 @@ load_api_results.py
|
|
|
4
4
|
|
|
5
5
|
DEPRECATED
|
|
6
6
|
|
|
7
|
-
As of 2023.12, this module is used in postprocessing and RDE
|
|
7
|
+
As of 2023.12, this module is still used in postprocessing and RDE, but it's not recommended
|
|
8
8
|
for new code.
|
|
9
9
|
|
|
10
10
|
Loads the output of the batch processing API (json) into a Pandas dataframe.
|
|
@@ -18,11 +18,12 @@ Includes functions to read/write the (very very old) .csv results format.
|
|
|
18
18
|
import json
|
|
19
19
|
import os
|
|
20
20
|
|
|
21
|
-
from typing import
|
|
21
|
+
from typing import Mapping, Optional
|
|
22
22
|
|
|
23
23
|
import pandas as pd
|
|
24
24
|
|
|
25
25
|
from megadetector.utils import ct_utils
|
|
26
|
+
from megadetector.utils.wi_utils import load_md_or_speciesnet_file
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
#%% Functions for loading .json results into a Pandas DataFrame, and writing back to .json
|
|
@@ -30,7 +31,7 @@ from megadetector.utils import ct_utils
|
|
|
30
31
|
def load_api_results(api_output_path: str, normalize_paths: bool = True,
|
|
31
32
|
filename_replacements: Optional[Mapping[str, str]] = None,
|
|
32
33
|
force_forward_slashes: bool = True
|
|
33
|
-
) ->
|
|
34
|
+
) -> tuple[pd.DataFrame, dict]:
|
|
34
35
|
r"""
|
|
35
36
|
Loads json-formatted MegaDetector results to a Pandas DataFrame.
|
|
36
37
|
|
|
@@ -47,11 +48,10 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
|
|
|
47
48
|
detection_results: pd.DataFrame, contains at least the columns ['file', 'detections','failure']
|
|
48
49
|
other_fields: a dict containing fields in the results other than 'images'
|
|
49
50
|
"""
|
|
50
|
-
|
|
51
|
+
|
|
51
52
|
print('Loading results from {}'.format(api_output_path))
|
|
52
53
|
|
|
53
|
-
|
|
54
|
-
detection_results = json.load(f)
|
|
54
|
+
detection_results = load_md_or_speciesnet_file(api_output_path)
|
|
55
55
|
|
|
56
56
|
# Validate that this is really a detector output file
|
|
57
57
|
for s in ['info', 'detection_categories', 'images']:
|
|
@@ -65,12 +65,12 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
|
|
|
65
65
|
|
|
66
66
|
if normalize_paths:
|
|
67
67
|
for image in detection_results['images']:
|
|
68
|
-
image['file'] = os.path.normpath(image['file'])
|
|
68
|
+
image['file'] = os.path.normpath(image['file'])
|
|
69
69
|
|
|
70
70
|
if force_forward_slashes:
|
|
71
71
|
for image in detection_results['images']:
|
|
72
72
|
image['file'] = image['file'].replace('\\','/')
|
|
73
|
-
|
|
73
|
+
|
|
74
74
|
# Replace some path tokens to match local paths to original blob structure
|
|
75
75
|
if filename_replacements is not None:
|
|
76
76
|
for string_to_replace in filename_replacements.keys():
|
|
@@ -79,16 +79,16 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
|
|
|
79
79
|
im['file'] = im['file'].replace(string_to_replace,replacement_string)
|
|
80
80
|
|
|
81
81
|
print('Converting results to dataframe')
|
|
82
|
-
|
|
82
|
+
|
|
83
83
|
# If this is a newer file that doesn't include maximum detection confidence values,
|
|
84
84
|
# add them, because our unofficial internal dataframe format includes this.
|
|
85
85
|
for im in detection_results['images']:
|
|
86
86
|
if 'max_detection_conf' not in im:
|
|
87
87
|
im['max_detection_conf'] = ct_utils.get_max_conf(im)
|
|
88
|
-
|
|
88
|
+
|
|
89
89
|
# Pack the json output into a Pandas DataFrame
|
|
90
90
|
detection_results = pd.DataFrame(detection_results['images'])
|
|
91
|
-
|
|
91
|
+
|
|
92
92
|
print('Finished loading MegaDetector results for {} images from {}'.format(
|
|
93
93
|
len(detection_results),api_output_path))
|
|
94
94
|
|
|
@@ -111,7 +111,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
|
|
|
111
111
|
if 'failure' in im and im['failure'] is None:
|
|
112
112
|
del im['failure']
|
|
113
113
|
fields['images'] = images
|
|
114
|
-
|
|
114
|
+
|
|
115
115
|
# Convert the 'version' field back to a string as per format convention
|
|
116
116
|
try:
|
|
117
117
|
version = other_fields['info']['format_version']
|
|
@@ -120,7 +120,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
|
|
|
120
120
|
except Exception:
|
|
121
121
|
print('Warning: error determining format version')
|
|
122
122
|
pass
|
|
123
|
-
|
|
123
|
+
|
|
124
124
|
# Remove 'max_detection_conf' as per newer file convention (format >= v1.3)
|
|
125
125
|
try:
|
|
126
126
|
version = other_fields['info']['format_version']
|
|
@@ -132,20 +132,23 @@ def write_api_results(detection_results_table, other_fields, out_path):
|
|
|
132
132
|
except Exception:
|
|
133
133
|
print('Warning: error removing max_detection_conf from output')
|
|
134
134
|
pass
|
|
135
|
-
|
|
135
|
+
|
|
136
136
|
with open(out_path, 'w') as f:
|
|
137
137
|
json.dump(fields, f, indent=1)
|
|
138
138
|
|
|
139
139
|
print('Finished writing detection results to {}'.format(out_path))
|
|
140
140
|
|
|
141
141
|
|
|
142
|
-
def load_api_results_csv(filename, normalize_paths=True, filename_replacements=
|
|
142
|
+
def load_api_results_csv(filename, normalize_paths=True, filename_replacements=None, nrows=None):
|
|
143
143
|
"""
|
|
144
144
|
[DEPRECATED]
|
|
145
|
-
|
|
145
|
+
|
|
146
146
|
Loads .csv-formatted MegaDetector results to a pandas table
|
|
147
147
|
"""
|
|
148
148
|
|
|
149
|
+
if filename_replacements is None:
|
|
150
|
+
filename_replacements = {}
|
|
151
|
+
|
|
149
152
|
print('Loading MegaDetector results from {}'.format(filename))
|
|
150
153
|
|
|
151
154
|
detection_results = pd.read_csv(filename,nrows=nrows)
|
|
@@ -169,12 +172,12 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={
|
|
|
169
172
|
|
|
170
173
|
replacement_string = filename_replacements[string_to_replace]
|
|
171
174
|
|
|
172
|
-
#
|
|
173
|
-
for
|
|
174
|
-
row = detection_results.iloc[
|
|
175
|
+
# i_row = 0
|
|
176
|
+
for i_row in range(0,len(detection_results)):
|
|
177
|
+
row = detection_results.iloc[i_row]
|
|
175
178
|
fn = row['image_path']
|
|
176
179
|
fn = fn.replace(string_to_replace,replacement_string)
|
|
177
|
-
detection_results.at[
|
|
180
|
+
detection_results.at[i_row,'image_path'] = fn
|
|
178
181
|
|
|
179
182
|
print('Finished loading and de-serializing MD results for {} images from {}'.format(
|
|
180
183
|
len(detection_results),filename))
|
|
@@ -183,9 +186,9 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={
|
|
|
183
186
|
|
|
184
187
|
|
|
185
188
|
def write_api_results_csv(detection_results, filename):
|
|
186
|
-
"""
|
|
189
|
+
"""
|
|
187
190
|
[DEPRECATED]
|
|
188
|
-
|
|
191
|
+
|
|
189
192
|
Writes a Pandas table to csv in a way that's compatible with the .csv output
|
|
190
193
|
format. Currently just a wrapper around to_csv that forces output writing
|
|
191
194
|
to go through a common code path.
|