megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector has been flagged as possibly problematic. See the registry page for details.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +232 -223
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +341 -338
- megadetector/detection/pytorch_detector.py +308 -266
- megadetector/detection/run_detector.py +186 -166
- megadetector/detection/run_detector_batch.py +366 -364
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +312 -253
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +291 -283
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +808 -311
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +220 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -0
- megadetector/postprocessing/load_api_results.py +25 -22
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -302
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1019 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1511 -406
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +73 -60
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2868
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +424 -404
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +126 -98
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.27.dist-info/RECORD +0 -208
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
generate_csv_report.py
|
|
4
|
+
|
|
5
|
+
Generates a .csv report from a MD-formatted .json file with the following columns:
|
|
6
|
+
|
|
7
|
+
* filename
|
|
8
|
+
* datetime (if images or EXIF information is supplied)
|
|
9
|
+
* detection_category
|
|
10
|
+
* max_detection_confidence
|
|
11
|
+
* classification_category
|
|
12
|
+
* max_classification_confidence
|
|
13
|
+
* count
|
|
14
|
+
|
|
15
|
+
One row is generated per category pair per image. For example, these would be unique rows:
|
|
16
|
+
|
|
17
|
+
image0001.jpg,animal,deer,4
|
|
18
|
+
image0001.jpg,animal,lion,4
|
|
19
|
+
image0001.jpg,animal,[none],4
|
|
20
|
+
image0001.jpg,person,[none],2
|
|
21
|
+
|
|
22
|
+
Images with no above-threshold detections will have a single row:
|
|
23
|
+
|
|
24
|
+
image0001.jpg,empty,[none],-1
|
|
25
|
+
|
|
26
|
+
Images with processing errors will have a single row:
|
|
27
|
+
|
|
28
|
+
image0001.jpg,error,error_string,-1
|
|
29
|
+
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
#%% Constants and imports
|
|
33
|
+
|
|
34
|
+
import os
|
|
35
|
+
import json
|
|
36
|
+
import tempfile
|
|
37
|
+
import sys
|
|
38
|
+
import argparse
|
|
39
|
+
import uuid
|
|
40
|
+
|
|
41
|
+
import pandas as pd
|
|
42
|
+
|
|
43
|
+
from copy import deepcopy
|
|
44
|
+
|
|
45
|
+
from megadetector.utils.wi_utils import load_md_or_speciesnet_file
|
|
46
|
+
from megadetector.utils.ct_utils import get_max_conf
|
|
47
|
+
from megadetector.utils.ct_utils import is_list_sorted
|
|
48
|
+
from megadetector.detection.run_detector import \
|
|
49
|
+
get_typical_confidence_threshold_from_results
|
|
50
|
+
from megadetector.data_management.read_exif import \
|
|
51
|
+
read_exif_from_folder, ReadExifOptions, minimal_exif_tags
|
|
52
|
+
|
|
53
|
+
default_classification_threshold = 0.3
|
|
54
|
+
unknown_datetime_tag = ''
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
#%% Functions
|
|
58
|
+
|
|
59
|
+
def _build_filename_to_datetime_string(datetime_source, results, verbose=True):
    """
    Builds a dict mapping image filenames to datetime strings, reading either
    from a folder of images, a MD results .json file, or an exif_info.json file
    created with read_exif().

    Args:
        datetime_source (str): a folder of images, a MD results .json file, or
            an exif_info.json file created with read_exif()
        results (dict): the already-loaded MD results dict; used only to report
            (not fail on) filename mismatches between the EXIF info and the results
        verbose (bool, optional): enable additional debug output

    Returns:
        dict: maps filenames (as they appear in the EXIF source) to datetime
        strings; files with no datetime information map to the empty string
    """

    all_exif_results = None

    if os.path.isdir(datetime_source):

        # Read EXIF info directly from a folder of images, caching the results
        # in a uniquely-named temporary .json file.
        read_exif_options = ReadExifOptions()
        read_exif_options.tags_to_include = minimal_exif_tags
        read_exif_options.byte_handling = 'delete'
        exif_cache_file = os.path.join(tempfile.gettempdir(),
                                       'md-exif-data',
                                       str(uuid.uuid1())+'.json')
        if verbose:
            print('Reading EXIF datetime info from {}, writing to {}'.format(
                datetime_source,exif_cache_file))
        os.makedirs(os.path.dirname(exif_cache_file),exist_ok=True)

        all_exif_results = read_exif_from_folder(input_folder=datetime_source,
                                                 output_file=exif_cache_file,
                                                 options=read_exif_options,
                                                 recursive=True)

    else:

        assert os.path.isfile(datetime_source), \
            'datetime source {} is neither a folder nor a file'.format(datetime_source)

        # Load this file, and decide whether it's an exif_info.json file (a list)
        # or a MD results file (a dict with an 'images' field).
        with open(datetime_source,'r') as f:
            d = json.load(f)

        if isinstance(d,list):

            all_exif_results = d

        else:

            assert isinstance(d,dict), 'Unrecognized file format supplied as datetime source'
            assert 'images' in d,\
                'The datetime source you provided doesn\'t look like a valid source .json file'

            # Synthesize EXIF-style records from the optional per-image
            # "datetime" field in a MD results file.
            all_exif_results = []
            found_datetime = False
            for im in d['images']:
                exif_result = {'file_name':im['file']}
                if 'datetime' in im:
                    found_datetime = True
                    exif_result['exif_tags'] = {'DateTimeOriginal':im['datetime']}
                all_exif_results.append(exif_result)
            if not found_datetime:
                print('Warning: a MD results file was supplied as the datetime source, but it does not appear '
                      'to contain datetime information.')

    assert all_exif_results is not None

    filename_to_datetime_string = {}

    for exif_result in all_exif_results:
        datetime_string = unknown_datetime_tag
        if ('exif_tags' in exif_result) and \
           (exif_result['exif_tags'] is not None) and \
           ('DateTimeOriginal' in exif_result['exif_tags']):
            datetime_string = exif_result['exif_tags']['DateTimeOriginal']
            if datetime_string is None:
                datetime_string = ''
            else:
                assert isinstance(datetime_string,str), 'Unrecognized datetime format'
        filename_to_datetime_string[exif_result['file_name']] = datetime_string

    # Report (but don't fail on) mismatches between the EXIF info and the results
    image_files_set = set(im['file'] for im in results['images'])

    files_in_exif_but_not_in_results = []
    files_in_results_but_not_in_exif = []
    files_with_no_datetime_info = []

    for fn in filename_to_datetime_string:
        dts = filename_to_datetime_string[fn]
        if (dts is None) or (dts == unknown_datetime_tag) or (len(dts) == 0):
            files_with_no_datetime_info.append(fn)
        if fn not in image_files_set:
            files_in_exif_but_not_in_results.append(fn)

    for fn in image_files_set:
        if fn not in filename_to_datetime_string:
            files_in_results_but_not_in_exif.append(fn)

    if verbose:
        print('{} files (of {}) in EXIF info not found in MD results'.format(
            len(files_in_exif_but_not_in_results),len(filename_to_datetime_string)
        ))
        print('{} files (of {}) in MD results not found in EXIF info'.format(
            len(files_in_results_but_not_in_exif),len(image_files_set)
        ))
        print('Failed to read datetime information for {} files (of {}) in EXIF info'.format(
            len(files_with_no_datetime_info),len(filename_to_datetime_string)
        ))

    return filename_to_datetime_string

# ..._build_filename_to_datetime_string(...)


def generate_csv_report(md_results_file,
                        output_file=None,
                        datetime_source=None,
                        folder_level_columns=None,
                        detection_confidence_threshold=None,
                        classification_confidence_threshold=None,
                        verbose=True):
    """
    Generates a .csv report from a MD-formatted .json file.

    Args:
        md_results_file (str): MD results .json file for which we should generate a report
        output_file (str, optional): .csv file to write; if this is None, we'll use md_results_file.csv
        datetime_source (str, optional): if datetime information is required, this should point to
            a folder of images, a MD results .json file (can be the same as the input file), or
            an exif_info.json file created with read_exif().
        folder_level_columns (list of int, optional): list of folder levels (where zero is the top-level
            folder in a path name) for which we should create separate columns. Should be zero-indexed ints,
            or a comma-delimited list of zero-indexed int-strings.
        detection_confidence_threshold (float, optional): detections below this confidence threshold will not
            be included in the output data. Defaults to the recommended value based on the .json file.
        classification_confidence_threshold (float, optional): classifications below this confidence threshold will
            not be included in the output data (i.e., detections will be considered "animal").
        verbose (bool, optional): enable debug output

    Returns:
        str: the output .csv filename

    Raises:
        ValueError: if [folder_level_columns] contains anything other than non-negative ints
    """

    ##%% Load results file

    results = load_md_or_speciesnet_file(md_results_file)

    if verbose:
        print('Loaded results for {} images'.format(len(results['images'])))

    detection_category_id_to_name = results['detection_categories']
    classification_category_id_to_name = None
    if 'classification_categories' in results:
        classification_category_id_to_name = results['classification_categories']

    if output_file is None:
        output_file = md_results_file + '.csv'


    ##%% Read datetime information if necessary

    filename_to_datetime_string = None

    if datetime_source is not None:
        filename_to_datetime_string = _build_filename_to_datetime_string(
            datetime_source, results, verbose=verbose)


    ##%% Parse folder level column specifier

    if folder_level_columns is not None:

        # Allow a comma-delimited string of int-strings, e.g. "0,1,2"
        if isinstance(folder_level_columns,str):
            tokens = folder_level_columns.split(',')
            folder_level_columns = [int(s) for s in tokens]
        for folder_level in folder_level_columns:
            if (not isinstance(folder_level,int)) or (folder_level < 0):
                raise ValueError('Illegal folder level specifier {}'.format(
                    str(folder_level_columns)))


    ##%% Fill in default thresholds

    if classification_confidence_threshold is None:
        classification_confidence_threshold = default_classification_threshold
    if detection_confidence_threshold is None:
        detection_confidence_threshold = \
            get_typical_confidence_threshold_from_results(results)

    assert detection_confidence_threshold is not None


    ##%% Fill in output records

    output_records = []

    # For each image, emit one row per (detection category, classification category)
    # pair, or a single "empty"/"error" row.
    for im in results['images']:

        base_record = {}

        base_record['filename'] = im['file'].replace('\\','/')

        # Datetime (if necessary); an empty string indicates "unavailable"
        if filename_to_datetime_string is not None:
            base_record['datetime'] = \
                filename_to_datetime_string.get(im['file'],'')

        for s in ['detection_category','max_detection_confidence',
                  'classification_category','max_classification_confidence',
                  'count']:
            base_record[s] = ''

        # Folder level columns
        tokens = im['file'].replace('\\','/').split('/')

        if folder_level_columns is not None:

            for folder_level in folder_level_columns:
                folder_level_column_name = 'folder_level_' + str(folder_level).zfill(2)
                if folder_level >= len(tokens):
                    folder_level_value = ''
                else:
                    folder_level_value = tokens[folder_level]
                base_record[folder_level_column_name] = folder_level_value

        records_this_image = []

        # Create one output row if this image failed
        if 'failure' in im and im['failure'] is not None and len(im['failure']) > 0:

            record = deepcopy(base_record)
            record['detection_category'] = 'error'
            record['classification_category'] = im['failure']
            records_this_image.append(record)
            # Failed images should never carry detections
            assert 'detections' not in im or im['detections'] is None

        else:

            assert 'detections' in im and im['detections'] is not None

            # Count above-threshold detections
            detections_above_threshold = \
                [det for det in im['detections'] if det['conf'] >= detection_confidence_threshold]
            max_detection_conf = get_max_conf(im)

            # Create one output row if this image is empty (i.e., has no
            # above-threshold detections)
            if len(detections_above_threshold) == 0:

                record = deepcopy(base_record)
                record['detection_category'] = 'empty'
                record['max_detection_confidence'] = max_detection_conf
                records_this_image.append(record)

            # ...if this image is empty

            else:

                # Maps a string of the form:
                #
                # detection_category:classification_category
                #
                # ...to a dict with fields ['max_detection_confidence',
                # 'max_classification_confidence','count','detection_category',
                # 'classification_category']
                category_info_string_to_record = {}

                for det in detections_above_threshold:

                    assert det['conf'] >= detection_confidence_threshold

                    detection_category_name = detection_category_id_to_name[det['category']]
                    detection_confidence = det['conf']
                    classification_category_name = ''
                    classification_confidence = 0.0

                    if ('classifications' in det) and (len(det['classifications']) > 0):

                        # Classifications should always be sorted by confidence. Not
                        # technically required, but always true in practice.
                        assert is_list_sorted([c[1] for c in det['classifications']]), \
                            'This script does not yet support unsorted classifications'
                        assert classification_category_id_to_name is not None, \
                            'If classifications are present, category mappings should be present'

                        # Only use the first classification
                        classification = det['classifications'][0]
                        if classification[1] >= classification_confidence_threshold:
                            classification_category_name = \
                                classification_category_id_to_name[classification[0]]
                            classification_confidence = classification[1]

                    # ...if classifications are present

                    # E.g. "animal:rodent", or "vehicle:"
                    category_info_string = detection_category_name + ':' + classification_category_name

                    if category_info_string not in category_info_string_to_record:
                        category_info_string_to_record[category_info_string] = {
                            'max_detection_confidence':0.0,
                            'max_classification_confidence':0.0,
                            'count':0,
                            'detection_category':detection_category_name,
                            'classification_category':classification_category_name
                        }

                    record = category_info_string_to_record[category_info_string]
                    record['count'] += 1
                    if detection_confidence > record['max_detection_confidence']:
                        record['max_detection_confidence'] = detection_confidence
                    if classification_confidence > record['max_classification_confidence']:
                        record['max_classification_confidence'] = classification_confidence

                # ...for each detection

                for record_in in category_info_string_to_record.values():
                    assert record_in['count'] > 0
                    record_out = deepcopy(base_record)
                    for k in record_in.keys():
                        assert k in record_out.keys()
                        record_out[k] = record_in[k]
                    records_this_image.append(record_out)

            # ...is this empty/non-empty?

        # ...if this image failed/didn't fail

        # Add to [output_records]
        output_records.extend(records_this_image)

    # ...for each image

    # Make sure every record has the same columns (guard against the empty case,
    # which previously crashed here)
    if len(output_records) > 0:
        column_names = output_records[0].keys()
        for record in output_records:
            assert record.keys() == column_names

    # Write to .csv
    df = pd.DataFrame(output_records)
    df.to_csv(output_file,header=True,index=False)

    # from megadetector.utils.path_utils import open_file; open_file(output_file)

    return output_file

# ...generate_csv_report(...)
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
# %%
|
|
400
|
+
|
|
401
|
+
#%% Interactive driver

# Dead-by-design scaffolding: everything under "if False:" is meant to be run
# cell-by-cell in an interactive session (e.g. in Spyder/VS Code), never at
# import or script execution time.
if False:

    pass

    #%% Configure options

    # Example command line for generating a MD results file that includes
    # datetime information, for use as an input to this cell.
    r"""
    python run_detector_batch.py MDV5A "g:\temp\md-test-images"
    "g:\temp\md-test-images\md_results_with_datetime.json"
    --recursive --output_relative_filenames --include_image_timestamp --include_exif_data
    """

    md_results_file = 'g:/temp/csv-report-test/md-results.json'
    datetime_source = 'g:/temp/csv-report-test/exif_data.json'

    # Alternative datetime sources / results files used during testing:
    # datetime_source = 'g:/temp/md-test-images'
    # datetime_source = 'g:/temp/md-test-images/md_results_with_datetime.json'
    # md_results_file = 'g:/temp/md-test-images/md_results_with_datetime.json'
    # md_results_file = 'g:/temp/md-test-images/speciesnet_results_md_format.json'

    output_file = None
    folder_level_columns = [0,1,2,3]
    detection_confidence_threshold = None
    classification_confidence_threshold = None
    verbose = True


    #%% Programmatic execution

    generate_csv_report(md_results_file=md_results_file,
                        output_file=output_file,
                        datetime_source=datetime_source,
                        folder_level_columns=folder_level_columns,
                        detection_confidence_threshold=detection_confidence_threshold,
                        classification_confidence_threshold=classification_confidence_threshold,
                        verbose=verbose)
|
|
441
|
+
#%% Command-line driver
|
|
442
|
+
|
|
443
|
+
def main(): # noqa
    """
    Command-line entry point: parse arguments and hand off to
    generate_csv_report(). Prints help and exits when invoked with no
    arguments.
    """

    parser = argparse.ArgumentParser(
        description='Generates a .csv report from a MD-formatted .json file')

    # Positional argument: the input .json file
    parser.add_argument('md_results_file', type=str,
                        help='Path to MD results file (.json)')

    # Optional arguments; all default to None, deferring default selection to
    # generate_csv_report()
    parser.add_argument('--output_file', type=str, default=None,
                        help='Output filename (.csv) (if omitted, will append .csv to the input file)')
    parser.add_argument('--datetime_source', type=str, default=None,
                        help='Image folder, exif_info.json file, or MD results file from which we should read datetime information')
    parser.add_argument('--folder_level_columns', type=str, default=None,
                        help='Comma-separated list of zero-indexed folder levels that should become columns in the output file')
    parser.add_argument('--detection_confidence_threshold', type=float, default=None,
                        help='Detection threshold (if omitted, chooses a reasonable default based on the .json file)')
    parser.add_argument('--classification_confidence_threshold', type=float, default=None,
                        help='Classification threshold (default {})'.format(default_classification_threshold))
    parser.add_argument('--verbose', action='store_true',
                        help='Enable additional debug output')

    # With no arguments at all, show usage rather than erroring out on the
    # missing positional argument
    if len(sys.argv) == 1:
        parser.print_help()
        parser.exit()

    parsed_args = parser.parse_args()

    generate_csv_report(md_results_file=parsed_args.md_results_file,
                        output_file=parsed_args.output_file,
                        datetime_source=parsed_args.datetime_source,
                        folder_level_columns=parsed_args.folder_level_columns,
                        detection_confidence_threshold=parsed_args.detection_confidence_threshold,
                        classification_confidence_threshold=parsed_args.classification_confidence_threshold,
                        verbose=parsed_args.verbose)

if __name__ == '__main__':
    main()
|
|
@@ -4,7 +4,7 @@ load_api_results.py
|
|
|
4
4
|
|
|
5
5
|
DEPRECATED
|
|
6
6
|
|
|
7
|
-
As of 2023.12, this module is used in postprocessing and RDE
|
|
7
|
+
As of 2023.12, this module is still used in postprocessing and RDE, but it's not recommended
|
|
8
8
|
for new code.
|
|
9
9
|
|
|
10
10
|
Loads the output of the batch processing API (json) into a Pandas dataframe.
|
|
@@ -18,11 +18,12 @@ Includes functions to read/write the (very very old) .csv results format.
|
|
|
18
18
|
import json
|
|
19
19
|
import os
|
|
20
20
|
|
|
21
|
-
from typing import
|
|
21
|
+
from typing import Mapping, Optional
|
|
22
22
|
|
|
23
23
|
import pandas as pd
|
|
24
24
|
|
|
25
25
|
from megadetector.utils import ct_utils
|
|
26
|
+
from megadetector.utils.wi_utils import load_md_or_speciesnet_file
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
#%% Functions for loading .json results into a Pandas DataFrame, and writing back to .json
|
|
@@ -30,7 +31,7 @@ from megadetector.utils import ct_utils
|
|
|
30
31
|
def load_api_results(api_output_path: str, normalize_paths: bool = True,
|
|
31
32
|
filename_replacements: Optional[Mapping[str, str]] = None,
|
|
32
33
|
force_forward_slashes: bool = True
|
|
33
|
-
) ->
|
|
34
|
+
) -> tuple[pd.DataFrame, dict]:
|
|
34
35
|
r"""
|
|
35
36
|
Loads json-formatted MegaDetector results to a Pandas DataFrame.
|
|
36
37
|
|
|
@@ -47,11 +48,10 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
|
|
|
47
48
|
detection_results: pd.DataFrame, contains at least the columns ['file', 'detections','failure']
|
|
48
49
|
other_fields: a dict containing fields in the results other than 'images'
|
|
49
50
|
"""
|
|
50
|
-
|
|
51
|
+
|
|
51
52
|
print('Loading results from {}'.format(api_output_path))
|
|
52
53
|
|
|
53
|
-
|
|
54
|
-
detection_results = json.load(f)
|
|
54
|
+
detection_results = load_md_or_speciesnet_file(api_output_path)
|
|
55
55
|
|
|
56
56
|
# Validate that this is really a detector output file
|
|
57
57
|
for s in ['info', 'detection_categories', 'images']:
|
|
@@ -65,12 +65,12 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
|
|
|
65
65
|
|
|
66
66
|
if normalize_paths:
|
|
67
67
|
for image in detection_results['images']:
|
|
68
|
-
image['file'] = os.path.normpath(image['file'])
|
|
68
|
+
image['file'] = os.path.normpath(image['file'])
|
|
69
69
|
|
|
70
70
|
if force_forward_slashes:
|
|
71
71
|
for image in detection_results['images']:
|
|
72
72
|
image['file'] = image['file'].replace('\\','/')
|
|
73
|
-
|
|
73
|
+
|
|
74
74
|
# Replace some path tokens to match local paths to original blob structure
|
|
75
75
|
if filename_replacements is not None:
|
|
76
76
|
for string_to_replace in filename_replacements.keys():
|
|
@@ -79,16 +79,16 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
|
|
|
79
79
|
im['file'] = im['file'].replace(string_to_replace,replacement_string)
|
|
80
80
|
|
|
81
81
|
print('Converting results to dataframe')
|
|
82
|
-
|
|
82
|
+
|
|
83
83
|
# If this is a newer file that doesn't include maximum detection confidence values,
|
|
84
84
|
# add them, because our unofficial internal dataframe format includes this.
|
|
85
85
|
for im in detection_results['images']:
|
|
86
86
|
if 'max_detection_conf' not in im:
|
|
87
87
|
im['max_detection_conf'] = ct_utils.get_max_conf(im)
|
|
88
|
-
|
|
88
|
+
|
|
89
89
|
# Pack the json output into a Pandas DataFrame
|
|
90
90
|
detection_results = pd.DataFrame(detection_results['images'])
|
|
91
|
-
|
|
91
|
+
|
|
92
92
|
print('Finished loading MegaDetector results for {} images from {}'.format(
|
|
93
93
|
len(detection_results),api_output_path))
|
|
94
94
|
|
|
@@ -111,7 +111,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
|
|
|
111
111
|
if 'failure' in im and im['failure'] is None:
|
|
112
112
|
del im['failure']
|
|
113
113
|
fields['images'] = images
|
|
114
|
-
|
|
114
|
+
|
|
115
115
|
# Convert the 'version' field back to a string as per format convention
|
|
116
116
|
try:
|
|
117
117
|
version = other_fields['info']['format_version']
|
|
@@ -120,7 +120,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
|
|
|
120
120
|
except Exception:
|
|
121
121
|
print('Warning: error determining format version')
|
|
122
122
|
pass
|
|
123
|
-
|
|
123
|
+
|
|
124
124
|
# Remove 'max_detection_conf' as per newer file convention (format >= v1.3)
|
|
125
125
|
try:
|
|
126
126
|
version = other_fields['info']['format_version']
|
|
@@ -132,20 +132,23 @@ def write_api_results(detection_results_table, other_fields, out_path):
|
|
|
132
132
|
except Exception:
|
|
133
133
|
print('Warning: error removing max_detection_conf from output')
|
|
134
134
|
pass
|
|
135
|
-
|
|
135
|
+
|
|
136
136
|
with open(out_path, 'w') as f:
|
|
137
137
|
json.dump(fields, f, indent=1)
|
|
138
138
|
|
|
139
139
|
print('Finished writing detection results to {}'.format(out_path))
|
|
140
140
|
|
|
141
141
|
|
|
142
|
-
def load_api_results_csv(filename, normalize_paths=True, filename_replacements=
|
|
142
|
+
def load_api_results_csv(filename, normalize_paths=True, filename_replacements=None, nrows=None):
|
|
143
143
|
"""
|
|
144
144
|
[DEPRECATED]
|
|
145
|
-
|
|
145
|
+
|
|
146
146
|
Loads .csv-formatted MegaDetector results to a pandas table
|
|
147
147
|
"""
|
|
148
148
|
|
|
149
|
+
if filename_replacements is None:
|
|
150
|
+
filename_replacements = {}
|
|
151
|
+
|
|
149
152
|
print('Loading MegaDetector results from {}'.format(filename))
|
|
150
153
|
|
|
151
154
|
detection_results = pd.read_csv(filename,nrows=nrows)
|
|
@@ -169,12 +172,12 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={
|
|
|
169
172
|
|
|
170
173
|
replacement_string = filename_replacements[string_to_replace]
|
|
171
174
|
|
|
172
|
-
#
|
|
173
|
-
for
|
|
174
|
-
row = detection_results.iloc[
|
|
175
|
+
# i_row = 0
|
|
176
|
+
for i_row in range(0,len(detection_results)):
|
|
177
|
+
row = detection_results.iloc[i_row]
|
|
175
178
|
fn = row['image_path']
|
|
176
179
|
fn = fn.replace(string_to_replace,replacement_string)
|
|
177
|
-
detection_results.at[
|
|
180
|
+
detection_results.at[i_row,'image_path'] = fn
|
|
178
181
|
|
|
179
182
|
print('Finished loading and de-serializing MD results for {} images from {}'.format(
|
|
180
183
|
len(detection_results),filename))
|
|
@@ -183,9 +186,9 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={
|
|
|
183
186
|
|
|
184
187
|
|
|
185
188
|
def write_api_results_csv(detection_results, filename):
|
|
186
|
-
"""
|
|
189
|
+
"""
|
|
187
190
|
[DEPRECATED]
|
|
188
|
-
|
|
191
|
+
|
|
189
192
|
Writes a Pandas table to csv in a way that's compatible with the .csv output
|
|
190
193
|
format. Currently just a wrapper around to_csv that forces output writing
|
|
191
194
|
to go through a common code path.
|