megadetector 10.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/__init__.py +0 -0
- megadetector/api/__init__.py +0 -0
- megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
- megadetector/classification/__init__.py +0 -0
- megadetector/classification/aggregate_classifier_probs.py +108 -0
- megadetector/classification/analyze_failed_images.py +227 -0
- megadetector/classification/cache_batchapi_outputs.py +198 -0
- megadetector/classification/create_classification_dataset.py +626 -0
- megadetector/classification/crop_detections.py +516 -0
- megadetector/classification/csv_to_json.py +226 -0
- megadetector/classification/detect_and_crop.py +853 -0
- megadetector/classification/efficientnet/__init__.py +9 -0
- megadetector/classification/efficientnet/model.py +415 -0
- megadetector/classification/efficientnet/utils.py +608 -0
- megadetector/classification/evaluate_model.py +520 -0
- megadetector/classification/identify_mislabeled_candidates.py +152 -0
- megadetector/classification/json_to_azcopy_list.py +63 -0
- megadetector/classification/json_validator.py +696 -0
- megadetector/classification/map_classification_categories.py +276 -0
- megadetector/classification/merge_classification_detection_output.py +509 -0
- megadetector/classification/prepare_classification_script.py +194 -0
- megadetector/classification/prepare_classification_script_mc.py +228 -0
- megadetector/classification/run_classifier.py +287 -0
- megadetector/classification/save_mislabeled.py +110 -0
- megadetector/classification/train_classifier.py +827 -0
- megadetector/classification/train_classifier_tf.py +725 -0
- megadetector/classification/train_utils.py +323 -0
- megadetector/data_management/__init__.py +0 -0
- megadetector/data_management/animl_to_md.py +161 -0
- megadetector/data_management/annotations/__init__.py +0 -0
- megadetector/data_management/annotations/annotation_constants.py +33 -0
- megadetector/data_management/camtrap_dp_to_coco.py +270 -0
- megadetector/data_management/cct_json_utils.py +566 -0
- megadetector/data_management/cct_to_md.py +184 -0
- megadetector/data_management/cct_to_wi.py +293 -0
- megadetector/data_management/coco_to_labelme.py +284 -0
- megadetector/data_management/coco_to_yolo.py +701 -0
- megadetector/data_management/databases/__init__.py +0 -0
- megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
- megadetector/data_management/databases/integrity_check_json_db.py +563 -0
- megadetector/data_management/databases/subset_json_db.py +195 -0
- megadetector/data_management/generate_crops_from_cct.py +200 -0
- megadetector/data_management/get_image_sizes.py +164 -0
- megadetector/data_management/labelme_to_coco.py +559 -0
- megadetector/data_management/labelme_to_yolo.py +349 -0
- megadetector/data_management/lila/__init__.py +0 -0
- megadetector/data_management/lila/create_lila_blank_set.py +556 -0
- megadetector/data_management/lila/create_lila_test_set.py +192 -0
- megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
- megadetector/data_management/lila/download_lila_subset.py +182 -0
- megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
- megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
- megadetector/data_management/lila/get_lila_image_counts.py +112 -0
- megadetector/data_management/lila/lila_common.py +319 -0
- megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
- megadetector/data_management/mewc_to_md.py +344 -0
- megadetector/data_management/ocr_tools.py +873 -0
- megadetector/data_management/read_exif.py +964 -0
- megadetector/data_management/remap_coco_categories.py +195 -0
- megadetector/data_management/remove_exif.py +156 -0
- megadetector/data_management/rename_images.py +194 -0
- megadetector/data_management/resize_coco_dataset.py +665 -0
- megadetector/data_management/speciesnet_to_md.py +41 -0
- megadetector/data_management/wi_download_csv_to_coco.py +247 -0
- megadetector/data_management/yolo_output_to_md_output.py +594 -0
- megadetector/data_management/yolo_to_coco.py +984 -0
- megadetector/data_management/zamba_to_md.py +188 -0
- megadetector/detection/__init__.py +0 -0
- megadetector/detection/change_detection.py +840 -0
- megadetector/detection/process_video.py +479 -0
- megadetector/detection/pytorch_detector.py +1451 -0
- megadetector/detection/run_detector.py +1267 -0
- megadetector/detection/run_detector_batch.py +2172 -0
- megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
- megadetector/detection/run_md_and_speciesnet.py +1604 -0
- megadetector/detection/run_tiled_inference.py +1044 -0
- megadetector/detection/tf_detector.py +209 -0
- megadetector/detection/video_utils.py +1379 -0
- megadetector/postprocessing/__init__.py +0 -0
- megadetector/postprocessing/add_max_conf.py +72 -0
- megadetector/postprocessing/categorize_detections_by_size.py +166 -0
- megadetector/postprocessing/classification_postprocessing.py +1943 -0
- megadetector/postprocessing/combine_batch_outputs.py +249 -0
- megadetector/postprocessing/compare_batch_results.py +2110 -0
- megadetector/postprocessing/convert_output_format.py +403 -0
- megadetector/postprocessing/create_crop_folder.py +629 -0
- megadetector/postprocessing/detector_calibration.py +570 -0
- megadetector/postprocessing/generate_csv_report.py +522 -0
- megadetector/postprocessing/load_api_results.py +223 -0
- megadetector/postprocessing/md_to_coco.py +428 -0
- megadetector/postprocessing/md_to_labelme.py +351 -0
- megadetector/postprocessing/md_to_wi.py +41 -0
- megadetector/postprocessing/merge_detections.py +392 -0
- megadetector/postprocessing/postprocess_batch_results.py +2140 -0
- megadetector/postprocessing/remap_detection_categories.py +226 -0
- megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
- megadetector/postprocessing/separate_detections_into_folders.py +795 -0
- megadetector/postprocessing/subset_json_detector_output.py +964 -0
- megadetector/postprocessing/top_folders_to_bottom.py +238 -0
- megadetector/postprocessing/validate_batch_results.py +332 -0
- megadetector/taxonomy_mapping/__init__.py +0 -0
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
- megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
- megadetector/taxonomy_mapping/simple_image_download.py +231 -0
- megadetector/taxonomy_mapping/species_lookup.py +1008 -0
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
- megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
- megadetector/tests/__init__.py +0 -0
- megadetector/tests/test_nms_synthetic.py +335 -0
- megadetector/utils/__init__.py +0 -0
- megadetector/utils/ct_utils.py +1857 -0
- megadetector/utils/directory_listing.py +199 -0
- megadetector/utils/extract_frames_from_video.py +307 -0
- megadetector/utils/gpu_test.py +125 -0
- megadetector/utils/md_tests.py +2072 -0
- megadetector/utils/path_utils.py +2872 -0
- megadetector/utils/process_utils.py +172 -0
- megadetector/utils/split_locations_into_train_val.py +237 -0
- megadetector/utils/string_utils.py +234 -0
- megadetector/utils/url_utils.py +825 -0
- megadetector/utils/wi_platform_utils.py +968 -0
- megadetector/utils/wi_taxonomy_utils.py +1766 -0
- megadetector/utils/write_html_image_list.py +239 -0
- megadetector/visualization/__init__.py +0 -0
- megadetector/visualization/plot_utils.py +309 -0
- megadetector/visualization/render_images_with_thumbnails.py +243 -0
- megadetector/visualization/visualization_utils.py +1973 -0
- megadetector/visualization/visualize_db.py +630 -0
- megadetector/visualization/visualize_detector_output.py +498 -0
- megadetector/visualization/visualize_video_output.py +705 -0
- megadetector-10.0.15.dist-info/METADATA +115 -0
- megadetector-10.0.15.dist-info/RECORD +147 -0
- megadetector-10.0.15.dist-info/WHEEL +5 -0
- megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
- megadetector-10.0.15.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,522 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
generate_csv_report.py
|
|
4
|
+
|
|
5
|
+
Generates a .csv report from a MD-formatted .json file with the following columns:
|
|
6
|
+
|
|
7
|
+
* filename
|
|
8
|
+
* datetime (if images or EXIF information is supplied)
|
|
9
|
+
* detection_category
|
|
10
|
+
* max_detection_confidence
|
|
11
|
+
* classification_category
|
|
12
|
+
* max_classification_confidence
|
|
13
|
+
* count
|
|
14
|
+
|
|
15
|
+
One row is generated per category pair per image. For example, these would be unique rows
(showing only the filename, detection_category, classification_category, and count columns):

image0001.jpg,animal,deer,4
image0001.jpg,animal,lion,4
image0001.jpg,animal,,4
image0001.jpg,person,,2

Images with no above-threshold detections will have a single row, with the classification
and count columns left blank:

image0001.jpg,empty,,

Images with processing errors will have a single row, with the error string in the
classification_category column and the count column left blank:

image0001.jpg,error,error_string,
|
|
29
|
+
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
#%% Constants and imports
|
|
33
|
+
|
|
34
|
+
import os
|
|
35
|
+
import json
|
|
36
|
+
import tempfile
|
|
37
|
+
import sys
|
|
38
|
+
import argparse
|
|
39
|
+
import uuid
|
|
40
|
+
|
|
41
|
+
import pandas as pd
|
|
42
|
+
|
|
43
|
+
from copy import deepcopy
|
|
44
|
+
|
|
45
|
+
from megadetector.utils.wi_taxonomy_utils import load_md_or_speciesnet_file
|
|
46
|
+
from megadetector.utils.ct_utils import get_max_conf
|
|
47
|
+
from megadetector.utils.ct_utils import is_list_sorted
|
|
48
|
+
from megadetector.detection.run_detector import \
|
|
49
|
+
get_typical_confidence_threshold_from_results
|
|
50
|
+
from megadetector.data_management.read_exif import \
|
|
51
|
+
read_exif_from_folder, ReadExifOptions, minimal_exif_tags
|
|
52
|
+
|
|
53
|
+
default_classification_threshold = 0.3
|
|
54
|
+
unknown_datetime_tag = ''
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
#%% Functions
|
|
58
|
+
|
|
59
|
+
def generate_csv_report(md_results_file,
                        output_file=None,
                        datetime_source=None,
                        folder_level_columns=None,
                        detection_confidence_threshold=None,
                        classification_confidence_threshold=None,
                        verbose=True):
    """
    Generates a .csv report from a MD-formatted .json file.

    One row is written per (detection category, classification category) pair per image.
    Images with no above-threshold detections get a single row whose detection category
    is "empty"; images with a non-empty "failure" field get a single row whose detection
    category is "error" and whose classification category is the failure string.

    Args:
        md_results_file (str): MD results .json file for which we should generate a report
        output_file (str, optional): .csv file to write; if this is None, we'll use md_results_file.csv
        datetime_source (str, optional): if datetime information is required, this should point to
            a folder of images, a MD results .json file (can be the same as the input file), or
            an exif_info.json file created with read_exif().
        folder_level_columns (list of int, optional): list of folder levels (where zero is the top-level
            folder in a path name) for which we should create separate columns.  Should be zero-indexed ints,
            or a comma-delimited list of zero-indexed int-strings.
        detection_confidence_threshold (float, optional): detections below this confidence threshold will not
            be included in the output data.  Defaults to the recommended value based on the .json file.
        classification_confidence_threshold (float, optional): classifications below this confidence threshold will
            not be included in the output data (i.e., detections will be considered "animal").
        verbose (bool, optional): intended to enable debug output; not currently referenced
            by this function.

    Returns:
        str: the output .csv filename

    Raises:
        ValueError: if [folder_level_columns] contains a negative or non-int value, or is a
            string that can't be parsed as a comma-delimited list of ints
        AssertionError: if [datetime_source] is neither a folder nor a file, if it is a file in
            an unrecognized format, or if the results violate the assumptions checked below
    """

    ##%% Load results file

    results = load_md_or_speciesnet_file(md_results_file)

    print('Loaded results for {} images'.format(len(results['images'])))

    detection_category_id_to_name = results['detection_categories']

    # Classification categories are optional; they're only required if at least one
    # above-threshold detection carries classifications (checked in the main loop).
    classification_category_id_to_name = None
    if 'classification_categories' in results:
        classification_category_id_to_name = results['classification_categories']

    if output_file is None:
        output_file = md_results_file + '.csv'


    ##%% Read datetime information if necessary

    # Maps filenames (as they appear in the datetime source) to datetime strings; stays
    # None if no datetime source was supplied.
    filename_to_datetime_string = None

    if datetime_source is not None:

        all_exif_results = None

        if os.path.isdir(datetime_source):

            # Read EXIF info from images
            read_exif_options = ReadExifOptions()
            read_exif_options.tags_to_include = minimal_exif_tags
            read_exif_options.byte_handling = 'delete'

            # Use a uniquely-named cache file in the system temp folder
            exif_cache_file = os.path.join(tempfile.gettempdir(),
                                           'md-exif-data',
                                           str(uuid.uuid1())+'.json')
            print('Reading EXIF datetime info from {}, writing to {}'.format(
                datetime_source,exif_cache_file))
            os.makedirs(os.path.dirname(exif_cache_file),exist_ok=True)

            all_exif_results = read_exif_from_folder(input_folder=datetime_source,
                                                     output_file=exif_cache_file,
                                                     options=read_exif_options,
                                                     recursive=True)

        else:

            assert os.path.isfile(datetime_source), \
                'datetime source {} is neither a folder nor a file'.format(datetime_source)

            # Load this, decide whether it's a MD file or an exif_info file
            with open(datetime_source,'r') as f:
                d = json.load(f)

            if isinstance(d,list):

                # A top-level list is treated as exif_info-style data and used as-is
                all_exif_results = d

            else:

                assert isinstance(d,dict), 'Unrecognized file format supplied as datetime source'
                assert 'images' in d,\
                    'The datetime source you provided doesn\'t look like a valid source .json file'

                # Convert MD-formatted image records to the same shape we get from
                # EXIF data, so both sources are handled by the same loop below.
                all_exif_results = []
                found_datetime = False
                for im in d['images']:
                    exif_result = {'file_name':im['file']}
                    if 'datetime' in im:
                        found_datetime = True
                        exif_result['exif_tags'] = {'DateTimeOriginal':im['datetime']}
                    all_exif_results.append(exif_result)
                if not found_datetime:
                    print('Warning: a MD results file was supplied as the datetime source, but it does not appear '
                          'to contain datetime information.')

        # ...if datetime_source is a folder/file

        assert all_exif_results is not None

        filename_to_datetime_string = {}

        for exif_result in all_exif_results:

            datetime_string = unknown_datetime_tag
            if ('exif_tags' in exif_result) and \
               (exif_result['exif_tags'] is not None) and \
               ('DateTimeOriginal' in exif_result['exif_tags']):
                datetime_string = exif_result['exif_tags']['DateTimeOriginal']
                if datetime_string is None:
                    datetime_string = ''
                else:
                    assert isinstance(datetime_string,str), 'Unrecognized datetime format'
            filename_to_datetime_string[exif_result['file_name']] = datetime_string

        # ...for each exif result

        # Report mismatches between the results file and the datetime source; these
        # are informational only, nothing below depends on them.
        image_files = [im['file'] for im in results['images']]
        image_files_set = set(image_files)

        files_in_exif_but_not_in_results = []
        files_in_results_but_not_in_exif = []
        files_with_no_datetime_info = []

        for fn in filename_to_datetime_string:
            dts = filename_to_datetime_string[fn]
            if (dts is None) or (dts == unknown_datetime_tag) or (len(dts) == 0):
                files_with_no_datetime_info.append(fn)
            if fn not in image_files_set:
                files_in_exif_but_not_in_results.append(fn)

        for fn in image_files_set:
            if fn not in filename_to_datetime_string:
                files_in_results_but_not_in_exif.append(fn)

        print('{} files (of {}) in EXIF info not found in MD results'.format(
            len(files_in_exif_but_not_in_results),len(filename_to_datetime_string)
        ))

        print('{} files (of {}) in MD results not found in MD EXIF info'.format(
            len(files_in_results_but_not_in_exif),len(image_files_set)
        ))

        print('Failed to read datetime information for {} files (of {}) in EXIF info'.format(
            len(files_with_no_datetime_info),len(filename_to_datetime_string)
        ))

    # ...if we need to deal with datetimes


    ##%% Parse folder level column specifier

    if folder_level_columns is not None:

        # Accept a comma-delimited string (this is what the command-line driver passes)
        if isinstance(folder_level_columns,str):
            tokens = folder_level_columns.split(',')
            folder_level_columns = [int(s) for s in tokens]
        for folder_level in folder_level_columns:
            if (not isinstance(folder_level,int)) or (folder_level < 0):
                raise ValueError('Illegal folder level specifier {}'.format(
                    str(folder_level_columns)))


    ##%% Fill in default thresholds

    if classification_confidence_threshold is None:
        classification_confidence_threshold = default_classification_threshold
    if detection_confidence_threshold is None:
        detection_confidence_threshold = \
            get_typical_confidence_threshold_from_results(results)

    assert detection_confidence_threshold is not None


    ##%% Fill in output records

    output_records = []

    # For each image
    #
    # im = results['images'][0]
    for im in results['images']:

        # Every output row for this image starts from a copy of [base_record], which
        # carries the fields:
        #
        # * filename
        # * datetime (if images or EXIF information is supplied)
        # * detection_category
        # * max_detection_confidence
        # * classification_category
        # * max_classification_confidence
        # * count
        #
        # ...plus one column per requested folder level.

        base_record = {}

        base_record['filename'] = im['file'].replace('\\','/')

        # Datetime (if necessary)
        datetime_string = ''
        if filename_to_datetime_string is not None:
            if im['file'] in filename_to_datetime_string:
                datetime_string = filename_to_datetime_string[im['file']]
        base_record['datetime'] = datetime_string

        # These are blank unless a specific row type fills them in below
        for s in ['detection_category','max_detection_confidence',
                  'classification_category','max_classification_confidence',
                  'count']:
            base_record[s] = ''

        # Folder level columns
        if folder_level_columns is not None:

            # Split the normalized filename, so folder levels agree with the
            # 'filename' column even when the results file uses backslashes.
            tokens = base_record['filename'].split('/')

            for folder_level in folder_level_columns:
                folder_level_column_name = 'folder_level_' + str(folder_level).zfill(2)
                if folder_level >= len(tokens):
                    # This path isn't deep enough to have this folder level
                    folder_level_value = ''
                else:
                    folder_level_value = tokens[folder_level]
                base_record[folder_level_column_name] = folder_level_value

        records_this_image = []

        # Create one output row if this image failed
        if 'failure' in im and im['failure'] is not None and len(im['failure']) > 0:

            record = deepcopy(base_record)
            record['detection_category'] = 'error'
            record['classification_category'] = im['failure']
            records_this_image.append(record)
            assert 'detections' not in im or im['detections'] is None

        else:

            assert 'detections' in im and im['detections'] is not None

            # Count above-threshold detections
            detections_above_threshold = []
            for det in im['detections']:
                if det['conf'] >= detection_confidence_threshold:
                    detections_above_threshold.append(det)

            # Only used for the "empty" row; presumably the highest confidence among
            # all detections in this image, including below-threshold ones (verify
            # against ct_utils.get_max_conf).
            max_detection_conf = get_max_conf(im)

            # Create one output row if this image is empty (i.e., has no
            # above-threshold detections)
            if len(detections_above_threshold) == 0:

                record = deepcopy(base_record)
                record['detection_category'] = 'empty'
                record['max_detection_confidence'] = max_detection_conf
                records_this_image.append(record)

            # ...if this image is empty

            else:

                # Maps a string of the form:
                #
                # detection_category:classification_category
                #
                # ...to a dict with fields ['max_detection_confidence',
                # 'max_classification_confidence','count','detection_category',
                # 'classification_category']
                category_info_string_to_record = {}

                for det in detections_above_threshold:

                    assert det['conf'] >= detection_confidence_threshold

                    detection_category_name = detection_category_id_to_name[det['category']]
                    detection_confidence = det['conf']
                    classification_category_name = ''
                    classification_confidence = 0.0

                    if ('classifications' in det) and (len(det['classifications']) > 0):

                        # Classifications should always be sorted by confidence.  Not
                        # technically required, but always true in practice.
                        #
                        # NOTE(review): only the first classification is used below, so
                        # this presumably expects descending confidence; confirm that
                        # is_list_sorted()'s default direction matches.
                        assert is_list_sorted([c[1] for c in det['classifications']]), \
                            'This script does not yet support unsorted classifications'
                        assert classification_category_id_to_name is not None, \
                            'If classifications are present, category mappings should be present'

                        # Only use the first classification
                        classification = det['classifications'][0]
                        if classification[1] >= classification_confidence_threshold:
                            classification_category_name = \
                                classification_category_id_to_name[classification[0]]
                            classification_confidence = classification[1]

                    # ...if classifications are present

                    # E.g. "animal:rodent", or "vehicle:"
                    category_info_string = detection_category_name + ':' + classification_category_name

                    if category_info_string not in category_info_string_to_record:
                        category_info_string_to_record[category_info_string] = {
                            'max_detection_confidence':0.0,
                            'max_classification_confidence':0.0,
                            'count':0,
                            'detection_category':detection_category_name,
                            'classification_category':classification_category_name
                        }

                    record = category_info_string_to_record[category_info_string]
                    record['count'] += 1
                    if detection_confidence > record['max_detection_confidence']:
                        record['max_detection_confidence'] = detection_confidence
                    if classification_confidence > record['max_classification_confidence']:
                        record['max_classification_confidence'] = classification_confidence

                # ...for each detection

                # Merge each per-category summary into a copy of the base record
                for record_in in category_info_string_to_record.values():
                    assert record_in['count'] > 0
                    record_out = deepcopy(base_record)
                    for k in record_in.keys():
                        assert k in record_out.keys()
                        record_out[k] = record_in[k]
                    records_this_image.append(record_out)

            # ...is this empty/non-empty?

        # ...if this image failed/didn't fail

        # Add to [records]
        output_records.extend(records_this_image)

    # ...for each image

    # Make sure every record has the same columns

    if len(output_records) == 0:
        print('Warning: no output records generated')
    else:
        column_names = output_records[0].keys()
        for record in output_records:
            assert record.keys() == column_names

    # Create folder for output file if necessary
    output_dir = os.path.dirname(output_file)
    if len(output_dir) > 0:
        os.makedirs(output_dir, exist_ok=True)

    # Write to .csv
    df = pd.DataFrame(output_records)
    df.to_csv(output_file,header=True,index=False)

    # from megadetector.utils.path_utils import open_file; open_file(output_file)

    return output_file

# ...generate_csv_report(...)
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
# %%
|
|
414
|
+
|
|
415
|
+
#%% Interactive driver
|
|
416
|
+
|
|
417
|
+
if False:

    # Nothing under this "if" ever executes when the module is imported or run;
    # the "#%%" cells below are scratch space for calling generate_csv_report()
    # by hand from an interactive session.
    pass

    #%% Configure options

    # Example run_detector_batch.py command line, kept for reference only (this is a
    # bare string, not executed); presumably the command used to create a results
    # file that includes datetime information.
    r"""
    python run_detector_batch.py MDV5A "g:\temp\md-test-images"
    "g:\temp\md-test-images\md_results_with_datetime.json"
    --recursive --output_relative_filenames --include_image_timestamp --include_exif_data
    """

    # These names deliberately match generate_csv_report()'s parameter names
    md_results_file = 'g:/temp/csv-report-test/md-results.json'
    datetime_source = 'g:/temp/csv-report-test/exif_data.json'

    # Alternative inputs: an image folder or a MD results file as the datetime source
    # datetime_source = 'g:/temp/md-test-images'
    # datetime_source = 'g:/temp/md-test-images/md_results_with_datetime.json'
    # md_results_file = 'g:/temp/md-test-images/md_results_with_datetime.json'
    # md_results_file = 'g:/temp/md-test-images/speciesnet_results_md_format.json'

    # None means "use the function's default" for each of these
    output_file = None
    folder_level_columns = [0,1,2,3]
    detection_confidence_threshold = None
    classification_confidence_threshold = None
    verbose = True


    #%% Programmatic execution

    generate_csv_report(md_results_file=md_results_file,
                        output_file=output_file,
                        datetime_source=datetime_source,
                        folder_level_columns=folder_level_columns,
                        detection_confidence_threshold=detection_confidence_threshold,
                        classification_confidence_threshold=classification_confidence_threshold,
                        verbose=verbose)
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
#%% Command-line driver
|
|
456
|
+
|
|
457
|
+
def main(): # noqa
    """
    Command-line entry point: parses arguments from sys.argv and passes them
    to generate_csv_report().  Prints usage and exits if no arguments are supplied.
    """

    cli = argparse.ArgumentParser(
        description='Generates a .csv report from a MD-formatted .json file')

    # (argument name, add_argument() keyword options).  Registered in list order,
    # so the generated help text lists them in this order.
    argument_specs = [
        ('md_results_file', {
            'type':str,
            'help':'Path to MD results file (.json)'
        }),
        ('--output_file', {
            'type':str,
            'help':'Output filename (.csv) (if omitted, will append .csv to the input file)'
        }),
        ('--datetime_source', {
            'type':str,
            'default':None,
            'help':'Image folder, exif_info.json file, or MD results file from which we should read datetime information'
        }),
        ('--folder_level_columns', {
            'type':str,
            'default':None,
            'help':'Comma-separated list of zero-indexed folder levels that should become columns in the output file'
        }),
        ('--detection_confidence_threshold', {
            'type':float,
            'default':None,
            'help':'Detection threshold (if omitted, chooses a reasonable default based on the .json file)'
        }),
        ('--classification_confidence_threshold', {
            'type':float,
            'default':None,
            'help':'Classification threshold (default {})'.format(default_classification_threshold)
        }),
        ('--verbose', {
            'action':'store_true',
            'help':'Enable additional debug output'
        }),
    ]

    for argument_name, argument_options in argument_specs:
        cli.add_argument(argument_name, **argument_options)

    # With nothing on the command line, show usage and stop
    if not sys.argv[1:]:
        cli.print_help()
        cli.exit()

    parsed = cli.parse_args()

    # --folder_level_columns is forwarded as a raw string; generate_csv_report()
    # handles the comma-delimited form itself.
    generate_csv_report(
        md_results_file=parsed.md_results_file,
        output_file=parsed.output_file,
        datetime_source=parsed.datetime_source,
        folder_level_columns=parsed.folder_level_columns,
        detection_confidence_threshold=parsed.detection_confidence_threshold,
        classification_confidence_threshold=parsed.classification_confidence_threshold,
        verbose=parsed.verbose)


if __name__ == '__main__':
    main()
|