megadetector 10.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +701 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +563 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +192 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +665 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +984 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2172 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1604 -0
  81. megadetector/detection/run_tiled_inference.py +1044 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1943 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2140 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +231 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2872 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1766 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1973 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +498 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.15.dist-info/METADATA +115 -0
  144. megadetector-10.0.15.dist-info/RECORD +147 -0
  145. megadetector-10.0.15.dist-info/WHEEL +5 -0
  146. megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.15.dist-info/top_level.txt +1 -0
@@ -0,0 +1,522 @@
1
+ """
2
+
3
+ generate_csv_report.py
4
+
5
+ Generates a .csv report from a MD-formatted .json file with the following columns:
6
+
7
+ * filename
8
+ * datetime (if images or EXIF information is supplied)
9
+ * detection_category
10
+ * max_detection_confidence
11
+ * classification_category
12
+ * max_classification_confidence
13
+ * count
14
+
15
+ One row is generated per category pair per image. For example, these would be unique rows:
16
+
17
+ image0001.jpg,animal,deer,4
18
+ image0001.jpg,animal,lion,4
19
+ image0001.jpg,animal,[none],4
20
+ image0001.jpg,person,[none],2
21
+
22
+ Images with no above-threshold detections will have a single row:
23
+
24
+ image0001.jpg,empty,[none],-1
25
+
26
+ Images with processing errors will have a single row:
27
+
28
+ image0001.jpg,error,error_string,-1
29
+
30
+ """
31
+
32
+ #%% Constants and imports
33
+
34
+ import os
35
+ import json
36
+ import tempfile
37
+ import sys
38
+ import argparse
39
+ import uuid
40
+
41
+ import pandas as pd
42
+
43
+ from copy import deepcopy
44
+
45
+ from megadetector.utils.wi_taxonomy_utils import load_md_or_speciesnet_file
46
+ from megadetector.utils.ct_utils import get_max_conf
47
+ from megadetector.utils.ct_utils import is_list_sorted
48
+ from megadetector.detection.run_detector import \
49
+ get_typical_confidence_threshold_from_results
50
+ from megadetector.data_management.read_exif import \
51
+ read_exif_from_folder, ReadExifOptions, minimal_exif_tags
52
+
53
+ default_classification_threshold = 0.3
54
+ unknown_datetime_tag = ''
55
+
56
+
57
+ #%% Functions
58
+
59
def generate_csv_report(md_results_file,
                        output_file=None,
                        datetime_source=None,
                        folder_level_columns=None,
                        detection_confidence_threshold=None,
                        classification_confidence_threshold=None,
                        verbose=True):
    """
    Generates a .csv report from a MD-formatted .json file.

    One row is written per (detection category, classification category) pair per
    image.  An image with no above-threshold detections gets a single row whose
    detection category is "empty" and whose max detection confidence is the value
    returned by get_max_conf(); an image with a non-empty "failure" field gets a single
    row whose detection category is "error" and whose classification category is the
    failure string.  All other category/confidence/count fields in those rows are
    left blank.

    Args:
        md_results_file (str): MD results .json file for which we should generate a report
        output_file (str, optional): .csv file to write; if this is None, we'll use
            md_results_file + '.csv'
        datetime_source (str, optional): if datetime information is required, this should
            point to a folder of images, a MD results .json file (can be the same as the
            input file), or an exif_info.json file created with read_exif().
        folder_level_columns (list of int, optional): list of folder levels (where zero is
            the top-level folder in a path name) for which we should create separate columns.
            Should be zero-indexed ints, or a comma-delimited string of zero-indexed ints.
        detection_confidence_threshold (float, optional): detections below this confidence
            threshold will not be included in the output data.  Defaults to the value returned
            by get_typical_confidence_threshold_from_results() for the loaded results.
        classification_confidence_threshold (float, optional): classifications below this
            confidence threshold will not be included in the output data (i.e., the
            classification category for that detection is left blank).  Defaults to
            default_classification_threshold.
        verbose (bool, optional): accepted for interface compatibility; this function does
            not currently consult it (all status messages are always printed).

    Returns:
        str: the output .csv filename

    Raises:
        ValueError: if [folder_level_columns] contains a negative or non-int level, or a
            string token that can't be parsed as an int
        AssertionError: if the datetime source or the results file violate the format
            assumptions checked below
    """

    ##%% Load results file

    results = load_md_or_speciesnet_file(md_results_file)

    print('Loaded results for {} images'.format(len(results['images'])))

    detection_category_id_to_name = results['detection_categories']
    classification_category_id_to_name = None
    if 'classification_categories' in results:
        classification_category_id_to_name = results['classification_categories']

    if output_file is None:
        output_file = md_results_file + '.csv'


    ##%% Read datetime information if necessary

    # Stays None when no datetime source was supplied; in that case no datetime
    # column is written.
    filename_to_datetime_string = None

    if datetime_source is not None:

        all_exif_results = None

        if os.path.isdir(datetime_source):

            # Read EXIF info from images, caching the results in a uniquely-named
            # file under the system temp folder
            read_exif_options = ReadExifOptions()
            read_exif_options.tags_to_include = minimal_exif_tags
            read_exif_options.byte_handling = 'delete'
            exif_cache_file = os.path.join(tempfile.gettempdir(),
                                           'md-exif-data',
                                           str(uuid.uuid1())+'.json')
            print('Reading EXIF datetime info from {}, writing to {}'.format(
                datetime_source,exif_cache_file))
            os.makedirs(os.path.dirname(exif_cache_file),exist_ok=True)

            all_exif_results = read_exif_from_folder(input_folder=datetime_source,
                                                     output_file=exif_cache_file,
                                                     options=read_exif_options,
                                                     recursive=True)

        else:

            assert os.path.isfile(datetime_source), \
                'datetime source {} is neither a folder nor a file'.format(datetime_source)

            # The file is always (re-)loaded here, even if it is the same file as
            # [md_results_file].  A list is treated as exif_info data; a dict with
            # an 'images' field is treated as a MD results file.
            with open(datetime_source,'r') as f:
                d = json.load(f)

            if isinstance(d,list):

                all_exif_results = d

            else:

                assert isinstance(d,dict), 'Unrecognized file format supplied as datetime source'
                assert 'images' in d,\
                    'The datetime source you provided doesn\'t look like a valid source .json file'

                # Re-shape MD results into the same structure as exif_info records
                all_exif_results = []
                found_datetime = False
                for im in d['images']:
                    exif_result = {'file_name':im['file']}
                    if 'datetime' in im:
                        found_datetime = True
                        exif_result['exif_tags'] = {'DateTimeOriginal':im['datetime']}
                    all_exif_results.append(exif_result)
                if not found_datetime:
                    print('Warning: a MD results file was supplied as the datetime source, but it does not appear '
                          'to contain datetime information.')

        # ...if datetime_source is a folder/file

        assert all_exif_results is not None

        filename_to_datetime_string = {}

        for exif_result in all_exif_results:

            datetime_string = unknown_datetime_tag
            exif_tags = exif_result['exif_tags'] if ('exif_tags' in exif_result) else None
            if (exif_tags is not None) and ('DateTimeOriginal' in exif_tags):
                datetime_string = exif_tags['DateTimeOriginal']
                if datetime_string is None:
                    datetime_string = ''
                else:
                    assert isinstance(datetime_string,str), 'Unrecognized datetime format'
            filename_to_datetime_string[exif_result['file_name']] = datetime_string

        # ...for each exif result

        # Report how well the datetime source lines up with the results file
        image_files_set = set([im['file'] for im in results['images']])

        files_in_exif_but_not_in_results = []
        files_with_no_datetime_info = []

        for fn, dts in filename_to_datetime_string.items():
            if (not dts) or (dts == unknown_datetime_tag):
                files_with_no_datetime_info.append(fn)
            if fn not in image_files_set:
                files_in_exif_but_not_in_results.append(fn)

        files_in_results_but_not_in_exif = \
            [fn for fn in image_files_set if fn not in filename_to_datetime_string]

        print('{} files (of {}) in EXIF info not found in MD results'.format(
            len(files_in_exif_but_not_in_results),len(filename_to_datetime_string)
        ))

        print('{} files (of {}) in MD results not found in MD EXIF info'.format(
            len(files_in_results_but_not_in_exif),len(image_files_set)
        ))

        print('Failed to read datetime information for {} files (of {}) in EXIF info'.format(
            len(files_with_no_datetime_info),len(filename_to_datetime_string)
        ))

    # ...if we need to deal with datetimes


    ##%% Parse folder level column specifier

    if folder_level_columns is not None:

        # The command-line driver passes this as a comma-delimited string
        if isinstance(folder_level_columns,str):
            folder_level_columns = [int(s) for s in folder_level_columns.split(',')]

        for folder_level in folder_level_columns:
            if (not isinstance(folder_level,int)) or (folder_level < 0):
                raise ValueError('Illegal folder level specifier {}'.format(
                    str(folder_level_columns)))


    ##%% Fill in default thresholds

    if classification_confidence_threshold is None:
        classification_confidence_threshold = default_classification_threshold
    if detection_confidence_threshold is None:
        detection_confidence_threshold = \
            get_typical_confidence_threshold_from_results(results)

    assert detection_confidence_threshold is not None


    ##%% Fill in output records

    output_records = []

    # For each image
    #
    # im = results['images'][0]
    for im in results['images']:

        # Column order for every row:
        #
        # * filename
        # * datetime (only if a datetime source was supplied)
        # * detection_category
        # * max_detection_confidence
        # * classification_category
        # * max_classification_confidence
        # * count
        # * folder_level_XX (one per requested folder level)

        normalized_filename = im['file'].replace('\\','/')

        base_record = {}
        base_record['filename'] = normalized_filename

        # Datetime (if necessary).  The lookup uses the filename exactly as it appears
        # in the results file.
        #
        # NOTE(review): the column is added only when a datetime source was supplied,
        # matching the documented column list; confirm that is the intended layout.
        if filename_to_datetime_string is not None:
            base_record['datetime'] = filename_to_datetime_string.get(im['file'],'')

        for s in ['detection_category','max_detection_confidence',
                  'classification_category','max_classification_confidence',
                  'count']:
            base_record[s] = ''

        # Folder level columns.  Split the separator-normalized name, so paths written
        # with backslashes produce the same folder tokens as the 'filename' column implies.
        if folder_level_columns is not None:

            tokens = normalized_filename.split('/')

            for folder_level in folder_level_columns:
                folder_level_column_name = 'folder_level_' + str(folder_level).zfill(2)
                if folder_level >= len(tokens):
                    folder_level_value = ''
                else:
                    folder_level_value = tokens[folder_level]
                base_record[folder_level_column_name] = folder_level_value

        records_this_image = []

        # Create one output row if this image failed
        if 'failure' in im and im['failure'] is not None and len(im['failure']) > 0:

            record = deepcopy(base_record)
            record['detection_category'] = 'error'
            record['classification_category'] = im['failure']
            records_this_image.append(record)
            assert 'detections' not in im or im['detections'] is None

        else:

            assert 'detections' in im and im['detections'] is not None

            # Keep only above-threshold detections
            detections_above_threshold = \
                [det for det in im['detections'] if det['conf'] >= detection_confidence_threshold]
            max_detection_conf = get_max_conf(im)

            # Create one output row if this image is empty (i.e., has no
            # above-threshold detections)
            if len(detections_above_threshold) == 0:

                record = deepcopy(base_record)
                record['detection_category'] = 'empty'
                record['max_detection_confidence'] = max_detection_conf
                records_this_image.append(record)

            # ...if this image is empty

            else:

                # Maps a string of the form:
                #
                # detection_category:classification_category
                #
                # ...to a dict with fields ['max_detection_confidence',
                # 'max_classification_confidence','count','detection_category',
                # 'classification_category']
                category_info_string_to_record = {}

                for det in detections_above_threshold:

                    detection_category_name = detection_category_id_to_name[det['category']]
                    detection_confidence = det['conf']

                    # Blank/0.0 unless the top classification clears the threshold
                    classification_category_name = ''
                    classification_confidence = 0.0

                    if ('classifications' in det) and (len(det['classifications']) > 0):

                        # Classifications should always be sorted by confidence.  Not
                        # technically required, but always true in practice.
                        #
                        # NOTE(review): the first entry is treated as the best one below,
                        # which presumably means descending order; confirm that
                        # is_list_sorted()'s default direction agrees with that.
                        assert is_list_sorted([c[1] for c in det['classifications']]), \
                            'This script does not yet support unsorted classifications'
                        assert classification_category_id_to_name is not None, \
                            'If classifications are present, category mappings should be present'

                        # Only use the first classification
                        classification = det['classifications'][0]
                        if classification[1] >= classification_confidence_threshold:
                            classification_category_name = \
                                classification_category_id_to_name[classification[0]]
                            classification_confidence = classification[1]

                    # ...if classifications are present

                    # E.g. "animal:rodent", or "vehicle:"
                    category_info_string = detection_category_name + ':' + classification_category_name

                    if category_info_string not in category_info_string_to_record:
                        category_info_string_to_record[category_info_string] = {
                            'max_detection_confidence':0.0,
                            'max_classification_confidence':0.0,
                            'count':0,
                            'detection_category':detection_category_name,
                            'classification_category':classification_category_name
                        }

                    record = category_info_string_to_record[category_info_string]
                    record['count'] += 1
                    if detection_confidence > record['max_detection_confidence']:
                        record['max_detection_confidence'] = detection_confidence
                    if classification_confidence > record['max_classification_confidence']:
                        record['max_classification_confidence'] = classification_confidence

                # ...for each detection

                # Merge each per-category summary into a copy of the per-image base record
                for record_in in category_info_string_to_record.values():
                    assert record_in['count'] > 0
                    record_out = deepcopy(base_record)
                    for k, v in record_in.items():
                        assert k in record_out
                        record_out[k] = v
                    records_this_image.append(record_out)

            # ...is this empty/non-empty?

        # ...if this image failed/didn't fail

        # Add to [records]
        output_records.extend(records_this_image)

    # ...for each image

    # Make sure every record has the same columns

    if len(output_records) == 0:
        print('Warning: no output records generated')
    else:
        column_names = output_records[0].keys()
        for record in output_records:
            assert record.keys() == column_names

    # Create folder for output file if necessary
    output_dir = os.path.dirname(output_file)
    if len(output_dir) > 0:
        os.makedirs(output_dir, exist_ok=True)

    # Write to .csv
    df = pd.DataFrame(output_records)
    df.to_csv(output_file,header=True,index=False)

    # from megadetector.utils.path_utils import open_file; open_file(output_file)

    # The docstring has always promised the output filename; actually return it
    return output_file

# ...generate_csv_report(...)
411
+
412
+
413
+ # %%
414
+
415
+ #%% Interactive driver
416
+
417
# Notebook-style scratch cells for running the report interactively.  Nothing in
# this block executes on import or from the command line, because the whole
# thing is guarded by "if False".
if False:

    pass

    #%% Configure options

    # Example command used to produce a results file that includes datetime info
    r"""
    python run_detector_batch.py MDV5A "g:\temp\md-test-images"
    "g:\temp\md-test-images\md_results_with_datetime.json"
    --recursive --output_relative_filenames --include_image_timestamp --include_exif_data
    """

    # Thresholds left as None fall back to the defaults chosen inside
    # generate_csv_report()
    detection_confidence_threshold = None
    classification_confidence_threshold = None

    # One output column for each of the first four folder levels
    folder_level_columns = [0,1,2,3]

    # None means "write next to the input file, with .csv appended"
    output_file = None
    verbose = True

    md_results_file = 'g:/temp/csv-report-test/md-results.json'
    datetime_source = 'g:/temp/csv-report-test/exif_data.json'

    # Alternative inputs used during testing:
    #
    # datetime_source = 'g:/temp/md-test-images'
    # datetime_source = 'g:/temp/md-test-images/md_results_with_datetime.json'
    # md_results_file = 'g:/temp/md-test-images/md_results_with_datetime.json'
    # md_results_file = 'g:/temp/md-test-images/speciesnet_results_md_format.json'


    #%% Programmatic execution

    generate_csv_report(
        md_results_file=md_results_file,
        output_file=output_file,
        datetime_source=datetime_source,
        folder_level_columns=folder_level_columns,
        detection_confidence_threshold=detection_confidence_threshold,
        classification_confidence_threshold=classification_confidence_threshold,
        verbose=verbose
    )
453
+
454
+
455
+ #%% Command-line driver
456
+
457
def main(): # noqa
    """
    Command-line entry point: parses arguments and calls generate_csv_report().

    Prints usage and exits if no arguments are supplied.
    """

    parser = argparse.ArgumentParser(
        description='Generates a .csv report from a MD-formatted .json file')

    # The one required, positional argument
    parser.add_argument(
        'md_results_file',
        type=str,
        help='Path to MD results file (.json)')

    # Optional arguments, registered in the order they should appear in the help text
    optional_arguments = [
        ('--output_file',
         {'type':str,
          'help':'Output filename (.csv) (if omitted, will append .csv to the input file)'}),
        ('--datetime_source',
         {'type':str,
          'default':None,
          'help':'Image folder, exif_info.json file, or MD results file from which we should read datetime information'}),
        ('--folder_level_columns',
         {'type':str,
          'default':None,
          'help':'Comma-separated list of zero-indexed folder levels that should become columns in the output file'}),
        ('--detection_confidence_threshold',
         {'type':float,
          'default':None,
          'help':'Detection threshold (if omitted, chooses a reasonable default based on the .json file)'}),
        ('--classification_confidence_threshold',
         {'type':float,
          'default':None,
          'help':'Classification threshold (default {})'.format(default_classification_threshold)}),
        ('--verbose',
         {'action':'store_true',
          'help':'Enable additional debug output'})
    ]

    for flag, argument_options in optional_arguments:
        parser.add_argument(flag, **argument_options)

    # With no arguments at all, show usage rather than an argparse error
    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # --folder_level_columns is forwarded as a raw string; generate_csv_report()
    # handles the comma-delimited form.
    generate_csv_report(
        md_results_file=args.md_results_file,
        output_file=args.output_file,
        datetime_source=args.datetime_source,
        folder_level_columns=args.folder_level_columns,
        detection_confidence_threshold=args.detection_confidence_threshold,
        classification_confidence_threshold=args.classification_confidence_threshold,
        verbose=args.verbose
    )

# ...def main()


if __name__ == '__main__':
    main()