megadetector 5.0.27-py3-none-any.whl → 5.0.29-py3-none-any.whl

This diff compares the content of two publicly released versions of this package, as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
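
The diff below covers two files: the new generate_csv_report.py (item 80, shown in full) and load_api_results.py (item 81). As orientation for the report format described in the new module's docstring, here is a sketch of consuming the generated report with pandas; the report filename is hypothetical and assumes the default output naming:

    import pandas as pd

    # One row per (detection_category, classification_category) pair per image
    report = pd.read_csv('md-results.json.csv')

    # E.g., total count of above-threshold animal detections, by classified species
    animals = report[report['detection_category'] == 'animal']
    print(animals.groupby('classification_category')['count'].sum())
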
megadetector/postprocessing/generate_csv_report.py (new file)

@@ -0,0 +1,508 @@
+ """
+
+ generate_csv_report.py
+
+ Generates a .csv report from a MD-formatted .json file with the following columns:
+
+ * filename
+ * datetime (if images or EXIF information is supplied)
+ * detection_category
+ * max_detection_confidence
+ * classification_category
+ * max_classification_confidence
+ * count
+
+ One row is generated per category pair per image. For example, these would be unique rows:
+
+ image0001.jpg,animal,deer,4
+ image0001.jpg,animal,lion,4
+ image0001.jpg,animal,[none],4
+ image0001.jpg,person,[none],2
+
+ Images with no above-threshold detections will have a single row:
+
+ image0001.jpg,empty,[none],-1
+
+ Images with processing errors will have a single row:
+
+ image0001.jpg,error,error_string,-1
+
+ """
+
+ #%% Constants and imports
+
+ import os
+ import json
+ import tempfile
+ import sys
+ import argparse
+ import uuid
+
+ import pandas as pd
+
+ from copy import deepcopy
+
+ from megadetector.utils.wi_utils import load_md_or_speciesnet_file
+ from megadetector.utils.ct_utils import get_max_conf
+ from megadetector.utils.ct_utils import is_list_sorted
+ from megadetector.detection.run_detector import \
+     get_typical_confidence_threshold_from_results
+ from megadetector.data_management.read_exif import \
+     read_exif_from_folder, ReadExifOptions, minimal_exif_tags
+
+ default_classification_threshold = 0.3
+ unknown_datetime_tag = ''
+
+
+ #%% Functions
+
+ def generate_csv_report(md_results_file,
+                         output_file=None,
+                         datetime_source=None,
+                         folder_level_columns=None,
+                         detection_confidence_threshold=None,
+                         classification_confidence_threshold=None,
+                         verbose=True):
+     """
+     Generates a .csv report from a MD-formatted .json file.
+
+     Args:
+         md_results_file (str): MD results .json file for which we should generate a report
+         output_file (str, optional): .csv file to write; if this is None, we'll use md_results_file.csv
+         datetime_source (str, optional): if datetime information is required, this should point to
+             a folder of images, a MD results .json file (can be the same as the input file), or
+             an exif_info.json file created with read_exif().
+         folder_level_columns (list of int, optional): list of folder levels (where zero is the
+             top-level folder in a path name) for which we should create separate columns. Should be
+             zero-indexed ints, or a comma-delimited list of zero-indexed int-strings.
+         detection_confidence_threshold (float, optional): detections below this confidence threshold
+             will not be included in the output data. Defaults to the recommended value based on the
+             .json file.
+         classification_confidence_threshold (float, optional): classifications below this confidence
+             threshold will not be included in the output data (i.e., detections will be considered
+             "animal").
+         verbose (bool, optional): enable debug output, including the progress bar.
+
+     Returns:
+         str: the output .csv filename
+     """
+
+     ##%% Load results file
+
+     results = load_md_or_speciesnet_file(md_results_file)
+
+     print('Loaded results for {} images'.format(len(results['images'])))
+
+     detection_category_id_to_name = results['detection_categories']
+     classification_category_id_to_name = None
+     if 'classification_categories' in results:
+         classification_category_id_to_name = results['classification_categories']
+
+     if output_file is None:
+         output_file = md_results_file + '.csv'
+
+     ##%% Read datetime information if necessary
+
+     filename_to_datetime_string = None
+
+     if datetime_source is not None:
+
+         all_exif_results = None
+
+         if os.path.isdir(datetime_source):
+
+             # Read EXIF info from images
+             read_exif_options = ReadExifOptions()
+             read_exif_options.tags_to_include = minimal_exif_tags
+             read_exif_options.byte_handling = 'delete'
+             exif_cache_file = os.path.join(tempfile.gettempdir(),
+                                            'md-exif-data',
+                                            str(uuid.uuid1())+'.json')
+             print('Reading EXIF datetime info from {}, writing to {}'.format(
+                 datetime_source,exif_cache_file))
+             os.makedirs(os.path.dirname(exif_cache_file),exist_ok=True)
+
+             all_exif_results = read_exif_from_folder(input_folder=datetime_source,
+                                                      output_file=exif_cache_file,
+                                                      options=read_exif_options,
+                                                      recursive=True)
+
+         else:
+             assert os.path.isfile(datetime_source), \
+                 'datetime source {} is neither a folder nor a file'.format(datetime_source)
+
+             # Load this, and decide whether it's a MD results file or an exif_info file
+             with open(datetime_source,'r') as f:
+                 d = json.load(f)
+
+             if isinstance(d,list):
+                 all_exif_results = d
+             else:
+                 assert isinstance(d,dict), 'Unrecognized file format supplied as datetime source'
+                 assert 'images' in d, \
+                     'The datetime source you provided doesn\'t look like a valid source .json file'
+                 all_exif_results = []
+                 found_datetime = False
+                 for im in d['images']:
+                     exif_result = {'file_name':im['file']}
+                     if 'datetime' in im:
+                         found_datetime = True
+                         exif_result['exif_tags'] = {'DateTimeOriginal':im['datetime']}
+                     all_exif_results.append(exif_result)
+                 if not found_datetime:
+                     print('Warning: a MD results file was supplied as the datetime source, but it does not appear '
+                           'to contain datetime information.')
+
+         assert all_exif_results is not None
+
+         filename_to_datetime_string = {}
+
+         for exif_result in all_exif_results:
+             datetime_string = unknown_datetime_tag
+             if ('exif_tags' in exif_result) and \
+                (exif_result['exif_tags'] is not None) and \
+                ('DateTimeOriginal' in exif_result['exif_tags']):
+                 datetime_string = exif_result['exif_tags']['DateTimeOriginal']
+                 if datetime_string is None:
+                     datetime_string = ''
+                 else:
+                     assert isinstance(datetime_string,str), 'Unrecognized datetime format'
+             filename_to_datetime_string[exif_result['file_name']] = datetime_string
+
+         image_files = [im['file'] for im in results['images']]
+         image_files_set = set(image_files)
+
+         files_in_exif_but_not_in_results = []
+         files_in_results_but_not_in_exif = []
+         files_with_no_datetime_info = []
+
+         for fn in filename_to_datetime_string:
+             dts = filename_to_datetime_string[fn]
+             if (dts is None) or (dts == unknown_datetime_tag) or (len(dts) == 0):
+                 files_with_no_datetime_info.append(fn)
+             if fn not in image_files_set:
+                 files_in_exif_but_not_in_results.append(fn)
+
+         for fn in image_files_set:
+             if fn not in filename_to_datetime_string:
+                 files_in_results_but_not_in_exif.append(fn)
+
+         print('{} files (of {}) in EXIF info not found in MD results'.format(
+             len(files_in_exif_but_not_in_results),len(filename_to_datetime_string)))
+
+         print('{} files (of {}) in MD results not found in EXIF info'.format(
+             len(files_in_results_but_not_in_exif),len(image_files_set)))
+
+         print('Failed to read datetime information for {} files (of {}) in EXIF info'.format(
+             len(files_with_no_datetime_info),len(filename_to_datetime_string)))
+
+     # ...if we need to deal with datetimes
+
+
+     ##%% Parse folder level column specifier
+
+     if folder_level_columns is not None:
+
+         if isinstance(folder_level_columns,str):
+             tokens = folder_level_columns.split(',')
+             folder_level_columns = [int(s) for s in tokens]
+         for folder_level in folder_level_columns:
+             if (not isinstance(folder_level,int)) or (folder_level < 0):
+                 raise ValueError('Illegal folder level specifier {}'.format(
+                     str(folder_level_columns)))
+
+
+     ##%% Fill in default thresholds
+
+     if classification_confidence_threshold is None:
+         classification_confidence_threshold = default_classification_threshold
+     if detection_confidence_threshold is None:
+         detection_confidence_threshold = \
+             get_typical_confidence_threshold_from_results(results)
+
+     assert detection_confidence_threshold is not None
+
+
+     ##%% Fill in output records
+
+     output_records = []
+
+     # For each image
+     #
+     # im = results['images'][0]
+     for im in results['images']:
+
+         """
+         * filename
+         * datetime (if images or EXIF information is supplied)
+         * detection_category
+         * max_detection_confidence
+         * classification_category
+         * max_classification_confidence
+         * count
+         """
+
+         base_record = {}
+
+         base_record['filename'] = im['file'].replace('\\','/')
+
+         # Datetime (if necessary)
+         if filename_to_datetime_string is not None:
+             if im['file'] in filename_to_datetime_string:
+                 datetime_string = filename_to_datetime_string[im['file']]
+             else:
+                 datetime_string = ''
+             base_record['datetime'] = datetime_string
+
+         for s in ['detection_category','max_detection_confidence',
+                   'classification_category','max_classification_confidence',
+                   'count']:
+             base_record[s] = ''
+
+         # Folder level columns
+         tokens = im['file'].split('/')
+
+         if folder_level_columns is not None:
+
+             for folder_level in folder_level_columns:
+                 folder_level_column_name = 'folder_level_' + str(folder_level).zfill(2)
+                 if folder_level >= len(tokens):
+                     folder_level_value = ''
+                 else:
+                     folder_level_value = tokens[folder_level]
+                 base_record[folder_level_column_name] = folder_level_value
+
+         records_this_image = []
+
+         # Create one output row if this image failed
+         if 'failure' in im and im['failure'] is not None and len(im['failure']) > 0:
+
+             record = deepcopy(base_record)
+             record['detection_category'] = 'error'
+             record['classification_category'] = im['failure']
+             records_this_image.append(record)
+             assert 'detections' not in im or im['detections'] is None
+
+         else:
+
+             assert 'detections' in im and im['detections'] is not None
+
+             # Count above-threshold detections
+             detections_above_threshold = []
+             for det in im['detections']:
+                 if det['conf'] >= detection_confidence_threshold:
+                     detections_above_threshold.append(det)
+             max_detection_conf = get_max_conf(im)
+
+             # Create one output row if this image is empty (i.e., has no
+             # above-threshold detections)
+             if len(detections_above_threshold) == 0:
+
+                 record = deepcopy(base_record)
+                 record['detection_category'] = 'empty'
+                 record['max_detection_confidence'] = max_detection_conf
+                 records_this_image.append(record)
+
+             # ...if this image is empty
+
+             else:
+
+                 # Maps a string of the form:
+                 #
+                 # detection_category:classification_category
+                 #
+                 # ...to a dict with fields ['max_detection_conf','max_classification_conf','count']
+                 category_info_string_to_record = {}
+
+                 for det in detections_above_threshold:
+
+                     assert det['conf'] >= detection_confidence_threshold
+
+                     detection_category_name = detection_category_id_to_name[det['category']]
+                     detection_confidence = det['conf']
+                     classification_category_name = ''
+                     classification_confidence = 0.0
+
+                     if ('classifications' in det) and (len(det['classifications']) > 0):
+
+                         # Classifications should always be sorted by confidence. Not
+                         # technically required, but always true in practice.
+                         assert is_list_sorted([c[1] for c in det['classifications']]), \
+                             'This script does not yet support unsorted classifications'
+                         assert classification_category_id_to_name is not None, \
+                             'If classifications are present, category mappings should be present'
+
+                         # Only use the first classification
+                         classification = det['classifications'][0]
+                         if classification[1] >= classification_confidence_threshold:
+                             classification_category_name = \
+                                 classification_category_id_to_name[classification[0]]
+                             classification_confidence = classification[1]
+
+                     # ...if classifications are present
+
+                     # E.g. "animal:rodent", or "vehicle:"
+                     category_info_string = detection_category_name + ':' + classification_category_name
+
+                     if category_info_string not in category_info_string_to_record:
+                         category_info_string_to_record[category_info_string] = {
+                             'max_detection_confidence':0.0,
+                             'max_classification_confidence':0.0,
+                             'count':0,
+                             'detection_category':detection_category_name,
+                             'classification_category':classification_category_name
+                         }
+
+                     record = category_info_string_to_record[category_info_string]
+                     record['count'] += 1
+                     if detection_confidence > record['max_detection_confidence']:
+                         record['max_detection_confidence'] = detection_confidence
+                     if classification_confidence > record['max_classification_confidence']:
+                         record['max_classification_confidence'] = classification_confidence
+
+                 # ...for each detection
+
+                 for record_in in category_info_string_to_record.values():
+                     assert record_in['count'] > 0
+                     record_out = deepcopy(base_record)
+                     for k in record_in.keys():
+                         assert k in record_out.keys()
+                         record_out[k] = record_in[k]
+                     records_this_image.append(record_out)
+
+             # ...is this empty/non-empty?
+
+         # ...if this image failed/didn't fail
+
+         # Add to [records]
+         output_records.extend(records_this_image)
+
+     # ...for each image
+
+     # Make sure every record has the same columns
+     column_names = output_records[0].keys()
+     for record in output_records:
+         assert record.keys() == column_names
+
+     # Write to .csv
+     df = pd.DataFrame(output_records)
+     df.to_csv(output_file,header=True,index=False)
+
+     # from megadetector.utils.path_utils import open_file; open_file(output_file)
+
+     return output_file
+
+ # ...generate_csv_report(...)
+
+
+ #%% Interactive driver
+
+ if False:
+
+     pass
+
+     #%% Configure options
+
+     r"""
+     python run_detector_batch.py MDV5A "g:\temp\md-test-images"
+         "g:\temp\md-test-images\md_results_with_datetime.json"
+         --recursive --output_relative_filenames --include_image_timestamp --include_exif_data
+     """
+
+     md_results_file = 'g:/temp/csv-report-test/md-results.json'
+     datetime_source = 'g:/temp/csv-report-test/exif_data.json'
+
+     # datetime_source = 'g:/temp/md-test-images'
+     # datetime_source = 'g:/temp/md-test-images/md_results_with_datetime.json'
+     # md_results_file = 'g:/temp/md-test-images/md_results_with_datetime.json'
+     # md_results_file = 'g:/temp/md-test-images/speciesnet_results_md_format.json'
+
+     output_file = None
+     folder_level_columns = [0,1,2,3]
+     detection_confidence_threshold = None
+     classification_confidence_threshold = None
+     verbose = True
+
+
+     #%% Programmatic execution
+
+     generate_csv_report(md_results_file=md_results_file,
+                         output_file=output_file,
+                         datetime_source=datetime_source,
+                         folder_level_columns=folder_level_columns,
+                         detection_confidence_threshold=detection_confidence_threshold,
+                         classification_confidence_threshold=classification_confidence_threshold,
+                         verbose=verbose)
+
+
+ #%% Command-line driver
+
+ def main(): # noqa
+
+     parser = argparse.ArgumentParser(
+         description='Generates a .csv report from a MD-formatted .json file')
+
+     parser.add_argument(
+         'md_results_file',
+         type=str,
+         help='Path to MD results file (.json)')
+
+     parser.add_argument(
+         '--output_file',
+         type=str,
+         help='Output filename (.csv) (if omitted, will append .csv to the input file)')
+
+     parser.add_argument(
+         '--datetime_source',
+         type=str,
+         default=None,
+         help='Image folder, exif_info.json file, or MD results file from which we should read datetime information'
+     )
+
+     parser.add_argument(
+         '--folder_level_columns',
+         type=str,
+         default=None,
+         help='Comma-separated list of zero-indexed folder levels that should become columns in the output file'
+     )
+
+     parser.add_argument(
+         '--detection_confidence_threshold',
+         type=float,
+         default=None,
+         help='Detection threshold (if omitted, chooses a reasonable default based on the .json file)'
+     )
+
+     parser.add_argument(
+         '--classification_confidence_threshold',
+         type=float,
+         default=None,
+         help='Classification threshold (default {})'.format(default_classification_threshold)
+     )
+
+     parser.add_argument(
+         '--verbose',
+         action='store_true',
+         help='Enable additional debug output'
+     )
+
+     if len(sys.argv[1:]) == 0:
+         parser.print_help()
+         parser.exit()
+
+     args = parser.parse_args()
+
+     generate_csv_report(md_results_file=args.md_results_file,
+                         output_file=args.output_file,
+                         datetime_source=args.datetime_source,
+                         folder_level_columns=args.folder_level_columns,
+                         detection_confidence_threshold=args.detection_confidence_threshold,
+                         classification_confidence_threshold=args.classification_confidence_threshold,
+                         verbose=args.verbose)
+
+ if __name__ == '__main__':
+     main()
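
For reference, a minimal usage sketch of the new module (the module path comes from the file list above; input paths are hypothetical):

    from megadetector.postprocessing.generate_csv_report import generate_csv_report

    # Writes md-results.json.csv (the default output name), pulling timestamps
    # from the image folder and emitting folder_level_00/folder_level_01 columns
    generate_csv_report(md_results_file='md-results.json',
                        datetime_source='images/',
                        folder_level_columns=[0, 1])

The same report can presumably be generated from a shell via the main() entry point above:

    python -m megadetector.postprocessing.generate_csv_report md-results.json --datetime_source images/ --folder_level_columns 0,1
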
megadetector/postprocessing/load_api_results.py

@@ -4,7 +4,7 @@ load_api_results.py

  DEPRECATED

- As of 2023.12, this module is used in postprocessing and RDE. Not recommended
+ As of 2023.12, this module is still used in postprocessing and RDE, but it's not recommended
  for new code.

  Loads the output of the batch processing API (json) into a Pandas dataframe.
@@ -18,11 +18,12 @@ Includes functions to read/write the (very very old) .csv results format.
  import json
  import os

- from typing import Dict, Mapping, Optional, Tuple
+ from typing import Mapping, Optional

  import pandas as pd

  from megadetector.utils import ct_utils
+ from megadetector.utils.wi_utils import load_md_or_speciesnet_file


  #%% Functions for loading .json results into a Pandas DataFrame, and writing back to .json
@@ -30,7 +31,7 @@ from megadetector.utils import ct_utils
  def load_api_results(api_output_path: str, normalize_paths: bool = True,
                       filename_replacements: Optional[Mapping[str, str]] = None,
                       force_forward_slashes: bool = True
-                      ) -> Tuple[pd.DataFrame, Dict]:
+                      ) -> tuple[pd.DataFrame, dict]:
      r"""
      Loads json-formatted MegaDetector results to a Pandas DataFrame.

@@ -47,11 +48,10 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
          detection_results: pd.DataFrame, contains at least the columns ['file', 'detections','failure']
          other_fields: a dict containing fields in the results other than 'images'
      """
-
+
      print('Loading results from {}'.format(api_output_path))

-     with open(api_output_path) as f:
-         detection_results = json.load(f)
+     detection_results = load_md_or_speciesnet_file(api_output_path)

      # Validate that this is really a detector output file
      for s in ['info', 'detection_categories', 'images']:
@@ -65,12 +65,12 @@

      if normalize_paths:
          for image in detection_results['images']:
-             image['file'] = os.path.normpath(image['file'])
+             image['file'] = os.path.normpath(image['file'])

      if force_forward_slashes:
          for image in detection_results['images']:
              image['file'] = image['file'].replace('\\','/')
-
+
      # Replace some path tokens to match local paths to original blob structure
      if filename_replacements is not None:
          for string_to_replace in filename_replacements.keys():
@@ -79,16 +79,16 @@
                  im['file'] = im['file'].replace(string_to_replace,replacement_string)

      print('Converting results to dataframe')
-
+
      # If this is a newer file that doesn't include maximum detection confidence values,
      # add them, because our unofficial internal dataframe format includes this.
      for im in detection_results['images']:
          if 'max_detection_conf' not in im:
              im['max_detection_conf'] = ct_utils.get_max_conf(im)
-
+
      # Pack the json output into a Pandas DataFrame
      detection_results = pd.DataFrame(detection_results['images'])
-
+
      print('Finished loading MegaDetector results for {} images from {}'.format(
          len(detection_results),api_output_path))

@@ -111,7 +111,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
          if 'failure' in im and im['failure'] is None:
              del im['failure']
      fields['images'] = images
-
+
      # Convert the 'version' field back to a string as per format convention
      try:
          version = other_fields['info']['format_version']
@@ -120,7 +120,7 @@
      except Exception:
          print('Warning: error determining format version')
          pass
-
+
      # Remove 'max_detection_conf' as per newer file convention (format >= v1.3)
      try:
          version = other_fields['info']['format_version']
@@ -132,20 +132,23 @@
      except Exception:
          print('Warning: error removing max_detection_conf from output')
          pass
-
+
      with open(out_path, 'w') as f:
          json.dump(fields, f, indent=1)

      print('Finished writing detection results to {}'.format(out_path))


- def load_api_results_csv(filename, normalize_paths=True, filename_replacements={}, nrows=None):
+ def load_api_results_csv(filename, normalize_paths=True, filename_replacements=None, nrows=None):
      """
      [DEPRECATED]
-
+
      Loads .csv-formatted MegaDetector results to a pandas table
      """

+     if filename_replacements is None:
+         filename_replacements = {}
+
      print('Loading MegaDetector results from {}'.format(filename))

      detection_results = pd.read_csv(filename,nrows=nrows)
@@ -169,12 +172,12 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={

          replacement_string = filename_replacements[string_to_replace]

-         # iRow = 0
-         for iRow in range(0,len(detection_results)):
-             row = detection_results.iloc[iRow]
+         # i_row = 0
+         for i_row in range(0,len(detection_results)):
+             row = detection_results.iloc[i_row]
              fn = row['image_path']
              fn = fn.replace(string_to_replace,replacement_string)
-             detection_results.at[iRow,'image_path'] = fn
+             detection_results.at[i_row,'image_path'] = fn

      print('Finished loading and de-serializing MD results for {} images from {}'.format(
          len(detection_results),filename))
@@ -183,9 +186,9 @@


  def write_api_results_csv(detection_results, filename):
-     """
+     """
      [DEPRECATED]
-
+
      Writes a Pandas table to csv in a way that's compatible with the .csv output
      format. Currently just a wrapper around to_csv that forces output writing
      to go through a common code path.
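
For comparison, a sketch of calling the updated load_api_results (module path from the file list above; the results path is hypothetical):

    from megadetector.postprocessing.load_api_results import load_api_results

    # detection_results: DataFrame with at least 'file', 'detections', and
    # 'failure' columns ('max_detection_conf' is filled in when absent);
    # other_fields: everything in the .json file outside 'images'
    detection_results, other_fields = load_api_results('md-results.json')

Because loading now goes through load_md_or_speciesnet_file(), the same call should also accept SpeciesNet-formatted .json files, though the module remains deprecated for new code.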