megadetector 5.0.28__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; see the registry's advisory page for more details.

Files changed (176):
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +231 -224
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +340 -337
  65. megadetector/detection/pytorch_detector.py +304 -262
  66. megadetector/detection/run_detector.py +177 -164
  67. megadetector/detection/run_detector_batch.py +364 -363
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +256 -249
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +290 -282
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +415 -415
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +219 -146
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -499
  81. megadetector/postprocessing/load_api_results.py +23 -20
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +313 -298
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1018 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1457 -398
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +61 -61
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2526
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +401 -397
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +79 -73
  124. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  128. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
  129. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  130. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  131. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  132. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  133. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  134. megadetector/data_management/importers/awc_to_json.py +0 -191
  135. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  136. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  137. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  138. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  139. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  140. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  141. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  142. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  143. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  144. megadetector/data_management/importers/ena24_to_json.py +0 -276
  145. megadetector/data_management/importers/filenames_to_json.py +0 -386
  146. megadetector/data_management/importers/helena_to_cct.py +0 -283
  147. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  148. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  149. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  150. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  151. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  152. megadetector/data_management/importers/missouri_to_json.py +0 -490
  153. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  154. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  155. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  156. megadetector/data_management/importers/pc_to_json.py +0 -365
  157. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  158. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  159. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  160. megadetector/data_management/importers/rspb_to_json.py +0 -356
  161. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  162. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  163. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  164. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  165. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  166. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  167. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  168. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  169. megadetector/data_management/importers/ubc_to_json.py +0 -399
  170. megadetector/data_management/importers/umn_to_json.py +0 -507
  171. megadetector/data_management/importers/wellington_to_json.py +0 -263
  172. megadetector/data_management/importers/wi_to_json.py +0 -442
  173. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  174. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  175. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  176. megadetector-5.0.28.dist-info/RECORD +0 -209
@@ -1,499 +1,508 @@
1
- """
2
-
3
- generate_csv_report.py
4
-
5
- Generates a .csv report from a MD-formatted .json file with the following columns:
6
-
7
- * filename
8
- * datetime (if images or EXIF information is supplied)
9
- * detection_category
10
- * max_detection_confidence
11
- * classification_category
12
- * max_classification_confidence
13
- * count
14
-
15
- One row is generated per category pair per image. For example, these would be unique rows:
16
-
17
- image0001.jpg,animal,deer,4
18
- image0001.jpg,animal,lion,4
19
- image0001.jpg,animal,[none],4
20
- image0001.jpg,person,[none],2
21
-
22
- Images with no above-threshold detections will have a single row:
23
-
24
- image0001.jpg,empty,[none],-1
25
-
26
- Images with processing errors will have a single row:
27
-
28
- image0001.jpg,error,error_string,-1
29
-
30
- """
31
-
32
- #%% Constants and imports
33
-
34
- import os
35
- import json
36
- import tempfile
37
- import uuid
38
- import pandas as pd
39
-
40
- from copy import deepcopy
41
-
42
- from megadetector.utils.wi_utils import load_md_or_speciesnet_file
43
- from megadetector.utils.ct_utils import get_max_conf
44
- from megadetector.utils.ct_utils import is_list_sorted
45
-
46
- from megadetector.detection.run_detector import \
47
- get_typical_confidence_threshold_from_results
48
-
49
- from megadetector.data_management.read_exif import \
50
- read_exif_from_folder, ReadExifOptions, minimal_exif_tags
51
-
52
- default_classification_threshold = 0.3
53
- unknown_datetime_tag = ''
54
-
55
-
56
- #%% Functions
57
-
58
- def generate_csv_report(md_results_file,
59
- output_file=None,
60
- datetime_source=None,
61
- folder_level_columns=None,
62
- detection_confidence_threshold=None,
63
- classification_confidence_threshold=None,
64
- verbose=True):
65
- """
66
- Generates a .csv report from a MD-formatted .json file
67
-
68
- Args:
69
- md_results_file (str): MD results .json file for which we should generate a report
70
- output_file (str, optional): .csv file to write; if this is None, we'll use md_results_file.csv
71
- datetime_source (str, optional): if datetime information is required, this should point to
72
- a folder of images, a MD results .json file (can be the same as the input file), or
73
- an exif_info.json file created with read_exif().
74
- folder_level_columns (list of int, optional): list of folder levels (where zero is the top-level
75
- folder in a path name) for which we should create separate columns. Should be zero-indexed ints,
76
- or a comma-delimited list of zero-indexed int-strings.
77
- detection_confidence_threshold (float, optional): detections below this confidence threshold will not
78
- be included in the output data. Defaults to the recommended value based on the .json file.
79
- classification_confidence_threshold (float, optional): classifications below this confidence threshold will
80
- not be included in the output data (i.e., detections will be considered "animal").
81
- verbose (bool, optional): enable debug output, including the progress bar,
82
- """
83
-
84
- ##%% Load results file
85
-
86
- results = load_md_or_speciesnet_file(md_results_file)
87
-
88
- print('Loaded results for {} images'.format(len(results['images'])))
89
-
90
- detection_category_id_to_name = results['detection_categories']
91
- classification_category_id_to_name = None
92
- if 'classification_categories' in results:
93
- classification_category_id_to_name = results['classification_categories']
94
-
95
- if output_file is None:
96
- output_file = md_results_file + '.csv'
97
-
98
- ##%% Read datetime information if necessary
99
-
100
- filename_to_datetime_string = None
101
-
102
- if datetime_source is not None:
103
-
104
- all_exif_results = None
105
-
106
- if os.path.isdir(datetime_source):
107
-
108
- # Read EXIF info from images
109
- read_exif_options = ReadExifOptions()
110
- read_exif_options.tags_to_include = minimal_exif_tags
111
- read_exif_options.byte_handling = 'delete'
112
- exif_cache_file = os.path.join(tempfile.gettempdir(),
113
- 'md-exif-data',
114
- str(uuid.uuid1())+'.json')
115
- print('Reading EXIF datetime info from {}, writing to {}'.format(
116
- datetime_source,exif_cache_file))
117
- os.makedirs(os.path.dirname(exif_cache_file),exist_ok=True)
118
-
119
- all_exif_results = read_exif_from_folder(input_folder=datetime_source,
120
- output_file=exif_cache_file,
121
- options=read_exif_options,
122
- recursive=True)
123
-
124
- else:
125
- assert os.path.isfile(datetime_source), \
126
- 'datetime source {} is neither a folder nor a file'.format(datetime_source)
127
-
128
- # Is this the same file we've already read?
129
-
130
- # Load this, decide whether it's a MD file or an exif_info file
131
- with open(datetime_source,'r') as f:
132
- d = json.load(f)
133
-
134
- if isinstance(d,list):
135
- all_exif_results = d
136
- else:
137
- assert isinstance(d,dict), 'Unrecognized file format supplied as datetime source'
138
- assert 'images' in d,\
139
- 'The datetime source you provided doesn\'t look like a valid source .json file'
140
- all_exif_results = []
141
- found_datetime = False
142
- for im in d['images']:
143
- exif_result = {'file_name':im['file']}
144
- if 'datetime' in im:
145
- found_datetime = True
146
- exif_result['exif_tags'] = {'DateTimeOriginal':im['datetime']}
147
- all_exif_results.append(exif_result)
148
- if not found_datetime:
149
- print('Warning: a MD results file was supplied as the datetime source, but it does not appear '
150
- 'to contain datetime information.')
151
-
152
- assert all_exif_results is not None
153
-
154
- filename_to_datetime_string = {}
155
-
156
- for exif_result in all_exif_results:
157
- datetime_string = unknown_datetime_tag
158
- if ('exif_tags' in exif_result) and \
159
- (exif_result['exif_tags'] is not None) and \
160
- ('DateTimeOriginal' in exif_result['exif_tags']):
161
- datetime_string = exif_result['exif_tags']['DateTimeOriginal']
162
- if datetime_string is None:
163
- datetime_string = ''
164
- else:
165
- assert isinstance(datetime_string,str), 'Unrecognized datetime format'
166
- filename_to_datetime_string[exif_result['file_name']] = datetime_string
167
-
168
- image_files = [im['file'] for im in results['images']]
169
- image_files_set = set(image_files)
170
-
171
- files_in_exif_but_not_in_results = []
172
- files_in_results_but_not_in_exif = []
173
- files_with_no_datetime_info = []
174
-
175
- for fn in filename_to_datetime_string:
176
- dts = filename_to_datetime_string[fn]
177
- if (dts is None) or (dts == unknown_datetime_tag) or (len(dts) == 0):
178
- files_with_no_datetime_info.append(fn)
179
- if fn not in image_files_set:
180
- files_in_exif_but_not_in_results.append(fn)
181
-
182
- for fn in image_files_set:
183
- if fn not in filename_to_datetime_string:
184
- files_in_results_but_not_in_exif.append(fn)
185
-
186
- print('{} files (of {}) in EXIF info not found in MD results'.format(
187
- len(files_in_exif_but_not_in_results),len(filename_to_datetime_string)
188
- ))
189
-
190
- print('{} files (of {}) in MD results not found in MD EXIF info'.format(
191
- len(files_in_results_but_not_in_exif),len(image_files_set)
192
- ))
193
-
194
- print('Failed to read datetime information for {} files (of {}) in EXIF info'.format(
195
- len(files_with_no_datetime_info),len(filename_to_datetime_string)
196
- ))
197
-
198
- # ...if we need to deal with datetimes
199
-
200
-
201
- ##%% Parse folder level column specifier
202
-
203
- if folder_level_columns is not None:
204
- if isinstance(folder_level_columns,str):
205
- tokens = folder_level_columns.split(',')
206
- folder_level_columns = [int(s) for s in tokens]
207
- for folder_level in folder_level_columns:
208
- if (not isinstance(folder_level,int)) or (folder_level < 0):
209
- raise ValueError('Illegal folder level specifier {}'.format(
210
- str(folder_level_columns)))
211
-
212
-
213
- ##%% Fill in default thresholds
214
-
215
- if classification_confidence_threshold is None:
216
- classification_confidence_threshold = default_classification_threshold
217
- if detection_confidence_threshold is None:
218
- detection_confidence_threshold = \
219
- get_typical_confidence_threshold_from_results(results)
220
-
221
- assert detection_confidence_threshold is not None
222
-
223
-
224
- ##%% Fill in output records
225
-
226
- output_records = []
227
-
228
- # For each image
229
- #
230
- # im = results['images'][0]
231
- for im in results['images']:
232
-
233
- """
234
- * filename
235
- * datetime (if images or EXIF information is supplied)
236
- * detection_category
237
- * max_detection_confidence
238
- * classification_category
239
- * max_classification_confidence
240
- * count
241
- """
242
-
243
- base_record = {}
244
-
245
- base_record['filename'] = im['file'].replace('\\','/')
246
-
247
- # Datetime (if necessary)
248
- if filename_to_datetime_string is not None:
249
- if im['file'] in filename_to_datetime_string:
250
- datetime_string = filename_to_datetime_string[im['file']]
251
- else:
252
- datetime_string = ''
253
- base_record['datetime'] = datetime_string
254
-
255
- for s in ['detection_category','max_detection_confidence',
256
- 'classification_category','max_classification_confidence',
257
- 'count']:
258
- base_record[s] = ''
259
-
260
- # Folder level columns
261
- tokens = im['file'].split('/')
262
-
263
- for folder_level in folder_level_columns:
264
- folder_level_column_name = 'folder_level_' + str(folder_level).zfill(2)
265
- if folder_level >= len(tokens):
266
- folder_level_value = ''
267
- else:
268
- folder_level_value = tokens[folder_level]
269
- base_record[folder_level_column_name] = folder_level_value
270
-
271
- records_this_image = []
272
-
273
- # Create one output row if this image failed
274
- if 'failure' in im and im['failure'] is not None and len(im['failure']) > 0:
275
-
276
- record = deepcopy(base_record)
277
- record['detection_category'] = 'error'
278
- record['classification_category'] = im['failure']
279
- records_this_image.append(record)
280
- assert 'detections' not in im or im['detections'] is None
281
-
282
- else:
283
-
284
- assert 'detections' in im and im['detections'] is not None
285
-
286
- # Count above-threshold detections
287
- detections_above_threshold = []
288
- for det in im['detections']:
289
- if det['conf'] >= detection_confidence_threshold:
290
- detections_above_threshold.append(det)
291
- max_detection_conf = get_max_conf(im)
292
-
293
- # Create one output row if this image is empty (i.e., has no
294
- # above-threshold detections)
295
- if len(detections_above_threshold) == 0:
296
-
297
- record = deepcopy(base_record)
298
- record['detection_category'] = 'empty'
299
- record['max_detection_confidence'] = max_detection_conf
300
- records_this_image.append(record)
301
-
302
- # ...if this image is empty
303
-
304
- else:
305
-
306
- # Maps a string of the form:
307
- #
308
- # detection_category:classification_category
309
- #
310
- # ...to a dict with fields ['max_detection_conf','max_classification_conf','count']
311
- category_info_string_to_record = {}
312
-
313
- for det in detections_above_threshold:
314
-
315
- assert det['conf'] >= detection_confidence_threshold
316
-
317
- detection_category_name = detection_category_id_to_name[det['category']]
318
- detection_confidence = det['conf']
319
- classification_category_name = ''
320
- classificaition_confidence = 0.0
321
-
322
- if ('classifications' in det) and (len(det['classifications']) > 0):
323
-
324
- # Classifications should always be sorted by confidence. Not
325
- # technically required, but always true in practice.
326
- assert is_list_sorted([c[1] for c in det['classifications']]), \
327
- 'This script does not yet support unsorted classifications'
328
- assert classification_category_id_to_name is not None, \
329
- 'If classifications are present, category mappings should be present'
330
-
331
- # Only use the first classification
332
- classification = det['classifications'][0]
333
- if classification[1] >= classification_confidence_threshold:
334
- classification_category_name = \
335
- classification_category_id_to_name[classification[0]]
336
- classification_confidence = classification[1]
337
-
338
- # ...if classifications are present
339
-
340
- # E.g. "animal:rodent", or "vehicle:"
341
- category_info_string = detection_category_name + ':' + classification_category_name
342
-
343
- if category_info_string not in category_info_string_to_record:
344
- category_info_string_to_record[category_info_string] = {
345
- 'max_detection_confidence':0.0,
346
- 'max_classification_confidence':0.0,
347
- 'count':0,
348
- 'detection_category':detection_category_name,
349
- 'classification_category':classification_category_name
350
- }
351
-
352
- record = category_info_string_to_record[category_info_string]
353
- record['count'] += 1
354
- if detection_confidence > record['max_detection_confidence']:
355
- record['max_detection_confidence'] = detection_confidence
356
- if classification_confidence > record['max_classification_confidence']:
357
- record['max_classification_confidence'] = classification_confidence
358
-
359
- # ...for each detection
360
-
361
- for record_in in category_info_string_to_record.values():
362
- assert record_in['count'] > 0
363
- record_out = deepcopy(base_record)
364
- for k in record_in.keys():
365
- assert k in record_out.keys()
366
- record_out[k] = record_in[k]
367
- records_this_image.append(record_out)
368
-
369
- # ...is this empty/non-empty?
370
-
371
- # ...if this image failed/didn't fail
372
-
373
- # Add to [records]
374
- output_records.extend(records_this_image)
375
-
376
- # ...for each image
377
-
378
- # Make sure every record has the same columns
379
- column_names = output_records[0].keys()
380
- for record in output_records:
381
- assert record.keys() == column_names
382
-
383
- # Write to .csv
384
- df = pd.DataFrame(output_records)
385
- df.to_csv(output_file,header=True,index=False)
386
-
387
- # from megadetector.utils.path_utils import open_file; open_file(output_file)
388
-
389
- # ...generate_csv_report(...)
390
-
391
-
392
- #%% Interactive driver
393
-
394
- if False:
395
-
396
- pass
397
-
398
- #%% Configure options
399
-
400
- """
401
- python run_detector_batch.py MDV5A "g:\temp\md-test-images" "g:\temp\md-test-images\md_results_with_datetime.json" --recursive --output_relative_filenames --include_image_timestamp --include_exif_data
402
- """
403
-
404
- md_results_file = 'g:/temp/csv-report-test/md-results.json'
405
- datetime_source = 'g:/temp/csv-report-test/exif_data.json'
406
-
407
- # datetime_source = 'g:/temp/md-test-images'
408
- # datetime_source = 'g:/temp/md-test-images/md_results_with_datetime.json'
409
- # md_results_file = 'g:/temp/md-test-images/md_results_with_datetime.json'
410
- # md_results_file = 'g:/temp/md-test-images/speciesnet_results_md_format.json'
411
-
412
- output_file = None
413
- folder_level_columns = [0,1,2,3]
414
- detection_confidence_threshold = None
415
- classification_confidence_threshold = None
416
- verbose = True
417
-
418
-
419
- #%% Programmatic execution
420
-
421
- generate_csv_report(md_results_file=md_results_file,
422
- output_file=output_file,
423
- datetime_source=datetime_source,
424
- folder_level_columns=folder_level_columns,
425
- detection_confidence_threshold=detection_confidence_threshold,
426
- classification_confidence_threshold=classification_confidence_threshold,
427
- verbose=verbose)
428
-
429
-
430
- #%% Command-line driver
431
-
432
- import sys,argparse
433
-
434
- def main():
435
-
436
- parser = argparse.ArgumentParser(
437
- description='Generates a .csv report from a MD-formatted .json file')
438
-
439
- parser.add_argument(
440
- 'md_results_file',
441
- type=str,
442
- help='Path to MD results file (.json)')
443
-
444
- parser.add_argument(
445
- '--output_file',
446
- type=str,
447
- help='Output filename (.csv) (if omitted, will append .csv to the input file)')
448
-
449
- parser.add_argument(
450
- '--datetime_source',
451
- type=str,
452
- default=None,
453
- help='Image folder, exif_info.json file, or MD results file from which we should read datetime information'
454
- )
455
-
456
- parser.add_argument(
457
- '--folder_level_columns',
458
- type=str,
459
- default=None,
460
- help='Comma-separated list of zero-indexed folder levels that should become columns in the output file'
461
- )
462
-
463
- parser.add_argument(
464
- '--detection_confidence_threshold',
465
- type=float,
466
- default=None,
467
- help='Detection threshold (if omitted, chooses a reasonable default based on the .json file)'
468
- )
469
-
470
- parser.add_argument(
471
- '--classification_confidence_threshold',
472
- type=float,
473
- default=None,
474
- help='Classification threshold (default {})'.format(default_classification_threshold)
475
- )
476
-
477
- parser.add_argument(
478
- '--verbose',
479
- action='store_true',
480
- help='Enable additional debug output'
481
- )
482
-
483
-
484
- if len(sys.argv[1:]) == 0:
485
- parser.print_help()
486
- parser.exit()
487
-
488
- args = parser.parse_args()
489
-
490
- generate_csv_report(md_results_file=args.md_results_file,
491
- output_file=args.output_file,
492
- datetime_source=args.datetime_source,
493
- folder_level_columns=args.folder_level_columns,
494
- detection_confidence_threshold=args.detection_confidence_threshold,
495
- classification_confidence_threshold=args.classification_confidence_threshold,
496
- verbose=args.verbose)
497
-
498
- if __name__ == '__main__':
499
- main()
1
+ """
2
+
3
+ generate_csv_report.py
4
+
5
+ Generates a .csv report from a MD-formatted .json file with the following columns:
6
+
7
+ * filename
8
+ * datetime (if images or EXIF information is supplied)
9
+ * detection_category
10
+ * max_detection_confidence
11
+ * classification_category
12
+ * max_classification_confidence
13
+ * count
14
+
15
+ One row is generated per category pair per image. For example, these would be unique rows:
16
+
17
+ image0001.jpg,animal,deer,4
18
+ image0001.jpg,animal,lion,4
19
+ image0001.jpg,animal,[none],4
20
+ image0001.jpg,person,[none],2
21
+
22
+ Images with no above-threshold detections will have a single row:
23
+
24
+ image0001.jpg,empty,[none],-1
25
+
26
+ Images with processing errors will have a single row:
27
+
28
+ image0001.jpg,error,error_string,-1
29
+
30
+ """
31
+
32
+ #%% Constants and imports
33
+
34
+ import os
35
+ import json
36
+ import tempfile
37
+ import sys
38
+ import argparse
39
+ import uuid
40
+
41
+ import pandas as pd
42
+
43
+ from copy import deepcopy
44
+
45
+ from megadetector.utils.wi_utils import load_md_or_speciesnet_file
46
+ from megadetector.utils.ct_utils import get_max_conf
47
+ from megadetector.utils.ct_utils import is_list_sorted
48
+ from megadetector.detection.run_detector import \
49
+ get_typical_confidence_threshold_from_results
50
+ from megadetector.data_management.read_exif import \
51
+ read_exif_from_folder, ReadExifOptions, minimal_exif_tags
52
+
53
+ default_classification_threshold = 0.3
54
+ unknown_datetime_tag = ''
55
+
56
+
57
+ #%% Functions
58
+
59
def generate_csv_report(md_results_file,
                        output_file=None,
                        datetime_source=None,
                        folder_level_columns=None,
                        detection_confidence_threshold=None,
                        classification_confidence_threshold=None,
                        verbose=True):
    """
    Generates a .csv report from a MD-formatted .json file.

    One row is generated per (detection_category, classification_category) pair per
    image; images with no above-threshold detections get a single "empty" row, and
    images with processing errors get a single "error" row.

    Args:
        md_results_file (str): MD results .json file for which we should generate a report
        output_file (str, optional): .csv file to write; if this is None, we'll use md_results_file.csv
        datetime_source (str, optional): if datetime information is required, this should point to
            a folder of images, a MD results .json file (can be the same as the input file), or
            an exif_info.json file created with read_exif().
        folder_level_columns (list of int, optional): list of folder levels (where zero is the top-level
            folder in a path name) for which we should create separate columns. Should be zero-indexed ints,
            or a comma-delimited list of zero-indexed int-strings.
        detection_confidence_threshold (float, optional): detections below this confidence threshold will not
            be included in the output data. Defaults to the recommended value based on the .json file.
        classification_confidence_threshold (float, optional): classifications below this confidence threshold will
            not be included in the output data (i.e., detections will be considered "animal").
        verbose (bool, optional): enable debug output

    Returns:
        str: the output .csv filename
    """

    ##%% Load results file

    results = load_md_or_speciesnet_file(md_results_file)

    if verbose:
        print('Loaded results for {} images'.format(len(results['images'])))

    detection_category_id_to_name = results['detection_categories']
    classification_category_id_to_name = None
    if 'classification_categories' in results:
        classification_category_id_to_name = results['classification_categories']

    if output_file is None:
        output_file = md_results_file + '.csv'


    ##%% Read datetime information if necessary

    # Maps filename to a datetime string ('' when unavailable); None when no
    # datetime source was supplied (in which case no datetime column is written).
    filename_to_datetime_string = None

    if datetime_source is not None:

        all_exif_results = None

        if os.path.isdir(datetime_source):

            # Read EXIF info from images, caching the results in a temp file
            read_exif_options = ReadExifOptions()
            read_exif_options.tags_to_include = minimal_exif_tags
            read_exif_options.byte_handling = 'delete'
            exif_cache_file = os.path.join(tempfile.gettempdir(),
                                           'md-exif-data',
                                           str(uuid.uuid1())+'.json')
            if verbose:
                print('Reading EXIF datetime info from {}, writing to {}'.format(
                    datetime_source,exif_cache_file))
            os.makedirs(os.path.dirname(exif_cache_file),exist_ok=True)

            all_exif_results = read_exif_from_folder(input_folder=datetime_source,
                                                     output_file=exif_cache_file,
                                                     options=read_exif_options,
                                                     recursive=True)

        else:

            assert os.path.isfile(datetime_source), \
                'datetime source {} is neither a folder nor a file'.format(datetime_source)

            # Load this, decide whether it's a MD file or an exif_info file
            with open(datetime_source,'r') as f:
                d = json.load(f)

            if isinstance(d,list):

                # A list is the read_exif() output format
                all_exif_results = d

            else:

                # A dict with an 'images' key is a MD results file; synthesize
                # read_exif()-style records from its per-image datetime fields.
                assert isinstance(d,dict), 'Unrecognized file format supplied as datetime source'
                assert 'images' in d,\
                    'The datetime source you provided doesn\'t look like a valid source .json file'
                all_exif_results = []
                found_datetime = False
                for im in d['images']:
                    exif_result = {'file_name':im['file']}
                    if 'datetime' in im:
                        found_datetime = True
                        exif_result['exif_tags'] = {'DateTimeOriginal':im['datetime']}
                    all_exif_results.append(exif_result)
                if not found_datetime:
                    print('Warning: a MD results file was supplied as the datetime source, but it does not appear '
                          'to contain datetime information.')

        assert all_exif_results is not None

        filename_to_datetime_string = {}

        for exif_result in all_exif_results:
            datetime_string = unknown_datetime_tag
            if ('exif_tags' in exif_result) and \
               (exif_result['exif_tags'] is not None) and \
               ('DateTimeOriginal' in exif_result['exif_tags']):
                datetime_string = exif_result['exif_tags']['DateTimeOriginal']
                if datetime_string is None:
                    datetime_string = ''
                else:
                    assert isinstance(datetime_string,str), 'Unrecognized datetime format'
            filename_to_datetime_string[exif_result['file_name']] = datetime_string

        # Debug accounting: compare the set of files with EXIF info to the set
        # of files in the MD results.
        image_files = [im['file'] for im in results['images']]
        image_files_set = set(image_files)

        files_in_exif_but_not_in_results = []
        files_in_results_but_not_in_exif = []
        files_with_no_datetime_info = []

        for fn in filename_to_datetime_string:
            dts = filename_to_datetime_string[fn]
            if (dts is None) or (dts == unknown_datetime_tag) or (len(dts) == 0):
                files_with_no_datetime_info.append(fn)
            if fn not in image_files_set:
                files_in_exif_but_not_in_results.append(fn)

        for fn in image_files_set:
            if fn not in filename_to_datetime_string:
                files_in_results_but_not_in_exif.append(fn)

        if verbose:

            print('{} files (of {}) in EXIF info not found in MD results'.format(
                len(files_in_exif_but_not_in_results),len(filename_to_datetime_string)
            ))

            print('{} files (of {}) in MD results not found in MD EXIF info'.format(
                len(files_in_results_but_not_in_exif),len(image_files_set)
            ))

            print('Failed to read datetime information for {} files (of {}) in EXIF info'.format(
                len(files_with_no_datetime_info),len(filename_to_datetime_string)
            ))

    # ...if we need to deal with datetimes


    ##%% Parse folder level column specifier

    if folder_level_columns is not None:

        # Allow a comma-delimited string of int-strings, e.g. "0,1,2"
        if isinstance(folder_level_columns,str):
            tokens = folder_level_columns.split(',')
            folder_level_columns = [int(s) for s in tokens]
        for folder_level in folder_level_columns:
            if (not isinstance(folder_level,int)) or (folder_level < 0):
                raise ValueError('Illegal folder level specifier {}'.format(
                    str(folder_level_columns)))


    ##%% Fill in default thresholds

    if classification_confidence_threshold is None:
        classification_confidence_threshold = default_classification_threshold
    if detection_confidence_threshold is None:
        detection_confidence_threshold = \
            get_typical_confidence_threshold_from_results(results)

    assert detection_confidence_threshold is not None


    ##%% Fill in output records

    output_records = []

    # For each image
    #
    # im = results['images'][0]
    for im in results['images']:

        # Common fields for every row generated for this image:
        #
        # * filename
        # * datetime (if images or EXIF information is supplied)
        # * detection_category
        # * max_detection_confidence
        # * classification_category
        # * max_classification_confidence
        # * count

        base_record = {}

        base_record['filename'] = im['file'].replace('\\','/')

        # Datetime (if necessary)
        if filename_to_datetime_string is not None:
            if im['file'] in filename_to_datetime_string:
                datetime_string = filename_to_datetime_string[im['file']]
            else:
                datetime_string = ''
            base_record['datetime'] = datetime_string

        for s in ['detection_category','max_detection_confidence',
                  'classification_category','max_classification_confidence',
                  'count']:
            base_record[s] = ''

        # Folder level columns
        tokens = im['file'].split('/')

        if folder_level_columns is not None:

            for folder_level in folder_level_columns:
                folder_level_column_name = 'folder_level_' + str(folder_level).zfill(2)
                if folder_level >= len(tokens):
                    folder_level_value = ''
                else:
                    folder_level_value = tokens[folder_level]
                base_record[folder_level_column_name] = folder_level_value

        records_this_image = []

        # Create one output row if this image failed
        if 'failure' in im and im['failure'] is not None and len(im['failure']) > 0:

            record = deepcopy(base_record)
            record['detection_category'] = 'error'
            record['classification_category'] = im['failure']
            records_this_image.append(record)
            assert 'detections' not in im or im['detections'] is None

        else:

            assert 'detections' in im and im['detections'] is not None

            # Count above-threshold detections
            detections_above_threshold = []
            for det in im['detections']:
                if det['conf'] >= detection_confidence_threshold:
                    detections_above_threshold.append(det)
            max_detection_conf = get_max_conf(im)

            # Create one output row if this image is empty (i.e., has no
            # above-threshold detections)
            if len(detections_above_threshold) == 0:

                record = deepcopy(base_record)
                record['detection_category'] = 'empty'
                record['max_detection_confidence'] = max_detection_conf
                records_this_image.append(record)

            # ...if this image is empty

            else:

                # Maps a string of the form:
                #
                # detection_category:classification_category
                #
                # ...to a dict with fields ['max_detection_conf','max_classification_conf','count']
                category_info_string_to_record = {}

                for det in detections_above_threshold:

                    assert det['conf'] >= detection_confidence_threshold

                    detection_category_name = detection_category_id_to_name[det['category']]
                    detection_confidence = det['conf']
                    classification_category_name = ''
                    classification_confidence = 0.0

                    if ('classifications' in det) and (len(det['classifications']) > 0):

                        # Classifications should always be sorted by confidence. Not
                        # technically required, but always true in practice.
                        assert is_list_sorted([c[1] for c in det['classifications']]), \
                            'This script does not yet support unsorted classifications'
                        assert classification_category_id_to_name is not None, \
                            'If classifications are present, category mappings should be present'

                        # Only use the first classification
                        classification = det['classifications'][0]
                        if classification[1] >= classification_confidence_threshold:
                            classification_category_name = \
                                classification_category_id_to_name[classification[0]]
                            classification_confidence = classification[1]

                    # ...if classifications are present

                    # E.g. "animal:rodent", or "vehicle:"
                    category_info_string = detection_category_name + ':' + classification_category_name

                    if category_info_string not in category_info_string_to_record:
                        category_info_string_to_record[category_info_string] = {
                            'max_detection_confidence':0.0,
                            'max_classification_confidence':0.0,
                            'count':0,
                            'detection_category':detection_category_name,
                            'classification_category':classification_category_name
                        }

                    record = category_info_string_to_record[category_info_string]
                    record['count'] += 1
                    if detection_confidence > record['max_detection_confidence']:
                        record['max_detection_confidence'] = detection_confidence
                    if classification_confidence > record['max_classification_confidence']:
                        record['max_classification_confidence'] = classification_confidence

                # ...for each detection

                for record_in in category_info_string_to_record.values():
                    assert record_in['count'] > 0
                    record_out = deepcopy(base_record)
                    for k in record_in.keys():
                        assert k in record_out.keys()
                        record_out[k] = record_in[k]
                    records_this_image.append(record_out)

            # ...is this empty/non-empty?

        # ...if this image failed/didn't fail

        # Add to [records]
        output_records.extend(records_this_image)

    # ...for each image

    # Make sure every record has the same columns (vacuously true when the
    # results file contained no images, in which case we write an empty .csv)
    if len(output_records) > 0:
        column_names = output_records[0].keys()
        for record in output_records:
            assert record.keys() == column_names

    # Write to .csv
    df = pd.DataFrame(output_records)
    df.to_csv(output_file,header=True,index=False)

    # from megadetector.utils.path_utils import open_file; open_file(output_file)

    return output_file

# ...generate_csv_report(...)
397
+
398
+
399
+ # %%
400
+
401
+ #%% Interactive driver
402
+
403
# Dead-code cell block (never executes at import time): scratch area for
# running this module one cell at a time in an interactive IDE session.
if False:

    pass

    #%% Configure options

    # Example command line for producing a MD results file that includes
    # datetime/EXIF information (no-op string literal, kept for reference)
    r"""
    python run_detector_batch.py MDV5A "g:\temp\md-test-images"
    "g:\temp\md-test-images\md_results_with_datetime.json"
    --recursive --output_relative_filenames --include_image_timestamp --include_exif_data
    """

    md_results_file = 'g:/temp/csv-report-test/md-results.json'
    datetime_source = 'g:/temp/csv-report-test/exif_data.json'

    # datetime_source = 'g:/temp/md-test-images'
    # datetime_source = 'g:/temp/md-test-images/md_results_with_datetime.json'
    # md_results_file = 'g:/temp/md-test-images/md_results_with_datetime.json'
    # md_results_file = 'g:/temp/md-test-images/speciesnet_results_md_format.json'

    output_file = None
    folder_level_columns = [0,1,2,3]
    detection_confidence_threshold = None
    classification_confidence_threshold = None
    verbose = True


    #%% Programmatic execution

    generate_csv_report(md_results_file=md_results_file,
                        output_file=output_file,
                        datetime_source=datetime_source,
                        folder_level_columns=folder_level_columns,
                        detection_confidence_threshold=detection_confidence_threshold,
                        classification_confidence_threshold=classification_confidence_threshold,
                        verbose=verbose)
439
+
440
+
441
+ #%% Command-line driver
442
+
443
def main(): # noqa
    """
    Command-line entry point: parse arguments and invoke generate_csv_report().
    Prints help and exits when invoked with no arguments.
    """

    arg_parser = argparse.ArgumentParser(
        description='Generates a .csv report from a MD-formatted .json file')

    # Positional argument
    arg_parser.add_argument('md_results_file', type=str,
                            help='Path to MD results file (.json)')

    # Optional arguments
    arg_parser.add_argument('--output_file', type=str,
                            help='Output filename (.csv) (if omitted, will append .csv to the input file)')
    arg_parser.add_argument('--datetime_source', type=str, default=None,
                            help='Image folder, exif_info.json file, or MD results file from which we should read datetime information')
    arg_parser.add_argument('--folder_level_columns', type=str, default=None,
                            help='Comma-separated list of zero-indexed folder levels that should become columns in the output file')
    arg_parser.add_argument('--detection_confidence_threshold', type=float, default=None,
                            help='Detection threshold (if omitted, chooses a reasonable default based on the .json file)')
    arg_parser.add_argument('--classification_confidence_threshold', type=float, default=None,
                            help='Classification threshold (default {})'.format(default_classification_threshold))
    arg_parser.add_argument('--verbose', action='store_true',
                            help='Enable additional debug output')

    # With no arguments at all, show usage rather than erroring out
    if not sys.argv[1:]:
        arg_parser.print_help()
        arg_parser.exit()

    args = arg_parser.parse_args()

    # Namespace attribute names match generate_csv_report's parameter names
    # exactly, so we can forward them as keyword arguments.
    generate_csv_report(**vars(args))

if __name__ == '__main__':
    main()