megadetector 5.0.28__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (176):
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +231 -224
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +340 -337
  65. megadetector/detection/pytorch_detector.py +304 -262
  66. megadetector/detection/run_detector.py +177 -164
  67. megadetector/detection/run_detector_batch.py +364 -363
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +256 -249
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +290 -282
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +415 -415
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +219 -146
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -499
  81. megadetector/postprocessing/load_api_results.py +23 -20
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +313 -298
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1018 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1457 -398
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +61 -61
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2526
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +401 -397
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +79 -73
  124. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  128. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
  129. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  130. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  131. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  132. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  133. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  134. megadetector/data_management/importers/awc_to_json.py +0 -191
  135. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  136. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  137. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  138. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  139. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  140. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  141. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  142. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  143. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  144. megadetector/data_management/importers/ena24_to_json.py +0 -276
  145. megadetector/data_management/importers/filenames_to_json.py +0 -386
  146. megadetector/data_management/importers/helena_to_cct.py +0 -283
  147. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  148. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  149. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  150. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  151. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  152. megadetector/data_management/importers/missouri_to_json.py +0 -490
  153. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  154. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  155. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  156. megadetector/data_management/importers/pc_to_json.py +0 -365
  157. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  158. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  159. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  160. megadetector/data_management/importers/rspb_to_json.py +0 -356
  161. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  162. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  163. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  164. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  165. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  166. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  167. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  168. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  169. megadetector/data_management/importers/ubc_to_json.py +0 -399
  170. megadetector/data_management/importers/umn_to_json.py +0 -507
  171. megadetector/data_management/importers/wellington_to_json.py +0 -263
  172. megadetector/data_management/importers/wi_to_json.py +0 -442
  173. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  174. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  175. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  176. megadetector-5.0.28.dist-info/RECORD +0 -209
@@ -1,340 +1,339 @@
1
- """
2
-
3
- mewc_to_md.py
4
-
5
- Converts the output of the MEWC inference scripts to the MD output format.
6
-
7
- """
8
-
9
- #%% Imports and constants
10
-
11
- import os
12
- import json
13
- import pandas as pd
14
-
15
- from copy import deepcopy
16
- from collections import defaultdict
17
- from megadetector.utils.ct_utils import sort_list_of_dicts_by_key, invert_dictionary # noqa
18
- from megadetector.utils.path_utils import recursive_file_list
19
-
20
- from megadetector.postprocessing.validate_batch_results import \
21
- ValidateBatchResultsOptions, validate_batch_results
22
-
23
- default_mewc_mount_prefix = '/images/'
24
- default_mewc_category_name_column = 'class_id'
25
-
26
-
27
- #%% Functions
28
-
29
- def mewc_to_md(mewc_input_folder,
30
- output_file=None,
31
- mount_prefix=default_mewc_mount_prefix,
32
- category_name_column=default_mewc_category_name_column,
33
- mewc_out_filename='mewc_out.csv',
34
- md_out_filename='md_out.json'):
35
- """
36
-
37
- Args:
38
- mewc_input_folder (str): the folder we'll search for MEWC output files
39
- output_file (str, optional): .json file to write with class information
40
- mount_prefix (str, optional): string to remove from all filenames in the MD
41
- .json file, typically the prefix used to mount the image folder.
42
- category_name_column (str, optional): column in the MEWC results .csv to use for
43
- category naming.
44
-
45
- Returns:
46
- dict: an MD-formatted dict, the same as what's written to [output_file]
47
- """
48
-
49
- ##%% Read input files
50
-
51
- assert os.path.isdir(mewc_input_folder), \
52
- 'Could not find folder {}'.format(mewc_input_folder)
53
-
54
-
55
- ##%% Find MEWC output files
56
-
57
- relative_path_to_mewc_info = {}
58
-
59
- print('Listing files in folder {}'.format(mewc_input_folder))
60
- all_files_relative = set(recursive_file_list(mewc_input_folder,return_relative_paths=True))
61
-
62
- for fn_relative in all_files_relative:
63
- if fn_relative.endswith(mewc_out_filename):
64
- folder_relative = '/'.join(fn_relative.split('/')[:-1])
65
- assert folder_relative not in relative_path_to_mewc_info
66
- md_output_file_relative = os.path.join(folder_relative,md_out_filename).replace('\\','/')
67
- assert md_output_file_relative in all_files_relative, \
68
- 'Could not find MD output file {} to match to {}'.format(
69
- md_output_file_relative,fn_relative)
70
- relative_path_to_mewc_info[folder_relative] = \
71
- {'mewc_predict_file':fn_relative,'md_file':md_output_file_relative}
72
-
73
- del folder_relative
74
-
75
- print('Found {} MEWC results files'.format(len(relative_path_to_mewc_info)))
76
-
77
-
78
- ##%% Prepare to loop over results files
79
-
80
- md_results_all = {}
81
- md_results_all['images'] = []
82
- md_results_all['detection_categories'] = {}
83
- md_results_all['classification_categories'] = {}
84
- md_results_all['info'] = None
85
-
86
- classification_category_name_to_id = {}
87
-
88
-
89
- ##%% Loop over results files
90
-
91
- # relative_folder = next(iter(relative_path_to_mewc_info.keys()))
92
- for relative_folder in relative_path_to_mewc_info:
93
-
94
- ##%%
95
-
96
- mewc_info = relative_path_to_mewc_info[relative_folder]
97
- mewc_csv_fn_abs = os.path.join(mewc_input_folder,mewc_info['mewc_predict_file'])
98
- mewc_md_fn_abs = os.path.join(mewc_input_folder,mewc_info['md_file'])
99
-
100
- mewc_classification_info = pd.read_csv(mewc_csv_fn_abs)
101
- mewc_classification_info = mewc_classification_info.to_dict('records')
102
-
103
- assert os.path.isfile(mewc_md_fn_abs), \
104
- 'Could not find file {}'.format(mewc_md_fn_abs)
105
- with open(mewc_md_fn_abs,'r') as f:
106
- md_results = json.load(f)
107
-
108
-
109
- ##%% Remove the mount prefix from MD files if necessary
110
- if mount_prefix is not None and len(mount_prefix) > 0:
111
-
112
- n_files_without_mount_prefix = 0
113
-
114
- # im = md_results['images'][0]
115
- for im in md_results['images']:
116
- if not im['file'].startswith(mount_prefix):
117
- n_files_without_mount_prefix += 1
118
- else:
119
- im['file'] = im['file'].replace(mount_prefix,'',1)
120
-
121
- if n_files_without_mount_prefix > 0:
122
- print('Warning {} of {} files in the MD results did not include the mount prefix {}'.format(
123
- n_files_without_mount_prefix,len(md_results['images']),mount_prefix))
124
-
125
-
126
- ##%% Convert MEWC snip IDs to image files
127
-
128
- # r = mewc_classification_info[0]
129
- for r in mewc_classification_info:
130
-
131
- # E.g. "IMG0-0.jpg"
132
- snip_file = r['filename']
133
-
134
- # E.g. "IMG0-0"
135
- snip_file_no_ext = os.path.splitext(snip_file)[0]
136
- ext = os.path.splitext(snip_file)[1] # noqa
137
-
138
- tokens = snip_file_no_ext.split('-')
139
-
140
- if len(tokens) == 1:
141
- print('Warning: in folder {}, detection ID not found in snip filename {}, skipping'.format(
142
- relative_folder,snip_file_no_ext))
143
- r['image_filename_without_extension'] = snip_file_no_ext
144
- r['snip_id'] = None
145
-
146
- continue
147
-
148
- filename_without_snip_id = '-'.join(tokens[0:-1])
149
- snip_id = int(tokens[-1])
150
- image_filename_without_extension = filename_without_snip_id
151
-
152
- r['image_filename_without_extension'] = image_filename_without_extension
153
- r['snip_id'] = snip_id
154
-
155
- # ...for each MEWC result record
156
-
157
-
158
- ##%% Make sure MD results and MEWC results refer to the same files
159
-
160
- images_in_md_results_no_extension = \
161
- set([os.path.splitext(im['file'])[0] for im in md_results['images']])
162
- images_in_mewc_results_no_extension = set(r['image_filename_without_extension'] \
163
- for r in mewc_classification_info)
164
-
165
- # All files with classification results should also have detection results
166
- for fn in images_in_mewc_results_no_extension:
167
- assert fn in images_in_md_results_no_extension, \
168
- 'Error: file {} is present in mewc-predict results, but not in MD results'.format(fn)
169
-
170
- # This is just a note to self: no classification results are present for empty images
171
- if False:
172
- for fn in images_in_md_results_no_extension:
173
- if fn not in images_in_mewc_results_no_extension:
174
- print('Warning: file {}/{} is present in MD results, but not in mewc-predict results'.format(
175
- relative_folder,fn))
176
-
177
-
178
- ##%% Validate images
179
-
180
- for im in md_results['images']:
181
- fn_relative = im['file']
182
- fn_abs = os.path.join(mewc_input_folder,relative_folder,fn_relative)
183
- if not os.path.isfile(fn_abs):
184
- print('Warning: image file {} does not exist'.format(fn_abs))
185
-
186
-
187
- ##%% Map filenames to MEWC results
188
-
189
- image_id_to_mewc_records = defaultdict(list)
190
- for r in mewc_classification_info:
191
- image_id_to_mewc_records[r['image_filename_without_extension']].append(r)
192
-
193
-
194
- ##%% Add classification info to MD results
195
-
196
- # im = md_results['images'][0]
197
- for im in md_results['images']:
198
-
199
- if ('detections' not in im) or (im['detections'] is None) or (len(im['detections']) == 0):
200
- continue
201
-
202
- detections = im['detections']
203
-
204
- # *Don't* sort by confidence, it looks like snip IDs use the original sort order
205
- # detections = sort_list_of_dicts_by_key(detections,'conf',reverse=True)
206
-
207
- # This is just a debug assist, so I can run this cell more than once
208
- for det in detections:
209
- det['classifications'] = []
210
-
211
- image_id = os.path.splitext(im['file'])[0]
212
- mewc_records_this_image = image_id_to_mewc_records[image_id]
213
-
214
- # r = mewc_records_this_image[0]
215
- for r in mewc_records_this_image:
216
-
217
- if r['snip_id'] is None:
218
- continue
219
-
220
- category_name = r[category_name_column]
221
-
222
- # This is a *global* list of category mappings, across all mewc .csv files
223
- if category_name not in classification_category_name_to_id:
224
- category_id = str(len(classification_category_name_to_id))
225
- classification_category_name_to_id[category_name] = category_id
226
- else:
227
- category_id = classification_category_name_to_id[category_name]
228
-
229
- snip_id = r['snip_id']
230
- if snip_id >= len(detections):
231
- print('Warning: image {} has a classified snip ID of {}, but only {} detections are present'.format(
232
- image_id,snip_id,len(detections)))
233
- continue
234
-
235
- det = detections[snip_id]
236
-
237
- if 'classifications' not in det:
238
- det['classifications'] = []
239
- det['classifications'].append([category_id,r['prob']])
240
-
241
- # ...for each classification in this image
242
-
243
- # ...for each image
244
-
245
- ##%% Map MD results to the global level
246
-
247
- if md_results_all['info'] is None:
248
- md_results_all['info'] = md_results['info']
249
-
250
- for category_id in md_results['detection_categories']:
251
- if category_id not in md_results_all['detection_categories']:
252
- md_results_all['detection_categories'][category_id] = \
253
- md_results['detection_categories'][category_id]
254
- else:
255
- assert md_results_all['detection_categories'][category_id] == \
256
- md_results['detection_categories'][category_id], \
257
- 'MD results present with incompatible detection categories'
258
-
259
- # im = md_results['images'][0]
260
- for im in md_results['images']:
261
- im_copy = deepcopy(im)
262
- im_copy['file'] = os.path.join(relative_folder,im['file']).replace('\\','/')
263
- md_results_all['images'].append(im_copy)
264
-
265
- # ...for each folder that contains MEWC results
266
-
267
- del md_results
268
-
269
- ##%% Write output
270
-
271
- md_results_all['classification_categories'] = invert_dictionary(classification_category_name_to_id)
272
-
273
- if output_file is not None:
274
- output_dir = os.path.dirname(output_file)
275
- os.makedirs(output_dir,exist_ok=True)
276
- with open(output_file,'w') as f:
277
- json.dump(md_results_all,f,indent=1)
278
-
279
- validation_options = ValidateBatchResultsOptions()
280
- validation_options.check_image_existence = True
281
- validation_options.relative_path_base = mewc_input_folder
282
- validation_options.raise_errors = True
283
- validation_results = validate_batch_results(output_file,validation_options) # noqa
284
-
285
- # ...def mewc_to_md(...)
286
-
287
-
288
- #%% Interactive driver
289
-
290
- if False:
291
-
292
- pass
293
-
294
- #%%
295
-
296
- mewc_input_folder = r'G:\temp\mewc-test'
297
- mount_prefix = '/images/'
298
- output_file = os.path.join(mewc_input_folder,'results_with_classes.json')
299
-
300
- _ = mewc_to_md(mewc_input_folder=mewc_input_folder,
301
- output_file=output_file,
302
- mount_prefix=mount_prefix,
303
- category_name_column='class_id')
304
-
305
-
306
- #%% Command-line driver
307
-
308
- import sys
309
- import argparse
310
-
311
- def main():
312
-
313
- parser = argparse.ArgumentParser()
314
-
315
- parser.add_argument(
316
- 'input_folder',type=str,
317
- help='Folder containing images and MEWC .json/.csv files')
318
- parser.add_argument(
319
- 'output_file',type=str,
320
- help='.json file where output will be written')
321
- parser.add_argument(
322
- '--mount_prefix',type=str,default=default_mewc_mount_prefix,
323
- help='prefix to remove from each filename in MEWC results, typically the Docker mount point')
324
- parser.add_argument(
325
- '--category_name_column',type=str,default=default_mewc_category_name_column,
326
- help='column in the MEWC .csv file to use for category names')
327
-
328
- if len(sys.argv[1:]) == 0:
329
- parser.print_help()
330
- parser.exit()
331
-
332
- args = parser.parse_args()
333
-
334
- _ = mewc_to_md(mewc_input_folder=args.input_folder,
335
- output_file=args.output_file,
336
- mount_prefix=args.mount_prefix,
337
- category_name_column=args.category_name_column)
338
-
339
- if __name__ == '__main__':
340
- main()
1
+ """
2
+
3
+ mewc_to_md.py
4
+
5
+ Converts the output of the MEWC inference scripts to the MD output format.
6
+
7
+ """
8
+
9
+ #%% Imports and constants
10
+
11
+ import os
12
+ import json
13
+ import pandas as pd
14
+ import sys
15
+ import argparse
16
+
17
+ from copy import deepcopy
18
+ from collections import defaultdict
19
+ from megadetector.utils.ct_utils import sort_list_of_dicts_by_key, invert_dictionary # noqa
20
+ from megadetector.utils.path_utils import recursive_file_list
21
+
22
+ from megadetector.postprocessing.validate_batch_results import \
23
+ ValidateBatchResultsOptions, validate_batch_results
24
+
25
+ default_mewc_mount_prefix = '/images/'
26
+ default_mewc_category_name_column = 'class_id'
27
+
28
+
29
+ #%% Functions
30
+
31
+ def mewc_to_md(mewc_input_folder,
32
+ output_file=None,
33
+ mount_prefix=default_mewc_mount_prefix,
34
+ category_name_column=default_mewc_category_name_column,
35
+ mewc_out_filename='mewc_out.csv',
36
+ md_out_filename='md_out.json'):
37
+ """
38
+
39
+ Args:
40
+ mewc_input_folder (str): the folder we'll search for MEWC output files
41
+ output_file (str, optional): .json file to write with class information
42
+ mount_prefix (str, optional): string to remove from all filenames in the MD
43
+ .json file, typically the prefix used to mount the image folder.
44
+ category_name_column (str, optional): column in the MEWC results .csv to use for
45
+ category naming.
46
+
47
+ Returns:
48
+ dict: an MD-formatted dict, the same as what's written to [output_file]
49
+ """
50
+
51
+ ##%% Read input files
52
+
53
+ assert os.path.isdir(mewc_input_folder), \
54
+ 'Could not find folder {}'.format(mewc_input_folder)
55
+
56
+
57
+ ##%% Find MEWC output files
58
+
59
+ relative_path_to_mewc_info = {}
60
+
61
+ print('Listing files in folder {}'.format(mewc_input_folder))
62
+ all_files_relative = set(recursive_file_list(mewc_input_folder,return_relative_paths=True))
63
+
64
+ for fn_relative in all_files_relative:
65
+ if fn_relative.endswith(mewc_out_filename):
66
+ folder_relative = '/'.join(fn_relative.split('/')[:-1])
67
+ assert folder_relative not in relative_path_to_mewc_info
68
+ md_output_file_relative = os.path.join(folder_relative,md_out_filename).replace('\\','/')
69
+ assert md_output_file_relative in all_files_relative, \
70
+ 'Could not find MD output file {} to match to {}'.format(
71
+ md_output_file_relative,fn_relative)
72
+ relative_path_to_mewc_info[folder_relative] = \
73
+ {'mewc_predict_file':fn_relative,'md_file':md_output_file_relative}
74
+
75
+ del folder_relative
76
+
77
+ print('Found {} MEWC results files'.format(len(relative_path_to_mewc_info)))
78
+
79
+
80
+ ##%% Prepare to loop over results files
81
+
82
+ md_results_all = {}
83
+ md_results_all['images'] = []
84
+ md_results_all['detection_categories'] = {}
85
+ md_results_all['classification_categories'] = {}
86
+ md_results_all['info'] = None
87
+
88
+ classification_category_name_to_id = {}
89
+
90
+
91
+ ##%% Loop over results files
92
+
93
+ # relative_folder = next(iter(relative_path_to_mewc_info.keys()))
94
+ for relative_folder in relative_path_to_mewc_info:
95
+
96
+ ##%%
97
+
98
+ mewc_info = relative_path_to_mewc_info[relative_folder]
99
+ mewc_csv_fn_abs = os.path.join(mewc_input_folder,mewc_info['mewc_predict_file'])
100
+ mewc_md_fn_abs = os.path.join(mewc_input_folder,mewc_info['md_file'])
101
+
102
+ mewc_classification_info = pd.read_csv(mewc_csv_fn_abs)
103
+ mewc_classification_info = mewc_classification_info.to_dict('records')
104
+
105
+ assert os.path.isfile(mewc_md_fn_abs), \
106
+ 'Could not find file {}'.format(mewc_md_fn_abs)
107
+ with open(mewc_md_fn_abs,'r') as f:
108
+ md_results = json.load(f)
109
+
110
+
111
+ ##%% Remove the mount prefix from MD files if necessary
112
+ if mount_prefix is not None and len(mount_prefix) > 0:
113
+
114
+ n_files_without_mount_prefix = 0
115
+
116
+ # im = md_results['images'][0]
117
+ for im in md_results['images']:
118
+ if not im['file'].startswith(mount_prefix):
119
+ n_files_without_mount_prefix += 1
120
+ else:
121
+ im['file'] = im['file'].replace(mount_prefix,'',1)
122
+
123
+ if n_files_without_mount_prefix > 0:
124
+ print('Warning {} of {} files in the MD results did not include the mount prefix {}'.format(
125
+ n_files_without_mount_prefix,len(md_results['images']),mount_prefix))
126
+
127
+
128
+ ##%% Convert MEWC snip IDs to image files
129
+
130
+ # r = mewc_classification_info[0]
131
+ for r in mewc_classification_info:
132
+
133
+ # E.g. "IMG0-0.jpg"
134
+ snip_file = r['filename']
135
+
136
+ # E.g. "IMG0-0"
137
+ snip_file_no_ext = os.path.splitext(snip_file)[0]
138
+ ext = os.path.splitext(snip_file)[1] # noqa
139
+
140
+ tokens = snip_file_no_ext.split('-')
141
+
142
+ if len(tokens) == 1:
143
+ print('Warning: in folder {}, detection ID not found in snip filename {}, skipping'.format(
144
+ relative_folder,snip_file_no_ext))
145
+ r['image_filename_without_extension'] = snip_file_no_ext
146
+ r['snip_id'] = None
147
+
148
+ continue
149
+
150
+ filename_without_snip_id = '-'.join(tokens[0:-1])
151
+ snip_id = int(tokens[-1])
152
+ image_filename_without_extension = filename_without_snip_id
153
+
154
+ r['image_filename_without_extension'] = image_filename_without_extension
155
+ r['snip_id'] = snip_id
156
+
157
+ # ...for each MEWC result record
158
+
159
+
160
+ ##%% Make sure MD results and MEWC results refer to the same files
161
+
162
+ images_in_md_results_no_extension = \
163
+ set([os.path.splitext(im['file'])[0] for im in md_results['images']])
164
+ images_in_mewc_results_no_extension = set(r['image_filename_without_extension'] \
165
+ for r in mewc_classification_info)
166
+
167
+ # All files with classification results should also have detection results
168
+ for fn in images_in_mewc_results_no_extension:
169
+ assert fn in images_in_md_results_no_extension, \
170
+ 'Error: file {} is present in mewc-predict results, but not in MD results'.format(fn)
171
+
172
+ # This is just a note to self: no classification results are present for empty images
173
+ if False:
174
+ for fn in images_in_md_results_no_extension:
175
+ if fn not in images_in_mewc_results_no_extension:
176
+ print('Warning: file {}/{} is present in MD results, but not in mewc-predict results'.format(
177
+ relative_folder,fn))
178
+
179
+
180
+ ##%% Validate images
181
+
182
+ for im in md_results['images']:
183
+ fn_relative = im['file']
184
+ fn_abs = os.path.join(mewc_input_folder,relative_folder,fn_relative)
185
+ if not os.path.isfile(fn_abs):
186
+ print('Warning: image file {} does not exist'.format(fn_abs))
187
+
188
+
189
+ ##%% Map filenames to MEWC results
190
+
191
+ image_id_to_mewc_records = defaultdict(list)
192
+ for r in mewc_classification_info:
193
+ image_id_to_mewc_records[r['image_filename_without_extension']].append(r)
194
+
195
+
196
+ ##%% Add classification info to MD results
197
+
198
+ # im = md_results['images'][0]
199
+ for im in md_results['images']:
200
+
201
+ if ('detections' not in im) or (im['detections'] is None) or (len(im['detections']) == 0):
202
+ continue
203
+
204
+ detections = im['detections']
205
+
206
+ # *Don't* sort by confidence, it looks like snip IDs use the original sort order
207
+ # detections = sort_list_of_dicts_by_key(detections,'conf',reverse=True)
208
+
209
+ # This is just a debug assist, so I can run this cell more than once
210
+ for det in detections:
211
+ det['classifications'] = []
212
+
213
+ image_id = os.path.splitext(im['file'])[0]
214
+ mewc_records_this_image = image_id_to_mewc_records[image_id]
215
+
216
+ # r = mewc_records_this_image[0]
217
+ for r in mewc_records_this_image:
218
+
219
+ if r['snip_id'] is None:
220
+ continue
221
+
222
+ category_name = r[category_name_column]
223
+
224
+ # This is a *global* list of category mappings, across all mewc .csv files
225
+ if category_name not in classification_category_name_to_id:
226
+ category_id = str(len(classification_category_name_to_id))
227
+ classification_category_name_to_id[category_name] = category_id
228
+ else:
229
+ category_id = classification_category_name_to_id[category_name]
230
+
231
+ snip_id = r['snip_id']
232
+ if snip_id >= len(detections):
233
+ print('Warning: image {} has a classified snip ID of {}, but only {} detections are present'.format(
234
+ image_id,snip_id,len(detections)))
235
+ continue
236
+
237
+ det = detections[snip_id]
238
+
239
+ if 'classifications' not in det:
240
+ det['classifications'] = []
241
+ det['classifications'].append([category_id,r['prob']])
242
+
243
+ # ...for each classification in this image
244
+
245
+ # ...for each image
246
+
247
+ ##%% Map MD results to the global level
248
+
249
+ if md_results_all['info'] is None:
250
+ md_results_all['info'] = md_results['info']
251
+
252
+ for category_id in md_results['detection_categories']:
253
+ if category_id not in md_results_all['detection_categories']:
254
+ md_results_all['detection_categories'][category_id] = \
255
+ md_results['detection_categories'][category_id]
256
+ else:
257
+ assert md_results_all['detection_categories'][category_id] == \
258
+ md_results['detection_categories'][category_id], \
259
+ 'MD results present with incompatible detection categories'
260
+
261
+ # im = md_results['images'][0]
262
+ for im in md_results['images']:
263
+ im_copy = deepcopy(im)
264
+ im_copy['file'] = os.path.join(relative_folder,im['file']).replace('\\','/')
265
+ md_results_all['images'].append(im_copy)
266
+
267
+ # ...for each folder that contains MEWC results
268
+
269
+ del md_results
270
+
271
+ ##%% Write output
272
+
273
+ md_results_all['classification_categories'] = invert_dictionary(classification_category_name_to_id)
274
+
275
+ if output_file is not None:
276
+ output_dir = os.path.dirname(output_file)
277
+ os.makedirs(output_dir,exist_ok=True)
278
+ with open(output_file,'w') as f:
279
+ json.dump(md_results_all,f,indent=1)
280
+
281
+ validation_options = ValidateBatchResultsOptions()
282
+ validation_options.check_image_existence = True
283
+ validation_options.relative_path_base = mewc_input_folder
284
+ validation_options.raise_errors = True
285
+ validation_results = validate_batch_results(output_file,validation_options) # noqa
286
+
287
+ # ...def mewc_to_md(...)
288
+
289
+
290
+ #%% Interactive driver
291
+
292
if False:

    # This block never runs on import or from the command line; the code below
    # is meant to be executed by hand, one cell at a time, from an IDE.
    pass

    #%% Convert one local test folder

    # The variable names deliberately match the keyword arguments passed to
    # mewc_to_md() below.
    mewc_input_folder = r'G:\temp\mewc-test'
    output_file = os.path.join(mewc_input_folder,'results_with_classes.json')
    mount_prefix = '/images/'

    _ = mewc_to_md(
        mewc_input_folder=mewc_input_folder,
        output_file=output_file,
        mount_prefix=mount_prefix,
        category_name_column='class_id')
306
+
307
+
308
+ #%% Command-line driver
309
+
310
def main(): # noqa
    """
    Command-line driver: parses the arguments defined below and passes them
    to mewc_to_md().
    """

    cli = argparse.ArgumentParser()

    # Required positional arguments
    cli.add_argument(
        'input_folder',
        type=str,
        help='Folder containing images and MEWC .json/.csv files')
    cli.add_argument(
        'output_file',
        type=str,
        help='.json file where output will be written')

    # Optional arguments; their defaults are the default_mewc_* names defined
    # outside this function
    cli.add_argument(
        '--mount_prefix',
        type=str,
        default=default_mewc_mount_prefix,
        help='prefix to remove from each filename in MEWC results, typically the Docker mount point')
    cli.add_argument(
        '--category_name_column',
        type=str,
        default=default_mewc_category_name_column,
        help='column in the MEWC .csv file to use for category names')

    # When invoked with no arguments at all, print the help text and exit
    if not sys.argv[1:]:
        cli.print_help()
        cli.exit()

    options = cli.parse_args()

    _ = mewc_to_md(
        mewc_input_folder=options.input_folder,
        output_file=options.output_file,
        mount_prefix=options.mount_prefix,
        category_name_column=options.category_name_column)
337
+
338
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()