megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
@@ -1,340 +1,339 @@
1
- """
2
-
3
- mewc_to_md.py
4
-
5
- Converts the output of the MEWC inference scripts to the MD output format.
6
-
7
- """
8
-
9
- #%% Imports and constants
10
-
11
- import os
12
- import json
13
- import pandas as pd
14
-
15
- from copy import deepcopy
16
- from collections import defaultdict
17
- from megadetector.utils.ct_utils import sort_list_of_dicts_by_key, invert_dictionary # noqa
18
- from megadetector.utils.path_utils import recursive_file_list
19
-
20
- from megadetector.postprocessing.validate_batch_results import \
21
- ValidateBatchResultsOptions, validate_batch_results
22
-
23
- default_mewc_mount_prefix = '/images/'
24
- default_mewc_category_name_column = 'class_id'
25
-
26
-
27
- #%% Functions
28
-
29
- def mewc_to_md(mewc_input_folder,
30
- output_file=None,
31
- mount_prefix=default_mewc_mount_prefix,
32
- category_name_column=default_mewc_category_name_column,
33
- mewc_out_filename='mewc_out.csv',
34
- md_out_filename='md_out.json'):
35
- """
36
-
37
- Args:
38
- mewc_input_folder (str): the folder we'll search for MEWC output files
39
- output_file (str, optional): .json file to write with class information
40
- mount_prefix (str, optional): string to remove from all filenames in the MD
41
- .json file, typically the prefix used to mount the image folder.
42
- category_name_column (str, optional): column in the MEWC results .csv to use for
43
- category naming.
44
-
45
- Returns:
46
- dict: an MD-formatted dict, the same as what's written to [output_file]
47
- """
48
-
49
- ##%% Read input files
50
-
51
- assert os.path.isdir(mewc_input_folder), \
52
- 'Could not find folder {}'.format(mewc_input_folder)
53
-
54
-
55
- ##%% Find MEWC output files
56
-
57
- relative_path_to_mewc_info = {}
58
-
59
- print('Listing files in folder {}'.format(mewc_input_folder))
60
- all_files_relative = set(recursive_file_list(mewc_input_folder,return_relative_paths=True))
61
-
62
- for fn_relative in all_files_relative:
63
- if fn_relative.endswith(mewc_out_filename):
64
- folder_relative = '/'.join(fn_relative.split('/')[:-1])
65
- assert folder_relative not in relative_path_to_mewc_info
66
- md_output_file_relative = os.path.join(folder_relative,md_out_filename).replace('\\','/')
67
- assert md_output_file_relative in all_files_relative, \
68
- 'Could not find MD output file {} to match to {}'.format(
69
- md_output_file_relative,fn_relative)
70
- relative_path_to_mewc_info[folder_relative] = \
71
- {'mewc_predict_file':fn_relative,'md_file':md_output_file_relative}
72
-
73
- del folder_relative
74
-
75
- print('Found {} MEWC results files'.format(len(relative_path_to_mewc_info)))
76
-
77
-
78
- ##%% Prepare to loop over results files
79
-
80
- md_results_all = {}
81
- md_results_all['images'] = []
82
- md_results_all['detection_categories'] = {}
83
- md_results_all['classification_categories'] = {}
84
- md_results_all['info'] = None
85
-
86
- classification_category_name_to_id = {}
87
-
88
-
89
- ##%% Loop over results files
90
-
91
- # relative_folder = next(iter(relative_path_to_mewc_info.keys()))
92
- for relative_folder in relative_path_to_mewc_info:
93
-
94
- ##%%
95
-
96
- mewc_info = relative_path_to_mewc_info[relative_folder]
97
- mewc_csv_fn_abs = os.path.join(mewc_input_folder,mewc_info['mewc_predict_file'])
98
- mewc_md_fn_abs = os.path.join(mewc_input_folder,mewc_info['md_file'])
99
-
100
- mewc_classification_info = pd.read_csv(mewc_csv_fn_abs)
101
- mewc_classification_info = mewc_classification_info.to_dict('records')
102
-
103
- assert os.path.isfile(mewc_md_fn_abs), \
104
- 'Could not find file {}'.format(mewc_md_fn_abs)
105
- with open(mewc_md_fn_abs,'r') as f:
106
- md_results = json.load(f)
107
-
108
-
109
- ##%% Remove the mount prefix from MD files if necessary
110
- if mount_prefix is not None and len(mount_prefix) > 0:
111
-
112
- n_files_without_mount_prefix = 0
113
-
114
- # im = md_results['images'][0]
115
- for im in md_results['images']:
116
- if not im['file'].startswith(mount_prefix):
117
- n_files_without_mount_prefix += 1
118
- else:
119
- im['file'] = im['file'].replace(mount_prefix,'',1)
120
-
121
- if n_files_without_mount_prefix > 0:
122
- print('Warning {} of {} files in the MD results did not include the mount prefix {}'.format(
123
- n_files_without_mount_prefix,len(md_results['images']),mount_prefix))
124
-
125
-
126
- ##%% Convert MEWC snip IDs to image files
127
-
128
- # r = mewc_classification_info[0]
129
- for r in mewc_classification_info:
130
-
131
- # E.g. "IMG0-0.jpg"
132
- snip_file = r['filename']
133
-
134
- # E.g. "IMG0-0"
135
- snip_file_no_ext = os.path.splitext(snip_file)[0]
136
- ext = os.path.splitext(snip_file)[1] # noqa
137
-
138
- tokens = snip_file_no_ext.split('-')
139
-
140
- if len(tokens) == 1:
141
- print('Warning: in folder {}, detection ID not found in snip filename {}, skipping'.format(
142
- relative_folder,snip_file_no_ext))
143
- r['image_filename_without_extension'] = snip_file_no_ext
144
- r['snip_id'] = None
145
-
146
- continue
147
-
148
- filename_without_snip_id = '-'.join(tokens[0:-1])
149
- snip_id = int(tokens[-1])
150
- image_filename_without_extension = filename_without_snip_id
151
-
152
- r['image_filename_without_extension'] = image_filename_without_extension
153
- r['snip_id'] = snip_id
154
-
155
- # ...for each MEWC result record
156
-
157
-
158
- ##%% Make sure MD results and MEWC results refer to the same files
159
-
160
- images_in_md_results_no_extension = \
161
- set([os.path.splitext(im['file'])[0] for im in md_results['images']])
162
- images_in_mewc_results_no_extension = set(r['image_filename_without_extension'] \
163
- for r in mewc_classification_info)
164
-
165
- # All files with classification results should also have detection results
166
- for fn in images_in_mewc_results_no_extension:
167
- assert fn in images_in_md_results_no_extension, \
168
- 'Error: file {} is present in mewc-predict results, but not in MD results'.format(fn)
169
-
170
- # This is just a note to self: no classification results are present for empty images
171
- if False:
172
- for fn in images_in_md_results_no_extension:
173
- if fn not in images_in_mewc_results_no_extension:
174
- print('Warning: file {}/{} is present in MD results, but not in mewc-predict results'.format(
175
- relative_folder,fn))
176
-
177
-
178
- ##%% Validate images
179
-
180
- for im in md_results['images']:
181
- fn_relative = im['file']
182
- fn_abs = os.path.join(mewc_input_folder,relative_folder,fn_relative)
183
- if not os.path.isfile(fn_abs):
184
- print('Warning: image file {} does not exist'.format(fn_abs))
185
-
186
-
187
- ##%% Map filenames to MEWC results
188
-
189
- image_id_to_mewc_records = defaultdict(list)
190
- for r in mewc_classification_info:
191
- image_id_to_mewc_records[r['image_filename_without_extension']].append(r)
192
-
193
-
194
- ##%% Add classification info to MD results
195
-
196
- # im = md_results['images'][0]
197
- for im in md_results['images']:
198
-
199
- if ('detections' not in im) or (im['detections'] is None) or (len(im['detections']) == 0):
200
- continue
201
-
202
- detections = im['detections']
203
-
204
- # *Don't* sort by confidence, it looks like snip IDs use the original sort order
205
- # detections = sort_list_of_dicts_by_key(detections,'conf',reverse=True)
206
-
207
- # This is just a debug assist, so I can run this cell more than once
208
- for det in detections:
209
- det['classifications'] = []
210
-
211
- image_id = os.path.splitext(im['file'])[0]
212
- mewc_records_this_image = image_id_to_mewc_records[image_id]
213
-
214
- # r = mewc_records_this_image[0]
215
- for r in mewc_records_this_image:
216
-
217
- if r['snip_id'] is None:
218
- continue
219
-
220
- category_name = r[category_name_column]
221
-
222
- # This is a *global* list of category mappings, across all mewc .csv files
223
- if category_name not in classification_category_name_to_id:
224
- category_id = str(len(classification_category_name_to_id))
225
- classification_category_name_to_id[category_name] = category_id
226
- else:
227
- category_id = classification_category_name_to_id[category_name]
228
-
229
- snip_id = r['snip_id']
230
- if snip_id >= len(detections):
231
- print('Warning: image {} has a classified snip ID of {}, but only {} detections are present'.format(
232
- image_id,snip_id,len(detections)))
233
- continue
234
-
235
- det = detections[snip_id]
236
-
237
- if 'classifications' not in det:
238
- det['classifications'] = []
239
- det['classifications'].append([category_id,r['prob']])
240
-
241
- # ...for each classification in this image
242
-
243
- # ...for each image
244
-
245
- ##%% Map MD reults to the global level
246
-
247
- if md_results_all['info'] is None:
248
- md_results_all['info'] = md_results['info']
249
-
250
- for category_id in md_results['detection_categories']:
251
- if category_id not in md_results_all['detection_categories']:
252
- md_results_all['detection_categories'][category_id] = \
253
- md_results['detection_categories'][category_id]
254
- else:
255
- assert md_results_all['detection_categories'][category_id] == \
256
- md_results['detection_categories'][category_id], \
257
- 'MD results present with incompatible detection categories'
258
-
259
- # im = md_results['images'][0]
260
- for im in md_results['images']:
261
- im_copy = deepcopy(im)
262
- im_copy['file'] = os.path.join(relative_folder,im['file']).replace('\\','/')
263
- md_results_all['images'].append(im_copy)
264
-
265
- # ...for each folder that contains MEWC results
266
-
267
- del md_results
268
-
269
- ##%% Write output
270
-
271
- md_results_all['classification_categories'] = invert_dictionary(classification_category_name_to_id)
272
-
273
- if output_file is not None:
274
- output_dir = os.path.dirname(output_file)
275
- os.makedirs(output_dir,exist_ok=True)
276
- with open(output_file,'w') as f:
277
- json.dump(md_results_all,f,indent=1)
278
-
279
- validation_options = ValidateBatchResultsOptions()
280
- validation_options.check_image_existence = True
281
- validation_options.relative_path_base = mewc_input_folder
282
- validation_options.raise_errors = True
283
- validation_results = validate_batch_results(output_file,validation_options) # noqa
284
-
285
- # ...def mewc_to_md(...)
286
-
287
-
288
- #%% Interactive driver
289
-
290
- if False:
291
-
292
- pass
293
-
294
- #%%
295
-
296
- mewc_input_folder = r'G:\temp\mewc-test'
297
- mount_prefix = '/images/'
298
- output_file = os.path.join(mewc_input_folder,'results_with_classes.json')
299
-
300
- _ = mewc_to_md(mewc_input_folder=mewc_input_folder,
301
- output_file=output_file,
302
- mount_prefix=mount_prefix,
303
- category_name_column='class_id')
304
-
305
-
306
- #%% Command-line driver
307
-
308
- import sys
309
- import argparse
310
-
311
- def main():
312
-
313
- parser = argparse.ArgumentParser()
314
-
315
- parser.add_argument(
316
- 'input_folder',type=str,
317
- help='Folder containing images and MEWC .json/.csv files')
318
- parser.add_argument(
319
- 'output_file',type=str,
320
- help='.json file where output will be written')
321
- parser.add_argument(
322
- '--mount_prefix',type=str,default=default_mewc_mount_prefix,
323
- help='prefix to remove from each filename in MEWC results, typically the Docker mount point')
324
- parser.add_argument(
325
- '--category_name_column',type=str,default=default_mewc_category_name_column,
326
- help='column in the MEWC .csv file to use for category names')
327
-
328
- if len(sys.argv[1:]) == 0:
329
- parser.print_help()
330
- parser.exit()
331
-
332
- args = parser.parse_args()
333
-
334
- _ = mewc_to_md(mewc_input_folder=args.input_folder,
335
- output_file=args.output_file,
336
- mount_prefix=args.mount_prefix,
337
- category_name_column=args.category_name_column)
338
-
339
- if __name__ == '__main__':
340
- main()
1
+ """
2
+
3
+ mewc_to_md.py
4
+
5
+ Converts the output of the MEWC inference scripts to the MD output format.
6
+
7
+ """
8
+
9
+ #%% Imports and constants
10
+
11
+ import os
12
+ import json
13
+ import pandas as pd
14
+ import sys
15
+ import argparse
16
+
17
+ from copy import deepcopy
18
+ from collections import defaultdict
19
+ from megadetector.utils.ct_utils import sort_list_of_dicts_by_key, invert_dictionary # noqa
20
+ from megadetector.utils.path_utils import recursive_file_list
21
+
22
+ from megadetector.postprocessing.validate_batch_results import \
23
+ ValidateBatchResultsOptions, validate_batch_results
24
+
25
+ default_mewc_mount_prefix = '/images/'
26
+ default_mewc_category_name_column = 'class_id'
27
+
28
+
29
+ #%% Functions
30
+
31
+ def mewc_to_md(mewc_input_folder,
32
+ output_file=None,
33
+ mount_prefix=default_mewc_mount_prefix,
34
+ category_name_column=default_mewc_category_name_column,
35
+ mewc_out_filename='mewc_out.csv',
36
+ md_out_filename='md_out.json'):
37
+ """
38
+
39
+ Args:
40
+ mewc_input_folder (str): the folder we'll search for MEWC output files
41
+ output_file (str, optional): .json file to write with class information
42
+ mount_prefix (str, optional): string to remove from all filenames in the MD
43
+ .json file, typically the prefix used to mount the image folder.
44
+ category_name_column (str, optional): column in the MEWC results .csv to use for
45
+ category naming.
46
+
47
+ Returns:
48
+ dict: an MD-formatted dict, the same as what's written to [output_file]
49
+ """
50
+
51
+ ##%% Read input files
52
+
53
+ assert os.path.isdir(mewc_input_folder), \
54
+ 'Could not find folder {}'.format(mewc_input_folder)
55
+
56
+
57
+ ##%% Find MEWC output files
58
+
59
+ relative_path_to_mewc_info = {}
60
+
61
+ print('Listing files in folder {}'.format(mewc_input_folder))
62
+ all_files_relative = set(recursive_file_list(mewc_input_folder,return_relative_paths=True))
63
+
64
+ for fn_relative in all_files_relative:
65
+ if fn_relative.endswith(mewc_out_filename):
66
+ folder_relative = '/'.join(fn_relative.split('/')[:-1])
67
+ assert folder_relative not in relative_path_to_mewc_info
68
+ md_output_file_relative = os.path.join(folder_relative,md_out_filename).replace('\\','/')
69
+ assert md_output_file_relative in all_files_relative, \
70
+ 'Could not find MD output file {} to match to {}'.format(
71
+ md_output_file_relative,fn_relative)
72
+ relative_path_to_mewc_info[folder_relative] = \
73
+ {'mewc_predict_file':fn_relative,'md_file':md_output_file_relative}
74
+
75
+ del folder_relative
76
+
77
+ print('Found {} MEWC results files'.format(len(relative_path_to_mewc_info)))
78
+
79
+
80
+ ##%% Prepare to loop over results files
81
+
82
+ md_results_all = {}
83
+ md_results_all['images'] = []
84
+ md_results_all['detection_categories'] = {}
85
+ md_results_all['classification_categories'] = {}
86
+ md_results_all['info'] = None
87
+
88
+ classification_category_name_to_id = {}
89
+
90
+
91
+ ##%% Loop over results files
92
+
93
+ # relative_folder = next(iter(relative_path_to_mewc_info.keys()))
94
+ for relative_folder in relative_path_to_mewc_info:
95
+
96
+ ##%%
97
+
98
+ mewc_info = relative_path_to_mewc_info[relative_folder]
99
+ mewc_csv_fn_abs = os.path.join(mewc_input_folder,mewc_info['mewc_predict_file'])
100
+ mewc_md_fn_abs = os.path.join(mewc_input_folder,mewc_info['md_file'])
101
+
102
+ mewc_classification_info = pd.read_csv(mewc_csv_fn_abs)
103
+ mewc_classification_info = mewc_classification_info.to_dict('records')
104
+
105
+ assert os.path.isfile(mewc_md_fn_abs), \
106
+ 'Could not find file {}'.format(mewc_md_fn_abs)
107
+ with open(mewc_md_fn_abs,'r') as f:
108
+ md_results = json.load(f)
109
+
110
+
111
+ ##%% Remove the mount prefix from MD files if necessary
112
+ if mount_prefix is not None and len(mount_prefix) > 0:
113
+
114
+ n_files_without_mount_prefix = 0
115
+
116
+ # im = md_results['images'][0]
117
+ for im in md_results['images']:
118
+ if not im['file'].startswith(mount_prefix):
119
+ n_files_without_mount_prefix += 1
120
+ else:
121
+ im['file'] = im['file'].replace(mount_prefix,'',1)
122
+
123
+ if n_files_without_mount_prefix > 0:
124
+ print('Warning {} of {} files in the MD results did not include the mount prefix {}'.format(
125
+ n_files_without_mount_prefix,len(md_results['images']),mount_prefix))
126
+
127
+
128
+ ##%% Convert MEWC snip IDs to image files
129
+
130
+ # r = mewc_classification_info[0]
131
+ for r in mewc_classification_info:
132
+
133
+ # E.g. "IMG0-0.jpg"
134
+ snip_file = r['filename']
135
+
136
+ # E.g. "IMG0-0"
137
+ snip_file_no_ext = os.path.splitext(snip_file)[0]
138
+ ext = os.path.splitext(snip_file)[1] # noqa
139
+
140
+ tokens = snip_file_no_ext.split('-')
141
+
142
+ if len(tokens) == 1:
143
+ print('Warning: in folder {}, detection ID not found in snip filename {}, skipping'.format(
144
+ relative_folder,snip_file_no_ext))
145
+ r['image_filename_without_extension'] = snip_file_no_ext
146
+ r['snip_id'] = None
147
+
148
+ continue
149
+
150
+ filename_without_snip_id = '-'.join(tokens[0:-1])
151
+ snip_id = int(tokens[-1])
152
+ image_filename_without_extension = filename_without_snip_id
153
+
154
+ r['image_filename_without_extension'] = image_filename_without_extension
155
+ r['snip_id'] = snip_id
156
+
157
+ # ...for each MEWC result record
158
+
159
+
160
+ ##%% Make sure MD results and MEWC results refer to the same files
161
+
162
+ images_in_md_results_no_extension = \
163
+ set([os.path.splitext(im['file'])[0] for im in md_results['images']])
164
+ images_in_mewc_results_no_extension = set(r['image_filename_without_extension'] \
165
+ for r in mewc_classification_info)
166
+
167
+ # All files with classification results should also have detection results
168
+ for fn in images_in_mewc_results_no_extension:
169
+ assert fn in images_in_md_results_no_extension, \
170
+ 'Error: file {} is present in mewc-predict results, but not in MD results'.format(fn)
171
+
172
+ # This is just a note to self: no classification results are present for empty images
173
+ if False:
174
+ for fn in images_in_md_results_no_extension:
175
+ if fn not in images_in_mewc_results_no_extension:
176
+ print('Warning: file {}/{} is present in MD results, but not in mewc-predict results'.format(
177
+ relative_folder,fn))
178
+
179
+
180
+ ##%% Validate images
181
+
182
+ for im in md_results['images']:
183
+ fn_relative = im['file']
184
+ fn_abs = os.path.join(mewc_input_folder,relative_folder,fn_relative)
185
+ if not os.path.isfile(fn_abs):
186
+ print('Warning: image file {} does not exist'.format(fn_abs))
187
+
188
+
189
+ ##%% Map filenames to MEWC results
190
+
191
+ image_id_to_mewc_records = defaultdict(list)
192
+ for r in mewc_classification_info:
193
+ image_id_to_mewc_records[r['image_filename_without_extension']].append(r)
194
+
195
+
196
+ ##%% Add classification info to MD results
197
+
198
+ # im = md_results['images'][0]
199
+ for im in md_results['images']:
200
+
201
+ if ('detections' not in im) or (im['detections'] is None) or (len(im['detections']) == 0):
202
+ continue
203
+
204
+ detections = im['detections']
205
+
206
+ # *Don't* sort by confidence, it looks like snip IDs use the original sort order
207
+ # detections = sort_list_of_dicts_by_key(detections,'conf',reverse=True)
208
+
209
+ # This is just a debug assist, so I can run this cell more than once
210
+ for det in detections:
211
+ det['classifications'] = []
212
+
213
+ image_id = os.path.splitext(im['file'])[0]
214
+ mewc_records_this_image = image_id_to_mewc_records[image_id]
215
+
216
+ # r = mewc_records_this_image[0]
217
+ for r in mewc_records_this_image:
218
+
219
+ if r['snip_id'] is None:
220
+ continue
221
+
222
+ category_name = r[category_name_column]
223
+
224
+ # This is a *global* list of category mappings, across all mewc .csv files
225
+ if category_name not in classification_category_name_to_id:
226
+ category_id = str(len(classification_category_name_to_id))
227
+ classification_category_name_to_id[category_name] = category_id
228
+ else:
229
+ category_id = classification_category_name_to_id[category_name]
230
+
231
+ snip_id = r['snip_id']
232
+ if snip_id >= len(detections):
233
+ print('Warning: image {} has a classified snip ID of {}, but only {} detections are present'.format(
234
+ image_id,snip_id,len(detections)))
235
+ continue
236
+
237
+ det = detections[snip_id]
238
+
239
+ if 'classifications' not in det:
240
+ det['classifications'] = []
241
+ det['classifications'].append([category_id,r['prob']])
242
+
243
+ # ...for each classification in this image
244
+
245
+ # ...for each image
246
+
247
+ ##%% Map MD results to the global level
248
+
249
+ if md_results_all['info'] is None:
250
+ md_results_all['info'] = md_results['info']
251
+
252
+ for category_id in md_results['detection_categories']:
253
+ if category_id not in md_results_all['detection_categories']:
254
+ md_results_all['detection_categories'][category_id] = \
255
+ md_results['detection_categories'][category_id]
256
+ else:
257
+ assert md_results_all['detection_categories'][category_id] == \
258
+ md_results['detection_categories'][category_id], \
259
+ 'MD results present with incompatible detection categories'
260
+
261
+ # im = md_results['images'][0]
262
+ for im in md_results['images']:
263
+ im_copy = deepcopy(im)
264
+ im_copy['file'] = os.path.join(relative_folder,im['file']).replace('\\','/')
265
+ md_results_all['images'].append(im_copy)
266
+
267
+ # ...for each folder that contains MEWC results
268
+
269
+ del md_results
270
+
271
+ ##%% Write output
272
+
273
+ md_results_all['classification_categories'] = invert_dictionary(classification_category_name_to_id)
274
+
275
+ if output_file is not None:
276
+ output_dir = os.path.dirname(output_file)
277
+ os.makedirs(output_dir,exist_ok=True)
278
+ with open(output_file,'w') as f:
279
+ json.dump(md_results_all,f,indent=1)
280
+
281
+ validation_options = ValidateBatchResultsOptions()
282
+ validation_options.check_image_existence = True
283
+ validation_options.relative_path_base = mewc_input_folder
284
+ validation_options.raise_errors = True
285
+ validation_results = validate_batch_results(output_file,validation_options) # noqa
286
+
287
+ # ...def mewc_to_md(...)
288
+
289
+
290
+ #%% Interactive driver
291
+
292
if False:

    # This block never runs on import or from the command line; it only exists
    # so the cell below can be executed by hand in an IDE that understands
    # "#%%" cell markers.
    pass

    #%% Example: convert one local MEWC folder (paths are machine-specific)

    mewc_input_folder = r'G:\temp\mewc-test'
    output_file = os.path.join(mewc_input_folder,'results_with_classes.json')

    # Prefix to strip from filenames in the MEWC results
    mount_prefix = '/images/'

    _ = mewc_to_md(
        mewc_input_folder=mewc_input_folder,
        output_file=output_file,
        mount_prefix=mount_prefix,
        category_name_column='class_id')
306
+
307
+
308
+ #%% Command-line driver
309
+
310
def main(): # noqa
    """
    Command-line entry point.

    Parses the input folder, output file, and optional MEWC settings from
    sys.argv, then passes them to mewc_to_md(). If no arguments are supplied,
    prints the usage text and exits instead of running.
    """

    arg_parser = argparse.ArgumentParser()

    # Required positional arguments
    arg_parser.add_argument(
        'input_folder',
        type=str,
        help='Folder containing images and MEWC .json/.csv files')
    arg_parser.add_argument(
        'output_file',
        type=str,
        help='.json file where output will be written')

    # Optional arguments, defaulting to the module-level MEWC defaults
    arg_parser.add_argument(
        '--mount_prefix',
        type=str,
        default=default_mewc_mount_prefix,
        help='prefix to remove from each filename in MEWC results, typically the Docker mount point')
    arg_parser.add_argument(
        '--category_name_column',
        type=str,
        default=default_mewc_category_name_column,
        help='column in the MEWC .csv file to use for category names')

    # With no arguments at all, show the full help text rather than letting
    # argparse report the missing positional arguments as an error.
    if not sys.argv[1:]:
        arg_parser.print_help()
        arg_parser.exit()

    cli_args = arg_parser.parse_args()

    mewc_to_md(
        mewc_input_folder=cli_args.input_folder,
        output_file=cli_args.output_file,
        mount_prefix=cli_args.mount_prefix,
        category_name_column=cli_args.category_name_column)


# Run the command-line driver only when this file is executed as a script
if __name__ == '__main__':
    main()