megadetector 5.0.28__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (176) hide show
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +231 -224
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +340 -337
  65. megadetector/detection/pytorch_detector.py +304 -262
  66. megadetector/detection/run_detector.py +177 -164
  67. megadetector/detection/run_detector_batch.py +364 -363
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +256 -249
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +290 -282
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +415 -415
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +219 -146
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -499
  81. megadetector/postprocessing/load_api_results.py +23 -20
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +313 -298
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1018 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1457 -398
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +61 -61
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2526
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +401 -397
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +79 -73
  124. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  128. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
  129. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  130. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  131. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  132. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  133. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  134. megadetector/data_management/importers/awc_to_json.py +0 -191
  135. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  136. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  137. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  138. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  139. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  140. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  141. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  142. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  143. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  144. megadetector/data_management/importers/ena24_to_json.py +0 -276
  145. megadetector/data_management/importers/filenames_to_json.py +0 -386
  146. megadetector/data_management/importers/helena_to_cct.py +0 -283
  147. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  148. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  149. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  150. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  151. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  152. megadetector/data_management/importers/missouri_to_json.py +0 -490
  153. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  154. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  155. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  156. megadetector/data_management/importers/pc_to_json.py +0 -365
  157. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  158. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  159. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  160. megadetector/data_management/importers/rspb_to_json.py +0 -356
  161. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  162. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  163. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  164. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  165. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  166. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  167. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  168. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  169. megadetector/data_management/importers/ubc_to_json.py +0 -399
  170. megadetector/data_management/importers/umn_to_json.py +0 -507
  171. megadetector/data_management/importers/wellington_to_json.py +0 -263
  172. megadetector/data_management/importers/wi_to_json.py +0 -442
  173. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  174. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  175. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  176. megadetector-5.0.28.dist-info/RECORD +0 -209
@@ -5,7 +5,7 @@ separate_detections_into_folders.py
5
5
  **Overview**
6
6
 
7
7
  Given a .json file with batch processing results, separate the files in that
8
- set of results into folders that contain animals/people/vehicles/nothing,
8
+ set of results into folders that contain animals/people/vehicles/nothing,
9
9
  according to per-class thresholds.
10
10
 
11
11
  Image files are copied, not moved.
@@ -63,15 +63,15 @@ In this scenario, the folders within "animals" will be:
63
63
 
64
64
  deer, cow, multiple, unclassified
65
65
 
66
- "multiple" in this case only means "deer and cow"; if an image is classified as containing a
66
+ "multiple" in this case only means "deer and cow"; if an image is classified as containing a
67
67
  bird and a bear, that would end up in "unclassified", since the folder separation is based only
68
68
  on the categories you provide at the command line.
69
69
 
70
- No classification-based separation is done within the animal_person, animal_vehicle, or
70
+ No classification-based separation is done within the animal_person, animal_vehicle, or
71
71
  animal_person_vehicle folders.
72
72
 
73
73
  """
74
-
74
+
75
75
  #%% Constants and imports
76
76
 
77
77
  import argparse
@@ -108,13 +108,13 @@ class SeparateDetectionsIntoFoldersOptions:
108
108
  """
109
109
  Options used to parameterize separate_detections_into_folders()
110
110
  """
111
-
111
+
112
112
  def __init__(self,threshold=None):
113
-
113
+
114
114
  #: Default threshold for categories not specified in category_name_to_threshold
115
115
  self.threshold = None
116
-
117
- #: Dict mapping category names to thresholds; for example, an image with only a detection of class
116
+
117
+ #: Dict mapping category names to thresholds; for example, an image with only a detection of class
118
118
  #: "animal" whose confidence is greater than or equal to category_name_to_threshold['animal']
119
119
  #: will be put in the "animal" folder.
120
120
  self.category_name_to_threshold = {
@@ -122,130 +122,130 @@ class SeparateDetectionsIntoFoldersOptions:
122
122
  'person': self.threshold,
123
123
  'vehicle': self.threshold
124
124
  }
125
-
125
+
126
126
  #: Number of workers to use, set to <= 1 to disable parallelization
127
127
  self.n_threads = 1
128
-
128
+
129
129
  #: By default, this function errors if you try to output to an existing folder
130
130
  self.allow_existing_directory = False
131
-
131
+
132
132
  #: By default, this function errors if any of the images specified in the results file don't
133
133
  #: exist in the source folder.
134
134
  self.allow_missing_files = False
135
-
135
+
136
136
  #: Whether to overwrite images that already exist in the target folder; only relevant if
137
137
  #: [allow_existing_directory] is True
138
138
  self.overwrite = True
139
-
139
+
140
140
  #: Whether to skip empty images; if this is False, empty images (i.e., images with no detections
141
141
  #: above the corresponding threshold) will be copied to an "empty" folder.
142
142
  self.skip_empty_images = False
143
-
143
+
144
144
  #: The MD results .json file to process
145
145
  self.results_file = None
146
-
146
+
147
147
  #: The folder containing source images; filenames in [results_file] should be relative to this
148
148
  #: folder.
149
149
  self.base_input_folder = None
150
-
150
+
151
151
  #: The folder to which we should write output images; see the module header comment for information
152
152
  #: about how that folder will be structured.
153
153
  self.base_output_folder = None
154
-
154
+
155
155
  #: Should we move rather than copy?
156
156
  self.move_images = False
157
-
157
+
158
158
  #: Should we render boxes on the output images? Makes everything a lot slower.
159
159
  self.render_boxes = False
160
-
160
+
161
161
  #: Line thickness in pixels; only relevant if [render_boxes] is True
162
162
  self.line_thickness = default_line_thickness
163
-
163
+
164
164
  #: Box expansion in pixels; only relevant if [render_boxes] is True
165
165
  self.box_expansion = default_box_expansion
166
-
166
+
167
167
  #: Originally specified as a string that looks like this:
168
168
  #:
169
169
  #: deer=0.75,cow=0.75
170
170
  #:
171
- #: String, converted internally to a dict mapping name:threshold
171
+ #: String, converted internally to a dict mapping name:threshold
172
172
  self.classification_thresholds = None
173
-
173
+
174
174
  ## Debug or internal attributes
175
-
175
+
176
176
  #: Do not set explicitly; populated from data when using classification results
177
177
  self.classification_category_id_to_name = None
178
-
178
+
179
179
  #: Do not set explicitly; populated from data when using classification results
180
180
  self.classification_categories = None
181
-
181
+
182
182
  #: Used to test this script; sets a limit on the number of images to process.
183
183
  self.debug_max_images = None
184
-
184
+
185
185
  #: Do not set explicitly; this gets created based on [results_file]
186
186
  #:
187
187
  #:Dictionary mapping categories (plus combinations of categories, and 'empty') to output folders
188
188
  self.category_name_to_folder = None
189
-
189
+
190
190
  #: Do not set explicitly; this gets loaded from [results_file]
191
191
  self.category_id_to_category_name = None
192
-
192
+
193
193
  #: List of category names for which we should blur detections, most commonly ['person']
194
194
  #:
195
- #: Can also be a comma-separated list.
195
+ #: Can also be a comma-separated list.
196
196
  self.category_names_to_blur = None
197
-
197
+
198
198
  #: Remove all empty folders from the target folder at the end of the process,
199
199
  #: whether or not they were created by this script
200
200
  self.remove_empty_folders = False
201
-
201
+
202
202
  # ...__init__()
203
-
204
- # ...class SeparateDetectionsIntoFoldersOptions
205
-
206
-
203
+
204
+ # ...class SeparateDetectionsIntoFoldersOptions
205
+
206
+
207
207
  #%% Support functions
208
-
208
+
209
209
  def _path_is_abs(p): return (len(p) > 1) and (p[0] == '/' or p[1] == ':')
210
210
 
211
211
  printed_missing_file_warning = False
212
-
212
+
213
213
  def _process_detections(im,options):
214
214
  """
215
215
  Process all detections for a single image
216
-
216
+
217
217
  May modify *im*.
218
218
  """
219
219
 
220
220
  global printed_missing_file_warning
221
-
221
+
222
222
  relative_filename = im['file']
223
-
224
- detections = None
223
+
224
+ detections = None
225
225
  if 'detections' in im:
226
226
  detections = im['detections']
227
-
227
+
228
228
  categories_above_threshold = None
229
-
229
+
230
230
  if detections is None:
231
-
231
+
232
232
  assert im['failure'] is not None and len(im['failure']) > 0
233
233
  target_folder = options.category_name_to_folder['failure']
234
-
234
+
235
235
  else:
236
-
236
+
237
237
  category_name_to_max_confidence = {}
238
238
  category_names = options.category_id_to_category_name.values()
239
239
  for category_name in category_names:
240
240
  category_name_to_max_confidence[category_name] = 0.0
241
-
241
+
242
242
  # Find the maximum confidence for each category
243
243
  #
244
244
  # det = detections[0]
245
245
  for det in detections:
246
-
246
+
247
247
  category_id = det['category']
248
-
248
+
249
249
  # For zero-confidence detections, we occasionally have leftover goop
250
250
  # from COCO classes
251
251
  if category_id not in options.category_id_to_category_name:
@@ -253,79 +253,79 @@ def _process_detections(im,options):
253
253
  category_id,relative_filename))
254
254
  # assert det['conf'] < invalid_category_epsilon
255
255
  continue
256
-
256
+
257
257
  category_name = options.category_id_to_category_name[category_id]
258
258
  if det['conf'] > category_name_to_max_confidence[category_name]:
259
259
  category_name_to_max_confidence[category_name] = det['conf']
260
-
260
+
261
261
  # ...for each detection on this image
262
-
262
+
263
263
  # Count the number of thresholds exceeded
264
264
  categories_above_threshold = []
265
265
  for category_name in category_names:
266
-
266
+
267
267
  threshold = options.category_name_to_threshold[category_name]
268
268
  assert threshold is not None
269
-
269
+
270
270
  max_confidence_this_category = category_name_to_max_confidence[category_name]
271
271
  if max_confidence_this_category >= threshold:
272
272
  categories_above_threshold.append(category_name)
273
-
273
+
274
274
  # ...for each category
275
-
275
+
276
276
  categories_above_threshold.sort()
277
-
277
+
278
278
  using_classification_folders = (options.classification_thresholds is not None and \
279
279
  len(options.classification_thresholds) > 0)
280
-
280
+
281
281
  # If this is above multiple thresholds
282
282
  if len(categories_above_threshold) > 1:
283
-
283
+
284
284
  # Currently "animal_person" images get put into the "animal_person" folder, even if we're
285
285
  # doing species-based separation. Ideally, we would optionally put these in either the "deer"
286
286
  # folder or a "deer_person" folder, but this is pretty esoteric, so not worrying about this
287
287
  # for now.
288
288
  target_folder = options.category_name_to_folder['_'.join(categories_above_threshold)]
289
-
289
+
290
290
  elif len(categories_above_threshold) == 0:
291
-
291
+
292
292
  target_folder = options.category_name_to_folder['empty']
293
-
293
+
294
294
  else:
295
-
295
+
296
296
  assert len(categories_above_threshold) == 1
297
-
297
+
298
298
  target_folder = options.category_name_to_folder[categories_above_threshold[0]]
299
-
299
+
300
300
  # Are we making species classification folders, and is this an animal?
301
301
  if ('animal' in categories_above_threshold) and (using_classification_folders):
302
-
302
+
303
303
  # Do we need to put this into a specific species folder?
304
-
304
+
305
305
  # Find the animal-class detections that are above threshold
306
306
  category_name_to_id = {v: k for k, v in options.category_id_to_category_name.items()}
307
307
  animal_category_id = category_name_to_id['animal']
308
308
  valid_animal_detections = [d for d in detections if \
309
309
  (d['category'] == animal_category_id and \
310
310
  d['conf'] >= options.category_name_to_threshold['animal'])]
311
-
311
+
312
312
  # Count the number of classification categories that are above threshold for at
313
313
  # least one detection
314
314
  classification_categories_above_threshold = set()
315
-
315
+
316
316
  # d = valid_animal_detections[0]
317
317
  for d in valid_animal_detections:
318
-
318
+
319
319
  if 'classifications' not in d or d['classifications'] is None:
320
320
  continue
321
-
321
+
322
322
  # classification = d['classifications'][0]
323
323
  for classification in d['classifications']:
324
-
324
+
325
325
  classification_category_id = classification[0]
326
326
  classification_confidence = classification[1]
327
-
328
- # Do we have a threshold for this category, and if so, is
327
+
328
+ # Do we have a threshold for this category, and if so, is
329
329
  # this classification above threshold?
330
330
  assert options.classification_category_id_to_name is not None
331
331
  classification_category_name = \
@@ -334,78 +334,78 @@ def _process_detections(im,options):
334
334
  (classification_confidence > \
335
335
  options.classification_thresholds[classification_category_name]):
336
336
  classification_categories_above_threshold.add(classification_category_name)
337
-
337
+
338
338
  # ...for each classification
339
-
339
+
340
340
  # ...for each detection
341
-
341
+
342
342
  if len(classification_categories_above_threshold) == 0:
343
343
  classification_folder_name = 'unclassified'
344
-
344
+
345
345
  elif len(classification_categories_above_threshold) > 1:
346
346
  classification_folder_name = 'multiple'
347
-
347
+
348
348
  else:
349
349
  assert len(classification_categories_above_threshold) == 1
350
- classification_folder_name = list(classification_categories_above_threshold)[0]
351
-
350
+ classification_folder_name = list(classification_categories_above_threshold)[0]
351
+
352
352
  target_folder = os.path.join(target_folder,classification_folder_name)
353
-
354
- # ...if we have to deal with classification subfolders
355
-
353
+
354
+ # ...if we have to deal with classification subfolders
355
+
356
356
  # ...if we have 0/1/more categories above threshold
357
-
357
+
358
358
  # ...if this is/isn't a failure case
359
-
359
+
360
360
  source_path = os.path.join(options.base_input_folder,relative_filename)
361
361
  if not os.path.isfile(source_path):
362
362
  if not options.allow_missing_files:
363
363
  raise ValueError('Cannot find file {}'.format(source_path))
364
364
  else:
365
365
  if not printed_missing_file_warning:
366
- print('Warning: cannot find at least one file ({})'.format(source_path))
366
+ print('Warning: cannot find at least one file ({})'.format(source_path))
367
367
  printed_missing_file_warning = True
368
368
  return
369
-
369
+
370
370
  target_path = os.path.join(target_folder,relative_filename)
371
371
  if (not options.overwrite) and (os.path.isfile(target_path)):
372
372
  return
373
-
373
+
374
374
  target_dir = os.path.dirname(target_path)
375
375
  os.makedirs(target_dir,exist_ok=True)
376
-
376
+
377
377
  # Skip this image if it's empty and we're not processing empty images
378
378
  if ((categories_above_threshold is None) or (len(categories_above_threshold) == 0)) and \
379
379
  options.skip_empty_images:
380
380
  return
381
-
381
+
382
382
  # At this point, this image is getting copied; we may or may not also need to
383
383
  # draw bounding boxes or blur pixels.
384
-
384
+
385
385
  # Do a simple copy operation if we don't need to manipulate the images (render boxes, blur pixels)
386
386
  if (not options.render_boxes and (options.category_names_to_blur is None)) or \
387
387
  (categories_above_threshold is None) or \
388
388
  (len(categories_above_threshold) == 0):
389
-
389
+
390
390
  if options.move_images:
391
391
  shutil.move(source_path,target_path)
392
392
  else:
393
393
  shutil.copyfile(source_path,target_path)
394
-
394
+
395
395
  else:
396
-
396
+
397
397
  # Open the source image
398
398
  pil_image = vis_utils.load_image(source_path)
399
-
399
+
400
400
  # Blur regions in the image if necessary
401
401
  category_names_to_blur = options.category_names_to_blur
402
-
402
+
403
403
  if category_names_to_blur is not None:
404
-
404
+
405
405
  if isinstance(category_names_to_blur,str):
406
406
  category_names_to_blur = category_names_to_blur.split(',')
407
407
  category_names_to_blur = [s.strip() for s in category_names_to_blur]
408
-
408
+
409
409
  detections_to_blur = []
410
410
  for d in detections:
411
411
  category_name = options.category_id_to_category_name[d['category']]
@@ -414,74 +414,74 @@ def _process_detections(im,options):
414
414
  detections_to_blur.append(d)
415
415
  if len(detections_to_blur) > 0:
416
416
  blur_detections(pil_image,detections_to_blur)
417
-
417
+
418
418
  # Render bounding boxes for each category separately, because
419
419
  # we allow different thresholds for each category.
420
-
420
+
421
421
  category_name_to_id = {v: k for k, v in options.category_id_to_category_name.items()}
422
422
  assert len(category_name_to_id) == len(options.category_id_to_category_name)
423
-
423
+
424
424
  classification_label_map = None
425
425
  if using_classification_folders:
426
426
  classification_label_map = options.classification_categories
427
-
427
+
428
428
  for category_name in categories_above_threshold:
429
-
429
+
430
430
  category_id = category_name_to_id[category_name]
431
431
  category_threshold = options.category_name_to_threshold[category_name]
432
432
  assert category_threshold is not None
433
433
  category_detections = [d for d in detections if d['category'] == category_id]
434
-
434
+
435
435
  # When we're not using classification folders, remove classification
436
436
  # information to maintain standard detection colors.
437
437
  if not using_classification_folders:
438
438
  for d in category_detections:
439
439
  if 'classifications' in d:
440
440
  del d['classifications']
441
-
441
+
442
442
  vis_utils.render_detection_bounding_boxes(
443
- category_detections,
443
+ category_detections,
444
444
  pil_image,
445
- label_map=options.detection_categories,
445
+ label_map=options.detection_categories,
446
446
  classification_label_map=classification_label_map,
447
447
  confidence_threshold=category_threshold,
448
448
  thickness=options.line_thickness,
449
449
  expansion=options.box_expansion)
450
-
450
+
451
451
  # ...for each category
452
-
452
+
453
453
  # Try to preserve EXIF data and image quality when saving
454
- vis_utils.exif_preserving_save(pil_image,target_path)
455
-
454
+ vis_utils.exif_preserving_save(pil_image,target_path)
455
+
456
456
  # ...if we don't/do need to render boxes
457
-
457
+
458
458
  # ...def _process_detections()
459
-
460
-
459
+
460
+
461
461
  #%% Main function
462
462
 
463
463
  def separate_detections_into_folders(options):
464
464
  """
465
465
  Given a .json file with batch processing results, separate the files in that
466
- set of results into folders that contain animals/people/vehicles/nothing,
467
- according to per-class thresholds. See the header comment of this module for
466
+ set of results into folders that contain animals/people/vehicles/nothing,
467
+ according to per-class thresholds. See the header comment of this module for
468
468
  more details about the output folder structure.
469
-
469
+
470
470
  Args:
471
471
  options (SeparateDetectionsIntoFoldersOptions): parameters guiding image
472
472
  separation, see the SeparateDetectionsIntoFoldersOptions documentation for specific
473
473
  options.
474
474
  """
475
-
475
+
476
476
  # Input validation
477
-
477
+
478
478
  # Currently we don't support moving (instead of copying) when we're also rendering
479
479
  # bounding boxes or blurring humans.
480
480
  assert not (options.render_boxes and options.move_images), \
481
481
  'Cannot specify both render_boxes and move_images'
482
482
  assert not ((options.category_names_to_blur is not None) and options.move_images), \
483
483
  'Cannot specify both category_names_to_blur and move_images'
484
-
484
+
485
485
  # Create output folder if necessary
486
486
  if (os.path.isdir(options.base_output_folder)) and \
487
487
  (len(os.listdir(options.base_output_folder) ) > 0):
@@ -490,113 +490,113 @@ def separate_detections_into_folders(options):
490
490
  'you mean to delete an old version?')
491
491
  else:
492
492
  raise ValueError('Target folder exists and is not empty')
493
- os.makedirs(options.base_output_folder,exist_ok=True)
494
-
495
- # Load detection results
493
+ os.makedirs(options.base_output_folder,exist_ok=True)
494
+
495
+ # Load detection results
496
496
  print('Loading detection results')
497
497
  results = json.load(open(options.results_file))
498
498
  images = results['images']
499
-
499
+
500
500
  for im in images:
501
501
  fn = im['file']
502
502
  assert not _path_is_abs(fn), 'Cannot process results with absolute image paths'
503
-
503
+
504
504
  print('Processing detections for {} images'.format(len(images)))
505
-
505
+
506
506
  default_threshold = options.threshold
507
-
508
- if default_threshold is None:
509
- default_threshold = get_typical_confidence_threshold_from_results(results)
510
-
511
- detection_categories = results['detection_categories']
507
+
508
+ if default_threshold is None:
509
+ default_threshold = get_typical_confidence_threshold_from_results(results)
510
+
511
+ detection_categories = results['detection_categories']
512
512
  options.detection_categories = detection_categories
513
513
  options.category_id_to_category_name = detection_categories
514
-
514
+
515
515
  # Map class names to output folders
516
516
  options.category_name_to_folder = {}
517
517
  options.category_name_to_folder['empty'] = os.path.join(options.base_output_folder,'empty')
518
518
  options.category_name_to_folder['failure'] =\
519
519
  os.path.join(options.base_output_folder,'processing_failure')
520
-
520
+
521
521
  # Create all combinations of categories
522
522
  category_names = list(detection_categories.values())
523
523
  category_names.sort()
524
524
 
525
525
  # category_name = category_names[0]
526
- for category_name in category_names:
526
+ for category_name in category_names:
527
527
 
528
528
  # Do we have a custom threshold for this category?
529
529
  if category_name not in options.category_name_to_threshold:
530
530
  print('Warning: category {} in detection file, but not in threshold mapping'.format(
531
531
  category_name))
532
532
  options.category_name_to_threshold[category_name] = None
533
-
533
+
534
534
  if options.category_name_to_threshold[category_name] is None:
535
535
  options.category_name_to_threshold[category_name] = default_threshold
536
-
536
+
537
537
  category_threshold = options.category_name_to_threshold[category_name]
538
538
  print('Processing category {} at threshold {}'.format(category_name,category_threshold))
539
-
539
+
540
540
  target_category_names = []
541
541
  for c in category_names:
542
-
542
+
543
543
  target_category_names.append(c)
544
-
544
+
545
545
  for combination_length in range(2,len(category_names)+1):
546
-
546
+
547
547
  combined_category_names = list(itertools.combinations(category_names,combination_length))
548
-
549
- for combination in combined_category_names:
548
+
549
+ for combination in combined_category_names:
550
550
  combined_name = '_'.join(combination)
551
551
  target_category_names.append(combined_name)
552
-
552
+
553
553
  # Create folder mappings for each category
554
554
  for category_name in target_category_names:
555
-
555
+
556
556
  folder_name = category_name
557
-
558
- if category_name in friendly_folder_names:
557
+
558
+ if category_name in friendly_folder_names:
559
559
  folder_name = friendly_folder_names[category_name]
560
-
560
+
561
561
  options.category_name_to_folder[category_name] = \
562
562
  os.path.join(options.base_output_folder,folder_name)
563
-
563
+
564
564
  # Create the actual folders
565
565
  for folder in options.category_name_to_folder.values():
566
- os.makedirs(folder,exist_ok=True)
567
-
566
+ os.makedirs(folder,exist_ok=True)
567
+
568
568
  # Handle species classification thresholds, if specified
569
569
  if options.classification_thresholds is not None:
570
-
570
+
571
571
  assert 'classification_categories' in results and \
572
572
  results['classification_categories'] is not None, \
573
573
  'Classification thresholds specified, but no classification results available'
574
-
574
+
575
575
  classification_categories = results['classification_categories']
576
576
  classification_category_name_to_id = {v: k for k, v in classification_categories.items()}
577
577
  classification_category_id_to_name = {k: v for k, v in classification_categories.items()}
578
578
  options.classification_category_id_to_name = classification_category_id_to_name
579
579
  options.classification_categories = classification_categories
580
-
580
+
581
581
  if isinstance(options.classification_thresholds,str):
582
-
582
+
583
583
  # E.g. deer=0.75,cow=0.75
584
584
  tokens = options.classification_thresholds.split(',')
585
585
  classification_thresholds = {}
586
-
586
+
587
587
  # token = tokens[0]
588
588
  for token in tokens:
589
589
  subtokens = token.split('=')
590
590
  assert len(subtokens) == 2 and is_float(subtokens[1]), \
591
- 'Illegal classification threshold {}'.format(token)
591
+ 'Illegal classification threshold {}'.format(token)
592
592
  classification_thresholds[subtokens[0]] = float(subtokens[1])
593
-
593
+
594
594
  # ...for each token
595
-
596
- options.classification_thresholds = classification_thresholds
597
-
595
+
596
+ options.classification_thresholds = classification_thresholds
597
+
598
598
  # ...if classification thresholds are still in string format
599
-
599
+
600
600
  # Validate the classes in the threshold list
601
601
  for class_name in options.classification_thresholds.keys():
602
602
  assert class_name in classification_category_name_to_id, \
@@ -604,40 +604,40 @@ def separate_detections_into_folders(options):
604
604
  class_name)
605
605
 
606
606
  # ...if we need to deal with classification categories
607
-
607
+
608
608
  if options.n_threads <= 1 or options.debug_max_images is not None:
609
-
609
+
610
610
  # i_image = 14; im = images[i_image]; im
611
611
  for i_image,im in enumerate(tqdm(images)):
612
612
  if options.debug_max_images is not None and i_image > options.debug_max_images:
613
613
  break
614
614
  _process_detections(im,options)
615
615
  # ...for each image
616
-
616
+
617
617
  else:
618
-
618
+
619
619
  print('Starting a pool with {} threads'.format(options.n_threads))
620
620
  pool = ThreadPool(options.n_threads)
621
621
  process_detections_with_options = partial(_process_detections, options=options)
622
622
  _ = list(tqdm(pool.imap(process_detections_with_options, images), total=len(images)))
623
-
623
+
624
624
  if options.remove_empty_folders:
625
625
  print('Removing empty folders from {}'.format(options.base_output_folder))
626
626
  remove_empty_folders(options.base_output_folder)
627
-
627
+
628
628
  # ...def separate_detections_into_folders
629
629
 
630
630
 
631
631
  #%% Interactive driver
632
-
632
+
633
633
  if False:
634
634
 
635
635
  pass
636
636
 
637
637
  #%%
638
-
638
+
639
639
  options = SeparateDetectionsIntoFoldersOptions()
640
-
640
+
641
641
  options.results_file = os.path.expanduser(
642
642
  '~/data/snapshot-safari-2022-08-16-KRU-v5a.0.0_detections.json')
643
643
  options.base_input_folder = os.path.expanduser('~/data/KRU/KRU_public')
@@ -645,11 +645,11 @@ if False:
645
645
  options.n_threads = 100
646
646
  options.render_boxes = True
647
647
  options.allow_existing_directory = True
648
-
648
+
649
649
  #%%
650
-
650
+
651
651
  options = SeparateDetectionsIntoFoldersOptions()
652
-
652
+
653
653
  options.results_file = os.path.expanduser('~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json')
654
654
  options.base_input_folder = os.path.expanduser('~/data/ENA24/images')
655
655
  options.base_output_folder = os.path.expanduser('~/data/ENA24-separated')
@@ -657,31 +657,45 @@ if False:
657
657
  options.classification_thresholds = 'deer=0.75,cow=0.75,bird=0.75'
658
658
  options.render_boxes = True
659
659
  options.allow_existing_directory = True
660
-
660
+
661
661
  #%%
662
-
662
+
663
663
  separate_detections_into_folders(options)
664
-
664
+
665
665
  #%% Testing various command-line invocations
666
-
666
+
667
667
  """
668
668
  # With boxes, no classification
669
- python separate_detections_into_folders.py ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json ~/data/ENA24/images ~/data/ENA24-separated --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory --render_boxes --line_thickness 10 --box_expansion 10
670
-
669
+ python separate_detections_into_folders.py \
670
+ ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json \
671
+ ~/data/ENA24/images ~/data/ENA24-separated \
672
+ --threshold 0.17 --animal_threshold 0.2 --n_threads 10 \
673
+ --allow_existing_directory --render_boxes --line_thickness 10 --box_expansion 10
674
+
671
675
  # No boxes, no classification (default)
672
- python separate_detections_into_folders.py ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json ~/data/ENA24/images ~/data/ENA24-separated --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory
673
-
676
+ python separate_detections_into_folders.py \
677
+ ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json \
678
+ ~/data/ENA24/images ~/data/ENA24-separated \
679
+ --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory
680
+
674
681
  # With boxes, with classification
675
- python separate_detections_into_folders.py ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json ~/data/ENA24/images ~/data/ENA24-separated --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory --render_boxes --line_thickness 10 --box_expansion 10 --classification_thresholds "deer=0.75,cow=0.75,bird=0.75"
676
-
682
+ python separate_detections_into_folders.py \
683
+ ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json ~/data/ENA24/images ~/data/ENA24-separated \
684
+ --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory \
685
+ --render_boxes --line_thickness 10 --box_expansion 10 \
686
+ --classification_thresholds "deer=0.75,cow=0.75,bird=0.75"
687
+
677
688
  # No boxes, with classification
678
- python separate_detections_into_folders.py ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json ~/data/ENA24/images ~/data/ENA24-separated --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory --classification_thresholds "deer=0.75,cow=0.75,bird=0.75"
679
- """
680
-
689
+ python separate_detections_into_folders.py \
690
+ ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json ~/data/ENA24/images ~/data/ENA24-separated \
691
+ --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory \
692
+ --classification_thresholds "deer=0.75,cow=0.75,bird=0.75"
693
+ """
694
+
681
695
  #%% Command-line driver
682
696
 
683
- def main():
684
-
697
+ def main(): # noqa
698
+
685
699
  parser = argparse.ArgumentParser()
686
700
  parser.add_argument('results_file', type=str, help='Input .json filename')
687
701
  parser.add_argument('base_input_folder', type=str, help='Input image folder')
@@ -690,7 +704,7 @@ def main():
690
704
  parser.add_argument('--threshold', type=float, default=None,
691
705
  help='Default confidence threshold for all categories (defaults to ' + \
692
706
  'selection based on model version, other options may override this ' + \
693
- 'for specific categories)')
707
+ 'for specific categories)')
694
708
  parser.add_argument('--animal_threshold', type=float, default=None,
695
709
  help='Confidence threshold for the animal category')
696
710
  parser.add_argument('--human_threshold', type=float, default=None,
@@ -700,21 +714,21 @@ def main():
700
714
  parser.add_argument('--classification_thresholds', type=str, default=None,
701
715
  help='List of classification thresholds to use for species-based folder ' + \
702
716
  'separation, formatted as, e.g., "deer=0.75,cow=0.75"')
703
-
717
+
704
718
  parser.add_argument('--n_threads', type=int, default=1,
705
719
  help='Number of threads to use for parallel operation (default=1)')
706
-
707
- parser.add_argument('--allow_existing_directory', action='store_true',
720
+
721
+ parser.add_argument('--allow_existing_directory', action='store_true',
708
722
  help='Proceed even if the target directory exists and is not empty')
709
- parser.add_argument('--no_overwrite', action='store_true',
723
+ parser.add_argument('--no_overwrite', action='store_true',
710
724
  help='Skip images that already exist in the target folder, must also ' + \
711
- 'specify --allow_existing_directory')
725
+ 'specify --allow_existing_directory')
712
726
  parser.add_argument('--skip_empty_images', action='store_true',
713
727
  help='Do not copy empty images to the output folder')
714
728
  parser.add_argument('--move_images', action='store_true',
715
729
  help='Move images (rather than copying) (not recommended this if you have not ' + \
716
730
  'backed up your data!)')
717
-
731
+
718
732
  parser.add_argument('--render_boxes', action='store_true',
719
733
  help='Render bounding boxes on output images; may result in some ' + \
720
734
  'metadata not being transferred')
@@ -727,20 +741,21 @@ def main():
727
741
  'using render_boxes (defaults to {})'.format(
728
742
  default_box_expansion))
729
743
  parser.add_argument('--category_names_to_blur', type=str, default=None,
730
- help='Comma-separated list of category names to blur (or a single category name, e.g. "person")')
744
+ help='Comma-separated list of category names to blur ' + \
745
+ '(or a single category name, e.g. "person")')
731
746
  parser.add_argument('--remove_empty_folders', action='store_true',
732
747
  help='Remove all empty folders from the target folder at the end of the process, ' + \
733
748
  'whether or not they were created by this script')
734
-
749
+
735
750
  if len(sys.argv[1:])==0:
736
751
  parser.print_help()
737
752
  parser.exit()
738
-
739
- args = parser.parse_args()
740
-
753
+
754
+ args = parser.parse_args()
755
+
741
756
  # Convert to an options object
742
757
  options = SeparateDetectionsIntoFoldersOptions()
743
-
758
+
744
759
  args_to_object(args, options)
745
760
 
746
761
  def validate_threshold(v,name):
@@ -748,27 +763,27 @@ def main():
748
763
  if v is not None:
749
764
  assert v >= 0.0 and v <= 1.0, \
750
765
  'Illegal {} threshold {}'.format(name,v)
751
-
766
+
752
767
  validate_threshold(args.threshold,'default')
753
768
  validate_threshold(args.animal_threshold,'animal')
754
769
  validate_threshold(args.vehicle_threshold,'vehicle')
755
770
  validate_threshold(args.human_threshold,'human')
756
-
771
+
757
772
  if args.threshold is not None:
758
773
  if args.animal_threshold is not None \
759
774
  and args.human_threshold is not None \
760
775
  and args.vehicle_threshold is not None:
761
776
  raise ValueError('Default threshold specified, but all category thresholds ' + \
762
777
  'also specified... not exactly wrong, but it\'s likely that you ' + \
763
- 'meant something else.')
764
-
778
+ 'meant something else.')
779
+
765
780
  options.category_name_to_threshold['animal'] = args.animal_threshold
766
781
  options.category_name_to_threshold['person'] = args.human_threshold
767
782
  options.category_name_to_threshold['vehicle'] = args.vehicle_threshold
768
-
783
+
769
784
  options.overwrite = (not args.no_overwrite)
770
-
785
+
771
786
  separate_detections_into_folders(options)
772
-
773
- if __name__ == '__main__':
787
+
788
+ if __name__ == '__main__':
774
789
  main()